diff --git a/snscrape-normalise b/snscrape-normalise index fcd4d97..25a83ad 100755 --- a/snscrape-normalise +++ b/snscrape-normalise @@ -3,6 +3,14 @@ errorUrls=() while read -r url do + if [[ "${url}" == '* '* ]] + then + prefix="${url::2}" + url="${url:2}" + else + prefix="" + fi + if [[ "${url}" =~ ^https?://((www|m|[a-z][a-z]-[a-z][a-z]).)?facebook.com/([^/]+/?(\?|$)|pages/[^/]+/[0-9]+/?(\?|$)|profile\.php\?id=[0-9]+(&|$)) ]] then if [[ "${url}" == *profile.php* ]] @@ -15,7 +23,7 @@ do user="$(grep -Po ']*(?<=\s)data-key\s*=\s*"tab_home".*?' <<< "${page}" | grep -Po ']*(?<=\s)href="/\K[^/]+')" if [[ "${user}" ]] then - echo "https://www.facebook.com/${user}/" + echo "${prefix}https://www.facebook.com/${user}/" continue else if grep -q 'id="pagelet_loggedout_sign_up"' <<< "${page}" @@ -25,13 +33,13 @@ do user="$(grep -Po '