Browse Source

Add support for lists

master
JustAnotherArchivist 4 years ago
parent
commit
663383830c
1 changed file with 15 additions and 7 deletions
  1. +15
    -7
      snscrape-normalise

+ 15
- 7
snscrape-normalise View File

@@ -3,6 +3,14 @@
errorUrls=() errorUrls=()
while read -r url while read -r url
do do
if [[ "${url}" == '* '* ]]
then
prefix="${url::2}"
url="${url:2}"
else
prefix=""
fi

if [[ "${url}" =~ ^https?://((www|m|[a-z][a-z]-[a-z][a-z]).)?facebook.com/([^/]+/?(\?|$)|pages/[^/]+/[0-9]+/?(\?|$)|profile\.php\?id=[0-9]+(&|$)) ]] if [[ "${url}" =~ ^https?://((www|m|[a-z][a-z]-[a-z][a-z]).)?facebook.com/([^/]+/?(\?|$)|pages/[^/]+/[0-9]+/?(\?|$)|profile\.php\?id=[0-9]+(&|$)) ]]
then then
if [[ "${url}" == *profile.php* ]] if [[ "${url}" == *profile.php* ]]
@@ -15,7 +23,7 @@ do
user="$(grep -Po '<div\s[^>]*(?<=\s)data-key\s*=\s*"tab_home".*?</div>' <<< "${page}" | grep -Po '<a\s[^>]*(?<=\s)href="/\K[^/]+')" user="$(grep -Po '<div\s[^>]*(?<=\s)data-key\s*=\s*"tab_home".*?</div>' <<< "${page}" | grep -Po '<a\s[^>]*(?<=\s)href="/\K[^/]+')"
if [[ "${user}" ]] if [[ "${user}" ]]
then then
echo "https://www.facebook.com/${user}/"
echo "${prefix}https://www.facebook.com/${user}/"
continue continue
else else
if grep -q 'id="pagelet_loggedout_sign_up"' <<< "${page}" if grep -q 'id="pagelet_loggedout_sign_up"' <<< "${page}"
@@ -25,13 +33,13 @@ do
user="$(grep -Po '<link rel="canonical" href="\K[^"]+' <<< "${page}")" user="$(grep -Po '<link rel="canonical" href="\K[^"]+' <<< "${page}")"
if [[ "${user}" ]] if [[ "${user}" ]]
then then
echo "${user}"
echo "${prefix}${user}"
continue continue
fi fi
fi fi
fi fi
errorUrls+=("${url}") errorUrls+=("${url}")
echo "${url}"
echo "${prefix}${url}"
elif [[ "${url}" =~ ^https?://(www\.)?twitter\.com/[^/]+/?(\?.*)?$ ]] elif [[ "${url}" =~ ^https?://(www\.)?twitter\.com/[^/]+/?(\?.*)?$ ]]
then then
url="${url%%\?*}" url="${url%%\?*}"
@@ -39,18 +47,18 @@ do
user="$(snscrape --max-results 1 twitter-user "${url##*/}" | grep -Po '^https?://twitter\.com/\K[^/]+')" user="$(snscrape --max-results 1 twitter-user "${url##*/}" | grep -Po '^https?://twitter\.com/\K[^/]+')"
if [[ "${user}" ]] if [[ "${user}" ]]
then then
echo "https://twitter.com/${user}"
echo "${prefix}https://twitter.com/${user}"
else else
errorUrls+=("${url}") errorUrls+=("${url}")
echo "${url}"
echo "${prefix}${url}"
fi fi
elif [[ "${url}" =~ ^https?://(www\.)?instagram\.com/[^/]+/?$ ]] elif [[ "${url}" =~ ^https?://(www\.)?instagram\.com/[^/]+/?$ ]]
then then
user="${url%/}" user="${url%/}"
user="${user##*/}" user="${user##*/}"
echo "https://www.instagram.com/${user,,}/"
echo "${prefix}https://www.instagram.com/${user,,}/"
else else
echo "${url}"
echo "${prefix}${url}"
fi fi
done done




Loading…
Cancel
Save