#!/bin/bash
# Extract from stdin social media usernames suitable for snscrape, grouped by service.
#
# Input:  arbitrary text on stdin.
# Output: one line per service on stdout, formatted "service: name name ...".
#         Nothing at all is printed when no URL matches.
#
# Requires a grep with PCRE support (-P), e.g. GNU grep.

#######################################
# Filter stdin down to grouped usernames.
#
# Recognised URL shapes (as encoded in the grep pattern below):
#   http(s)://www.facebook.com/<path>/   -> "facebook <path>"
#   http(s)://www.instagram.com/<path>/  -> "instagram <path>"
#   http(s)://twitter.com/<path>         -> "twitter <path>"
# For facebook/instagram the "www." prefix is mandatory and the capture stops
# before the last "/" of the non-whitespace run; for twitter there is no
# "www." and the capture runs to the next whitespace.
#
# Arguments: none (reads stdin)
# Outputs:   grouped usernames on stdout
#######################################
extract_usernames() {
  # \K discards the scheme (and "www.") from the reported match, so grep -o
  # emits e.g. "facebook.com/alice"; sed then turns the first ".com/" into
  # the field separator awk groups on.
  grep -Po '(https?://www\.\K(facebook|instagram)\.com/\S+(?=/)|https?://\Ktwitter\.com/\S+)' |
    sed 's,\.com/, ,' |
    sort |
    awk '
      BEGIN {
        service = ""
      }

      # First record of a new service: close the previous line, open a new one.
      $1 != service {
        if (service != "") {
          print ""
        }
        printf "%s:", $1
        service = $1
      }

      # Append the username to the line of the current service.
      $1 == service {
        printf " %s", $2
      }

      # Terminate the last line only if something was printed; otherwise
      # empty or non-matching input would yield a stray blank line.
      END {
        if (service != "") {
          print ""
        }
      }'
}

extract_usernames