#!/bin/bash
# Given social media links on stdin or as args, this extracts the link in the
# profile description, if any.
#
# Usage: <script> [--verbose|-v] [--] [URL...]
#   URLs given as arguments are processed first; if stdin is not a terminal,
#   one URL per line is then read from it. A leading '* ' on a line (as in a
#   bullet list) is stripped before processing.
#
# Requires: curl, grep with -P (PCRE) support, python3, awk.
#
# NOTE(review): this file was recovered from a copy in which the whole script
# had been collapsed onto one line and parts of it (apparently everything that
# looked like an HTML tag) had been removed. Spots that had to be reconstructed
# are marked NOTE(review) below; confirm them against version control.

#######################################
# Print the arguments only when verbose mode is enabled.
# Globals:   verbose (read) - non-empty enables output
# Arguments: the message words
# Outputs:   the message on stdout (callers redirect as needed)
#######################################
function verbose_echo {
  if [[ -n "${verbose:-}" ]]; then
    printf '%s\n' "$*"
  fi
}

#######################################
# Download a URL and write the response body to stdout.
# Arguments: $1 - URL to fetch
# Outputs:   body on stdout; a "Fetching ..." note on stderr in verbose mode
#######################################
function fetch {
  verbose_echo "Fetching $1" >&2
  # Follow redirects, stay silent, give up after 10 seconds, and send a
  # desktop Firefox user agent string.
  curl -sL --max-time 10 \
    -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' \
    "$1"
}

#######################################
# Fetch a profile page and print the website link(s) found in it.
# URLs that match none of the known sites are silently ignored.
# Arguments: $1 - profile URL
# Outputs:   extracted link(s) on stdout, one per line
#######################################
function fetch_n_extract {
  local url="$1"
  local page u

  if [[ "${url}" == *'facebook.com/'* ]]; then
    page="$(fetch "${url}")"
    if grep -qF '"tab_home"' <<<"${page}"; then
      # Publicly accessible profile: the link is embedded as a JSON string,
      # so decode it with a JSON parser and drop repeated values.
      grep -Po '"website_url":\K"[^"]+"' <<<"${page}" \
        | python3 -c '
import json, sys
for line in sys.stdin:
    print(json.loads(line))
' \
        | awk '!seen[$0]++'
    elif grep -qF 'id="pagelet_loggedout_sign_up"' <<<"${page}"; then
      # Profile overview only.
      # TODO(review): the extraction pipeline for this case was lost in the
      # damaged copy this file was recovered from and could not be
      # reconstructed; restore it from version control. Until then nothing is
      # extracted for such pages.
      verbose_echo "No extractor available for overview-only page ${url}" >&2
    fi
  elif [[ "${url}" == *'twitter.com/'* ]]; then
    # NOTE(review): the condition above, the 'fetch | tr' prefix, and the
    # leading '<div\s([^>' / '<a\s(([^>' and trailing '</div>' of the patterns
    # below were missing from the damaged copy. They were reconstructed from
    # the surviving pattern fragments and by analogy with the YouTube branch;
    # confirm against version control.
    # Newlines are removed so the lazy '.*?' can span the whole element.
    fetch "${url}" \
      | tr -d '\n' \
      | grep -Po '<div\s([^>]*\s)?class\s*=\s*"([^"]*\s)?ProfileHeaderCard-url(\s[^"]*)?">.*?</div>' \
      | grep -Po '<a\s(([^>]*\s)?class\s*=\s*"([^"]*\s)?u-textUserColor(\s[^"]*)?")([^>]*\s)?title="\K[^"]+'
  elif [[ "${url}" == *'youtube.com/'* ]]; then
    # Append disable_polymer=1 with the separator that fits the URL.
    if [[ "${url}" == *'?'* ]]; then
      u="${url}&disable_polymer=1"
    else
      u="${url}?disable_polymer=1"
    fi
    # NOTE(review): the leading '<div\s([^>' and trailing '</div>' of the
    # first pattern were missing from the damaged copy and are reconstructed.
    # The link targets sit percent-encoded in the q= parameter of /redirect
    # hrefs, hence the final unquote step.
    fetch "${u}" \
      | tr -d '\n' \
      | grep -Po '<div\s([^>]*\s)?id\s*=\s*"header-links".*?</div>' \
      | grep -Po 'href="/redirect\?([^"]*&(amp;)?)?q=\K[^&"]+' \
      | python3 -c 'import sys, urllib.parse; sys.stdout.write(urllib.parse.unquote(sys.stdin.read()))'
  fi
}

#######################################
# Parse options, then process every URL from the arguments and from stdin.
# Globals:   verbose (written)
# Arguments: the script's command-line arguments
#######################################
function main {
  local arg url

  verbose=
  while (( $# > 0 )); do
    case "$1" in
      --verbose | -v)
        verbose=1
        shift
        ;;
      --)
        # Explicit end of options: everything after this is a URL.
        shift
        break
        ;;
      *)
        # Assume end of options
        break
        ;;
    esac
  done

  {
    for arg in "$@"; do
      printf '%s\n' "${arg}"
    done
    # Only consume stdin when something is piped or redirected into it.
    if [[ ! -t 0 ]]; then
      cat
    fi
  } | while read -r url || [[ -n "${url}" ]]; do
    # IFS is deliberately left at its default so surrounding whitespace is
    # trimmed; the '|| [[ -n ]]' keeps a final line without trailing newline.
    if [[ "${url}" == '* '* ]]; then
      url="${url:2}"
    fi
    fetch_n_extract "${url}"
  done
}

main "$@"