#!/bin/bash
# Read a list of URLs from stdin, replace suitable social media URLs with
# correctly capitalised version.
#
# Input:  one URL per line on stdin; a line may carry a leading "* " list
#         marker, which is kept in front of the rewritten URL.
# Output: one line per input line on stdout. Facebook page/profile URLs are
#         replaced by "https://www.facebook.com/<User>/", where <User> is read
#         from the fetched page. URLs that could not be resolved are listed on
#         stderr once all input has been processed.
#
# NOTE(review): this file was restored from a copy in which every "<...>" span
# had been stripped. The two tab_home grep patterns lost their leading
# "<...[^>" and were rebuilt as "<[^>]*"; the canonical-URL pattern and the
# remainder of the read loop were lost entirely and are reconstructions (each
# is marked below). Compare against version control before relying on them.

# Browser-like User-Agent sent with every page request.
readonly USER_AGENT='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'

# Facebook URLs this script rewrites: optional www./m./xx-xx. host prefix,
# followed by a top-level page name, a /pages/<name>/<id> path or a
# profile.php?id=<id> link. The host dots are escaped so that look-alike hosts
# such as "wwwxfacebook.com" are not treated as Facebook.
readonly FACEBOOK_URL_REGEX='^https?://((www|m|[a-z][a-z]-[a-z][a-z])\.)?facebook\.com/([^/]+/?(\?|$)|pages/[^/]+/[0-9]+/?(\?|$)|profile\.php\?id=[0-9]+(&|$))'

#######################################
# Extract the user name from the href of the tag carrying
# data-key="tab_home" in a page.
# Arguments: $1 - page HTML
# Outputs:   the first user name found, or nothing
#######################################
facebook_user_from_page() {
  local page=$1
  # First grep isolates the tag up to data-key="tab_home", second grep pulls
  # the first path segment out of its href.
  grep -Po '<[^>]*(?<=\s)data-key\s*=\s*"tab_home".*?' <<< "${page}" \
    | grep -Po '<[^>]*(?<=\s)href="/\K[^/]+' \
    | head -n 1
}

#######################################
# Extract the user name from a <link rel="canonical"> tag.
# NOTE(review): reconstruction - the original pattern is missing from this
# copy; only its "Extract canonical URL" comment survived.
# Arguments: $1 - page HTML
# Outputs:   the first path segment of the canonical URL, or nothing
#######################################
facebook_user_from_canonical() {
  local page=$1
  grep -Po '<link\s[^>]*(?<=\s)rel="canonical"[^>]*' <<< "${page}" \
    | grep -Po '(?<=\s)href="https?://[^/"]+/\K[^/"?]+' \
    | head -n 1
}

#######################################
# Fetch a Facebook URL and work out the user name as Facebook spells it.
# Arguments: $1 - URL already matched by FACEBOOK_URL_REGEX
# Outputs:   the user name on stdout
# Returns:   0 if a user name was found, 1 otherwise
#######################################
resolve_facebook_user() {
  local url=$1 page user

  # Drop extra query parameters: profile.php links keep only their first
  # parameter (the id), everything else loses the whole query string.
  if [[ "${url}" == *profile.php* ]]; then
    url="${url%%&*}"
  else
    url="${url%%\?*}"
  fi

  # Always query the www host, whatever host the input URL used.
  page="$(curl -sL -A "${USER_AGENT}" -H 'Accept-Language: en-US,en;q=0.5' \
    "https://www.facebook.com/${url#*facebook.com/}")" || return 1

  user="$(facebook_user_from_page "${page}")"
  if [[ -z "${user}" ]] \
    && grep -q 'id="pagelet_loggedout_sign_up"' <<< "${page}"; then
    # Profile page which is only visible when logged in
    # Extract canonical URL
    user="$(facebook_user_from_canonical "${page}")"
  fi

  # A bare "profile.php" is not a user name.
  [[ -n "${user}" && "${user}" != profile.php ]] || return 1
  printf '%s\n' "${user}"
}

#######################################
# Process every line of stdin and report failures afterwards.
# Globals:   errorUrls (written) - URLs that could not be resolved
# Outputs:   rewritten list on stdout, failure report on stderr
#######################################
main() {
  errorUrls=()
  local url prefix user errorUrl

  # "|| [[ -n ... ]]" keeps a final line that has no trailing newline.
  while read -r url || [[ -n "${url}" ]]; do
    # Split off a leading "* " list marker so it can be re-attached later.
    if [[ "${url}" == '* '* ]]; then
      prefix="${url::2}"
      url="${url:2}"
    else
      prefix=""
    fi

    if [[ "${url}" =~ ${FACEBOOK_URL_REGEX} ]]; then
      if user="$(resolve_facebook_user "${url}")"; then
        printf '%s\n' "${prefix}https://www.facebook.com/${user}/"
        continue
      fi
      errorUrls+=("${url}")
    fi

    # NOTE(review): reconstruction - lines that are not handled, or that
    # failed, are passed through unchanged so the output stays complete.
    printf '%s\n' "${prefix}${url}"
  done

  if (( ${#errorUrls[@]} > 0 )); then
    # NOTE(review): the blank separator line is a reconstruction.
    printf '\n' >&2
    printf '%s\n' "Failed to process URLs:" >&2
    for errorUrl in "${errorUrls[@]}"; do
      printf '%s\n' "${errorUrl}" >&2
    done
  fi
}

main "$@"