The little things give you away... A collection of various small helper scripts

#!/bin/bash
# Given social media links on stdin or as args, this extracts the link in the profile description, if any.
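#
# Usage sketch (script name illustrative; options and input handling are defined below):
#   ./extract_profile_links.sh [-v|--verbose] [--] URL...
#   ./extract_profile_links.sh < links.txt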
# Echo the arguments only when verbose mode is enabled.
function verbose_echo {
    if [[ "${verbose}" ]]
    then
        echo "$@"
    fi
}

# Fetch a URL quietly, following redirects, with a timeout and a browser-like User-Agent.
function fetch {
    verbose_echo "Fetching $1" >&2
    curl -sL --max-time 10 -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' "$1"
}
# Fetch a profile page and print any website link(s) it advertises, using per-site extraction rules.
function fetch_n_extract {
    url="$1"
    if [[ "${url}" == *'facebook.com/'* ]]
    then
        page="$(fetch "${url}")"
        if grep -qF '"tab_home"' <<<"${page}"
        then
            # Publicly accessible profile: website URLs are JSON-encoded strings; decode them and drop duplicates.
            grep -Po '"website_url":\K"[^"]+"' <<<"${page}" | python3 -c 'import json, sys'$'\n''for line in sys.stdin:'$'\n'' print(json.loads(line))' | awk '!seen[$0]++'
        elif grep -qF 'id="pagelet_loggedout_sign_up"' <<< "${page}"
        then
            # Profile overview only: take the target of Facebook's l.php redirect links and URL-decode it.
            grep -Po '<div\s([^<]*\s)?id\s*=\s*"pagelet_contact".*<div\s([^<]*\s)?id\s*=\s*"bottomContent"' <<<"${page}" | grep -Po 'href="https://l\.facebook\.com/l\.php\?u=\K[^&]+' | python3 -c 'import sys, urllib.parse; sys.stdout.write(urllib.parse.unquote(sys.stdin.read()))'
        fi
    elif [[ "${url}" == *'instagram.com/'* ]]
    then
        # The profile website is exposed as "external_url" in embedded JSON.
        fetch "${url}" | grep -Po '"external_url":"\K[^"]+'
        sleep 3 # To avoid getting banned
    elif [[ "${url}" == *'twitter.com/'* ]]
    then
        # The full website URL is in the title attribute of the link inside the ProfileHeaderCard-url block.
        fetch "${url}" | tr -d '\n' | grep -Po '<div\s+([^>]*\s)?class\s*=\s*"([^"]*\s)?ProfileHeaderCard-url(\s[^"]*)?">.*?</div>' | grep -Po '<a\s(?=([^>]*\s)?class\s*=\s*"([^"]*\s)?u-textUserColor(\s[^"]*)?")([^>]*\s)?title="\K[^"]+'
    elif [[ "${url}" == *'youtube.com/'* ]]
    then
        # Request the old (pre-Polymer) layout, then extract and URL-decode the /redirect target from the header links.
        if [[ "${url}" == *'?'* ]]; then u="${url}&disable_polymer=1"; else u="${url}?disable_polymer=1"; fi
        fetch "${u}" | tr -d '\n' | grep -Po '<div\s([^>]*\s)?id\s*=\s*"header-links".*?</div>' | grep -Po 'href="/redirect\?([^"]*&(amp;)?)?q=\K[^&"]+' | python3 -c 'import sys, urllib.parse; sys.stdout.write(urllib.parse.unquote(sys.stdin.read()))'
    fi
}
# Minimal option parsing: consume leading --verbose/-v flags and an optional -- separator.
verbose=
for arg in "$@"
do
    if [[ "${arg}" == '--verbose' || "${arg}" == '-v' ]]
    then
        verbose=1
        shift
    elif [[ "${arg}" == '--' ]]
    then
        shift
    else
        # Assume end of options
        break
    fi
done
# Gather URLs from the remaining arguments, then from stdin if it is not a terminal.
{
    for arg in "$@"
    do
        echo "${arg}"
    done
    if [ ! -t 0 ]
    then
        cat
    fi
} | while read -r url
do
    # Strip a leading "* " so Markdown list items can be pasted in unchanged.
    if [[ "${url}" == '* '* ]]
    then
        url="${url:2}"
    fi
    fetch_n_extract "${url}"
done
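
A hypothetical invocation, assuming the script is saved as extract_profile_links.sh (name, URL, and output are illustrative; the verbose "Fetching" line goes to stderr):

$ ./extract_profile_links.sh -v 'https://www.instagram.com/example/'
Fetching https://www.instagram.com/example/
https://example.com/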