The little things give you away... A collection of various small helper stuff
Nie możesz wybrać więcej, niż 25 tematów Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.
 
 
 

134 wiersze
4.1 KiB

  1. #!/bin/bash
  2. # Taking a list of URLs from stdin (optionally in new-viewer style wiki format), every URL is normalised as follows:
  3. # - For social media URLs, the correct capitalisation is extracted and extraneous parameters are removed.
  4. # - For YouTube user or channel URLs, the canonical base URL is extracted.
  5. # - For anything else, retrieval is attempted and the final, post-redirect URL is used. (To not follow redirects, use --other-no-redirects.)
  6. otherCurlRedirectOpt='-L'
  7. verbose=
  8. while [[ $# -gt 0 ]]
  9. do
  10. if [[ "$1" == '--other-no-redirects' ]]
  11. then
  12. otherCurlRedirectOpt=
  13. elif [[ "$1" == '--verbose' || "$1" == '-v' ]]
  14. then
  15. verbose=1
  16. else
  17. echo "Unknown option: $1" >&2
  18. exit 1
  19. fi
  20. shift
  21. done
  22. function verbose_echo {
  23. if [[ "${verbose}" ]]
  24. then
  25. echo "$@"
  26. fi
  27. }
  28. userAgent='Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0'
  29. while read -r line
  30. do
  31. if [[ "${line}" != 'http://'* && "${line}" != 'https://'* && "${line}" != '* http://'* && "${line}" != '* https://'* ]]
  32. then
  33. echo "${line}"
  34. continue
  35. fi
  36. if [[ "${line}" == '* '* ]]
  37. then
  38. prefix="${line::2}"
  39. url="${line:2}"
  40. else
  41. prefix=""
  42. url="${line}"
  43. fi
  44. if [[ "${url}" =~ ^https?://((www|m|[a-z][a-z]-[a-z][a-z]).)?facebook.com/([^/]+/?(\?|$)|pages/[^/]+/[0-9]+/?(\?|$)|pg/[^/]+([/?]|$)|profile\.php\?id=[0-9]+(&|$)) ]]
  45. then
  46. verbose_echo "Normalising Facebook URL: ${url}" >&2
  47. if [[ "${url}" == *profile.php* ]]
  48. then
  49. url="${url%%&*}"
  50. else
  51. url="${url%%\?*}"
  52. fi
  53. page="$(curl -sL -A "${userAgent}" -H 'Accept-Language: en-US,en;q=0.5' "https://www.facebook.com/${url#*facebook.com/}")"
  54. user="$(grep -Po '<div\s[^>]*(?<=\s)data-key\s*=\s*"tab_home".*?</div>' <<< "${page}" | grep -Po '<a\s[^>]*(?<=\s)href="/\K[^/]+')"
  55. if [[ "${user}" ]]
  56. then
  57. echo "${prefix}https://www.facebook.com/${user}/"
  58. continue
  59. elif grep -q 'id="pagelet_loggedout_sign_up"' <<< "${page}"
  60. then
  61. # Profile page which is only visible when logged in
  62. # Extract canonical URL
  63. user="$(grep -Po '<link rel="canonical" href="\K[^"]+' <<< "${page}")"
  64. if [[ "${user}" ]]
  65. then
  66. echo "${prefix}${user}"
  67. continue
  68. fi
  69. fi
  70. echo "Failed to normalise Facebook URL: ${url}" >&2
  71. echo "${prefix}${url}"
  72. elif [[ "${url}" =~ ^https?://(www\.)?twitter\.com/[^/]+/?(\?.*)?$ ]]
  73. then
  74. verbose_echo "Normalising Twitter URL: ${url}" >&2
  75. url="${url%%\?*}"
  76. url="${url%/}"
  77. unnormalisedUser="${url##*/}"
  78. user="$(curl -sL -A "${userAgent}" "https://twitter.com/${unnormalisedUser}" | grep -Po '<a class="([^"]*\s)?ProfileHeaderCard-screennameLink(\s[^"]*)?" href="/\K[^/"]+(?=")')"
  79. if [[ "${user}" ]]
  80. then
  81. echo "${prefix}https://twitter.com/${user}"
  82. else
  83. echo "Failed to normalise Twitter URL: ${url}" >&2
  84. echo "${prefix}${url}"
  85. fi
  86. elif [[ "${url}" =~ ^https?://(www\.)?instagram\.com/[^/]+/?$ ]]
  87. then
  88. verbose_echo "Normalising Instagram URL: ${url}" >&2
  89. user="${url%/}"
  90. user="${user##*/}"
  91. echo "${prefix}https://www.instagram.com/${user,,}/"
  92. elif [[ "${url}" =~ ^https?://(www\.)?youtube\.com/ ]]
  93. then
  94. verbose_echo "Normalising YouTube URL: ${url}" >&2
  95. if [[ "${url}" == *'?'* ]]
  96. then
  97. rurl="${url}&disable_polymer=1"
  98. else
  99. rurl="${url}?disable_polymer=1"
  100. fi
  101. page="$(curl -4sL -A "${userAgent}" -H 'Accept-Language: en-US,en;q=0.5' "${rurl}")"
  102. canonical="$(grep -Po '<link itemprop="url" href="http://www\.youtube\.com/\Kuser/[^"]+' <<< "${page}")"
  103. if [[ "${canonical}" ]]
  104. then
  105. echo "${prefix}https://www.youtube.com/${canonical}"
  106. else
  107. canonical="$(grep -Po '<link itemprop="url" href="http://www\.youtube\.com/\Kchannel/[^"]+' <<< "${page}")"
  108. if [[ "${canonical}" ]]
  109. then
  110. echo "${prefix}https://www.youtube.com/${canonical}"
  111. else
  112. echo "Failed to normalise YouTube URL: ${url}" >&2
  113. echo "${prefix}${url}"
  114. fi
  115. fi
  116. else
  117. verbose_echo "Normalising other URL: ${url}" >&2
  118. canonical="$(curl -sS ${otherCurlRedirectOpt} --max-time 10 -A "${userAgent}" -o /dev/null -w '%{url_effective}' "${url}")"
  119. if [[ "${canonical}" ]]
  120. then
  121. echo "${prefix}${canonical}"
  122. else
  123. echo "Failed to normalise other URL: ${url}" >&2
  124. echo "${prefix}${url}"
  125. fi
  126. fi
  127. done