The little things give you away... A collection of various small helper stuff
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

92 lines
2.1 KiB

  1. #!/bin/bash
  2. function fetch_n_extract {
  3. local url="$1"
  4. {
  5. curl -sSL -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' "${url}" | \
  6. grep -Fi -e 'facebook' -e 'flickr' -e 'instagram' -e 'twitter' -e 't.me' -e 'youtube' -e 'youtu.be' -e 'vk.com' | \
  7. tee \
  8. >(
  9. # Facebook
  10. grep -Poi 'facebook\.com/[^/ <"'"'"']+' | \
  11. sed 's,^,https://www.,' | \
  12. grep -vi -e '^https://www\.facebook\.com/2008$' -e '^https://www\.facebook\.com/tr\?' -e '^https://www\.facebook\.com/plugins$' | \
  13. grep -Pvi '^https://www\.facebook\.com/sharer(\.php\?|\?|$)'
  14. ) \
  15. >(
  16. # Flickr
  17. grep -Poi 'flickr\.com/photos/[^/ <"'"'"']+' | \
  18. sed 's,^,https://www.,'
  19. ) \
  20. >(
  21. # Instagram
  22. grep -Poi 'instagram\.com/[^/ <"'"'"']+' | \
  23. sed 's,^,https://www.,'
  24. ) \
  25. >(
  26. # Telegram
  27. grep -Poi '//(www\.)?t\.me/[^/ <"'"'"']+' | \
  28. sed 's,^//,,; s,^www\.,,; s,^,https://,'
  29. ) \
  30. >(
  31. # Twitter
  32. grep -Poi 'twitter\.com/[^/ <"'"'"']+' | \
  33. sed 's,^,https://,' | \
  34. grep -vi -e '^https://twitter\.com/home\?' -e '^https://twitter\.com/widgets\.js$' -e '^https://twitter\.com/share\?' | \
  35. sed 's,\([?&]\)ref_src=[^&]\+&\?,\1,; s,?$,,'
  36. ) \
  37. >(
  38. # VKontakte
  39. grep -Poi 'vk\.com/[^/ <"'"'"']+' | \
  40. sed 's,^,https://,'
  41. ) \
  42. >(
  43. # YouTube
  44. grep -Poi '(youtube\.com/((user|channel|embed)/)?[^/ <"'"'"']+|youtu\.be/[^/ <"'"'"']+)' | \
  45. awk '/^youtube/ { print "https://www." $0 } /^youtu\.be/ { print "https://" $0 }'
  46. ) \
  47. >/dev/null
  48. } | awk '!seen[$0]++'
  49. }
  50. # Parse options
  51. printInputUrl=
  52. while [[ $# -gt 0 ]]
  53. do
  54. if [[ "$1" == '--print-input-urls' || "$1" == '--print-input-url' ]]
  55. then
  56. printInputUrl=true
  57. shift
  58. elif [[ "$1" == '--' ]]
  59. then
  60. # End of options
  61. shift
  62. break
  63. elif [[ "$1" == '--'* ]]
  64. then
  65. echo "Unknown option: $1" >&2
  66. exit 1
  67. else
  68. # Assume end of options
  69. break
  70. fi
  71. done
  72. {
  73. for arg in "$@"
  74. do
  75. echo "${arg}"
  76. done
  77. if [ ! -t 0 ]
  78. then
  79. cat
  80. fi
  81. } | while read -r url
  82. do
  83. if [[ "${printInputUrl}" ]]
  84. then
  85. echo "${url}"
  86. fi
  87. fetch_n_extract "${url}"
  88. done