The little things give you away... A collection of various small helper stuff
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

60 lines
1.8 KiB

  1. #!/bin/bash
  2. # Collect all websites and social media for MEPs based on https://www.europarl.europa.eu/meps/en/full-list/all
  3. # Writes to several file descriptors:
  4. # - Info about what it's doing to stderr
  5. # - Extracted URLs to FD 3
  6. # - Warnings about EP Newshub links to FD 4
  7. # https://unix.stackexchange.com/a/206848
  8. if ! { >&3; } 2>/dev/null
  9. then
  10. echo "Error: FD 3 not open" >&1
  11. exit 1
  12. fi
  13. if ! { >&4; } 2>/dev/null
  14. then
  15. echo "Error: FD 4 not open" >&1
  16. exit 1
  17. fi
  18. scriptpath="$(cd "$(dirname "$0")"; pwd -P)"
  19. export PATH="${scriptpath}:${PATH}"
  20. echo "Fetching MEP list" >&1
  21. curl-archivebot-ua -s "https://www.europarl.europa.eu/meps/en/full-list/all" | \
  22. grep -Po '<a class="ep_content" href="\K/meps/en/\d+(?=")' | \
  23. while read -r profileUrl
  24. do
  25. profileUrl="https://www.europarl.europa.eu${profileUrl}"
  26. echo "Fetching ${profileUrl}" >&1
  27. profilePage="$(curl-archivebot-ua -sL "${profileUrl}")"
  28. mapfile -t urls < <(tr -d '\r\n' <<< "${profilePage}" | \
  29. grep -Po '<div class="ep-a_share ep-layout_socialnetwok">.*?</ul>' | \
  30. grep -Po '<a\s+([^>]*\s+)?href="\K(?!mailto:)[^"]+')
  31. # Classification
  32. for url in "${urls[@]}"
  33. do
  34. if [[ "${url}" =~ //((www|[a-z][a-z]-[a-z][a-z])\.)?facebook\.com/ ]]
  35. then
  36. echo "Facebook: ${url}"
  37. elif [[ "${url}" =~ //(www\.)?instagram\.com/ ]]
  38. then
  39. echo "Instagram: ${url}"
  40. elif [[ "${url}" =~ //(www\.)?twitter\.com/ ]]
  41. then
  42. echo "Twitter: ${url}"
  43. elif [[ "${url}" =~ //([^/]+\.)?youtube\.com/ || "${url}" =~ //youtu\.be/ ]]
  44. then
  45. echo "YouTube: ${url}"
  46. else
  47. echo "Other: ${url}"
  48. fi
  49. done >&3
  50. # Check if there's a newshub mention and print a warning about that if necessary
  51. if grep -q 'container_header_newshub' <<< "${profilePage}"
  52. then
  53. echo "Has EP Newshub link: ${profileUrl}" >&4
  54. fi
  55. done