The little things give you away... A collection of various small helper stuff
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.
 
 
 

60 lignes
1.8 KiB

  1. #!/bin/bash
  2. # Collect all websites and social media for MEPs based on https://www.europarl.europa.eu/meps/en/full-list/all
  3. # Writes to several file descriptors:
  4. # - Info about what it's doing to stderr
  5. # - Extracted URLs to FD 3
  6. # - Warnings about EP Newshub links to FD 4
  7. # https://unix.stackexchange.com/a/206848
  8. if ! { >&3; } 2>/dev/null
  9. then
  10. echo "Error: FD 3 not open" >&1
  11. exit 1
  12. fi
  13. if ! { >&4; } 2>/dev/null
  14. then
  15. echo "Error: FD 4 not open" >&1
  16. exit 1
  17. fi
  18. scriptpath="$(cd "$(dirname "$0")"; pwd -P)"
  19. export PATH="${scriptpath}:${PATH}"
  20. echo "Fetching MEP list" >&1
  21. curl-archivebot-ua -s "https://www.europarl.europa.eu/meps/en/full-list/all" | \
  22. grep -Po '<a class="ep_content" href="\K/meps/en/\d+(?=")' | \
  23. while read -r profileUrl
  24. do
  25. profileUrl="https://www.europarl.europa.eu${profileUrl}"
  26. echo "Fetching ${profileUrl}" >&1
  27. profilePage="$(curl-archivebot-ua -sL "${profileUrl}")"
  28. mapfile -t urls < <(tr -d '\r\n' <<< "${profilePage}" | \
  29. grep -Po '<div class="ep-a_share ep-layout_socialnetwok">.*?</ul>' | \
  30. grep -Po '<a\s+([^>]*\s+)?href="\K(?!mailto:)[^"]+')
  31. # Classification
  32. for url in "${urls[@]}"
  33. do
  34. if [[ "${url}" =~ //((www|[a-z][a-z]-[a-z][a-z])\.)?facebook\.com/ ]]
  35. then
  36. echo "Facebook: ${url}"
  37. elif [[ "${url}" =~ //(www\.)?instagram\.com/ ]]
  38. then
  39. echo "Instagram: ${url}"
  40. elif [[ "${url}" =~ //(www\.)?twitter\.com/ ]]
  41. then
  42. echo "Twitter: ${url}"
  43. elif [[ "${url}" =~ //([^/]+\.)?youtube\.com/ || "${url}" =~ //youtu\.be/ ]]
  44. then
  45. echo "YouTube: ${url}"
  46. else
  47. echo "Other: ${url}"
  48. fi
  49. done >&3
  50. # Check if there's a newshub mention and print a warning about that if necessary
  51. if grep -q 'container_header_newshub' <<< "${profilePage}"
  52. then
  53. echo "Has EP Newshub link: ${profileUrl}" >&4
  54. fi
  55. done