The little things give you away... A collection of various small helper stuff
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.
 
 
 

32 lignes
1.1 KiB

  #!/bin/bash
  # Search 4chan archives based on FoolFuuka.
  #
  # Usage: <script> DOMAIN QUERY
  #   DOMAIN  host name of the archive, without scheme or trailing slash
  #   QUERY   search text; it is inserted into the URL path, so apart from
  #           spaces (encoded as %20 below) it must already be URL-safe
  #
  # Searches each board linked from the archive's front page individually to
  # get as much content as possible due to the 5000 results limit.
  # Output: one post per line in HTML on stdout. Progress messages, warnings
  # and errors are written to stderr.
  # Exit status: 2 on bad usage, 1 if the board list cannot be fetched.

  if [[ $# -lt 2 || -z "$1" || -z "$2" ]]; then
    printf 'Usage: %s DOMAIN QUERY\n' "${0##*/}" >&2
    exit 2
  fi

  domain="$1"
  # A raw space is never valid inside a URL. Everything else is passed through
  # untouched so that queries which are already percent-encoded keep working.
  q="${2// /%20}"

  # Upper bound on the number of result pages requested per board.
  # NOTE(review): presumably 200 pages correspond to the 5000 results limit
  # (25 posts per page) -- confirm against the archive software.
  readonly max_pages=200

  # Fetch URL $1 and print the response body on stdout.
  # -s suppresses the progress meter, -S still reports transport errors on
  # stderr, and --globoff stops curl from expanding [] and {} that may occur
  # in the query as URL globs. Returns curl's exit status.
  fetch() {
    curl -sS --globoff "$1"
  }

  if ! index=$(fetch "https://${domain}/"); then
    echo "Error: could not fetch board list from https://${domain}/" >&2
    exit 1
  fi

  # The domain is embedded into a PCRE below, so escape regex metacharacters.
  domain_re=$(sed 's/[]\.|$(){}?+*^]/\\&/g' <<<"${domain}")

  # Board names are the first path component of links that point either to
  # "/<board>/" or to "http(s)://<domain>/<board>/"; awk drops duplicates while
  # keeping the order of first appearance.
  while IFS= read -r board; do
    search_url="https://${domain}/${board}/search/text/${q}/"

    # The unpaginated result page is only inspected for the truncation notice.
    if ! content=$(fetch "${search_url}"); then
      echo "Error on ${search_url}" >&2
      continue
    fi
    if grep -qP '<h3 class="section_title">.*Returning only' <<<"${content}"; then
      echo "Warning: only 5000 results!" >&2
    fi

    for ((page = 1; page <= max_pages; page++)); do
      page_url="${search_url}page/${page}/"
      echo "Grabbing ${page_url}" >&2

      # A transport failure yields no usable content; stop paging this board
      # instead of requesting every remaining page in vain.
      if ! content=$(fetch "${page_url}"); then
        echo "Error on ${page_url}" >&2
        break
      fi

      # An alert box ends the paging for this board: "No results found" is the
      # regular end of the result list, any other alert is reported as error.
      if grep -qF '<div class="alert"' <<<"${content}"; then
        if ! tr -d '\n' <<<"${content}" \
          | grep -Po '<div class="alert".*?</div>' \
          | grep -q 'No results found'; then
          echo "Error on ${page_url}" >&2
        fi
        break
      fi

      # Join the page into a single line so that each (multi-line) article
      # element can be matched and printed as one output line.
      tr -d '\n' <<<"${content}" | grep -Po '<article class="post.*?</article>'
    done
  done < <(
    grep -Po 'href="(https?://'"${domain_re}"')?/\K[^/]+(?=/")' <<<"${index}" \
      | awk '!seen[$0]++'
  )