The little things give you away... A collection of various small helper scripts
#!/bin/bash
# Search 4chan archives based on FoolFuuka.
# Searches each board individually to get as much content as possible, since each search is capped at 5000 results.
# Output: one post per line, in HTML.
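#
# Example invocation (illustrative; the script name and archive domain are placeholders):
#   ./foolfuuka-search.sh archive.example.org "some phrase" > posts.html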
domain="$1"
q="$2"

# Discover board slugs from the links on the archive's front page. The sed call
# escapes regex metacharacters in the domain, the regex matches both absolute
# and root-relative board links, and awk drops duplicates while preserving order.
curl -s "https://${domain}/" \
    | grep -Po 'href="(https?://'"$(sed 's/[]\.|$(){}?+*^]/\\&/g' <<<"${domain}")"')?/\K[^/]+(?=/")' \
    | awk '!seen[$0]++' \
    | while read -r board
do
    # The first request is only used to check whether the archive truncated the result set.
    # Note: the query is inserted into the URL as-is, without URL-encoding.
    content=$(curl -s "https://${domain}/${board}/search/text/${q}/")
    if grep -qP '<h3 class="section_title">.*Returning only' <<<"${content}"
    then
        echo "Warning: only 5000 results!" >&2
    fi
    # Page through the results; the 200-page cap matches the archive's 5000-result ceiling.
    declare -i page=1
    while [[ ${page} -lt 201 ]]
    do
        echo "Grabbing https://${domain}/${board}/search/text/${q}/page/${page}/" >&2
        content=$(curl -s "https://${domain}/${board}/search/text/${q}/page/${page}/")
        # An alert box means either "No results found" (the normal end of results)
        # or some other error worth reporting; stop paging either way.
        if grep -qF '<div class="alert"' <<<"${content}"
        then
            if ! tr -d '\n' <<<"${content}" | grep -Po '<div class="alert".*?</div>' | grep -q 'No results found'
            then
                echo "Error on https://${domain}/${board}/search/text/${q}/page/${page}/" >&2
            fi
            break
        fi
        # Emit each post as a single line of HTML.
        tr -d '\n' <<<"${content}" | grep -Po '<article class="post.*?</article>'
        page+=1
    done
done
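
Each output line is a complete <article> element, so a rough plain-text view can be obtained by stripping the tags afterwards (a lossy sketch; the script name and domain are placeholders):

./foolfuuka-search.sh archive.example.org "some phrase" | sed 's/<[^>]*>/ /g'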