The little things give you away... A collection of various small helper stuff
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

37 lines
1.3 KiB

  1. #!/bin/bash
  2. # Search 4chan archives based on FoolFuuka
  3. # Output: one post per line in HTML, prefixed with the post ID
  4. # Note that posts can appear multiple times in the output in some cases. You're encouraged to filter based on the post ID.
  5. domain="$1"
  6. q="$2"
  7. end=
  8. nextend=2038-01-19
  9. while :
  10. do
  11. end="${nextend}"
  12. content=$(curl -s "https://${domain}/_/search/text/${q}/end/${end}/page/${page}/")
  13. declare -i page=1
  14. while [[ ${page} -lt 201 ]]
  15. do
  16. echo "Grabbing https://${domain}/_/search/text/${q}/end/${end}/page/${page}/" >&2
  17. content=$(curl -s "https://${domain}/_/search/text/${q}/end/${end}/page/${page}/")
  18. tr -d '\n' <<<"${content}" | grep -Po '<article class="post.*?</article>' | perl -pe 's,^(.*?id="(\d+)".*$),\2 \1,'
  19. # Get last date seen to update end date; subtract one because the search appears to be a bit unreliable
  20. nextend="$(date --date="@$(($(date --date="$(tr -d '\n' <<<"${content}" | grep -Po '<article class="post.*?</article>' | tail -1 | grep -Po '<time datetime="\K[^"]+')" '+%s') - 86400))" '+%Y-%m-%d')"
  21. if grep -qF '<div class="alert"' <<<"${content}"
  22. then
  23. if ! tr -d '\n' <<<"${content}" | grep -Po '<div class="alert".*?</div>' | grep -q 'No results found'
  24. then
  25. echo "Error" >&2
  26. break
  27. else
  28. break 2
  29. fi
  30. fi
  31. page+=1
  32. done
  33. done