#!/bin/bash # Search 4chan archives based on FoolFuuka # Searches each board individually to get as much content as possible due to the 5000 results limit # Output: one post per line in HTML domain="$1" q="$2" curl -s "https://${domain}/" | grep -Po 'href="(https?://'"$(sed 's/[]\.|$(){}?+*^]/\\&/g' <<<"${domain}")"')?/\K[^/]+(?=/")' | awk '!seen[$0]++' | while read -r board do content=$(curl -s "https://${domain}/${board}/search/text/${q}/") if grep -qP '

.*Returning only' <<<"${content}" then echo "Warning: only 5000 results!" >&2 fi declare -i page=1 while [[ ${page} -lt 201 ]] do echo "Grabbing https://${domain}/${board}/search/text/${q}/page/${page}/" >&2 content=$(curl -s "https://${domain}/${board}/search/text/${q}/page/${page}/") if grep -qF '
' | grep -q 'No results found' then echo "Error on https://${domain}/${board}/search/text/${q}/page/${page}/" >&2 fi break fi tr -d '\n' <<<"${content}" | grep -Po '