#!/bin/bash
# Search 4chan archives that run FoolFuuka.
# Each board is searched separately, because a single search returns at most
# 5000 results; splitting by board retrieves as much content as possible.
# Output: one post per line, as HTML.
# Positional arguments:
#   $1 - archive host name; placed after "https://" in every request URL.
#   $2 - search text; placed as-is into the ".../search/text/<text>/" URL path
#        (these assignments do no URL-encoding).
# The right-hand side of an assignment is not word-split or globbed, so the
# unquoted expansions below keep the arguments intact.
domain=${1}
q=${2}
curl -s "https://${domain}/" | grep -Po 'href="(https?://'"$(sed 's/[]\.|$(){}?+*^]/\\&/g' <<<"${domain}")"')?/\K[^/]+(?=/")' | awk '!seen[$0]++' | while read -r board
do
content=$(curl -s "https://${domain}/${board}/search/text/${q}/")
if grep -qP '
.*Returning only' <<<"${content}"
then
echo "Warning: only 5000 results!" >&2
fi
declare -i page=1
while [[ ${page} -lt 201 ]]
do
echo "Grabbing https://${domain}/${board}/search/text/${q}/page/${page}/" >&2
content=$(curl -s "https://${domain}/${board}/search/text/${q}/page/${page}/")
if grep -qF '' | grep -q 'No results found'
then
echo "Error on https://${domain}/${board}/search/text/${q}/page/${page}/" >&2
fi
break
fi
tr -d '\n' <<<"${content}" | grep -Po '