The little things give you away... A collection of various small helper stuff
Vous ne pouvez pas sélectionner plus de 25 sujets
Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.
|
#!/bin/bash
# Search 4chan archives based on FoolFuuka
# Searches each board individually to get as much content as possible due to
# the 5000 results limit
#
# Usage:  <script> DOMAIN QUERY
#   DOMAIN  host name of the archive; used as https://DOMAIN/
#   QUERY   search text; it is inserted into the URL path. Spaces are sent as
#           %20, every other character is passed through verbatim, so anything
#           else that is not URL-safe must already be percent-encoded.
#
# Output: one post per line in HTML on stdout.
#         Progress, warnings and errors go to stderr.
# Exit status: 2 on usage error, 1 if the board index cannot be fetched.

# Highest result page requested per board (the original loop ran pages 1..200;
# presumably this corresponds to the archive's 5000 results cap).
readonly max_pages=200

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print a short usage line to stderr and abort with status 2.
usage() {
  printf 'Usage: %s DOMAIN QUERY\n' "${0##*/}" >&2
  exit 2
}

# Backslash-escape regex metacharacters in $1 so it matches literally when
# embedded in a grep -P pattern.
escape_regex() {
  sed 's/[][\.|$(){}?+*^]/\\&/g' <<<"$1"
}

# Download URL $1 to stdout. -s hides the progress meter, -S still reports
# failures on stderr. Returns curl's exit status so callers can detect
# transport errors (HTTP error pages are still returned as content).
fetch() {
  curl -sS "$1"
}

# Read the archive's front page HTML on stdin and print each board name once,
# in order of first appearance. A board link is any href of the form
# "/NAME/" or "http(s)://DOMAIN/NAME/" where DOMAIN is $1.
list_boards() {
  local domain_re
  domain_re=$(escape_regex "$1")
  grep -Po 'href="(https?://'"${domain_re}"')?/\K[^/]+(?=/")' \
    | awk '!seen[$0]++'
}

# Read one HTML document on stdin and print every post <article> element on
# its own line. Newlines are removed first so that an element spanning
# several source lines can be matched by the single-line regex.
extract_posts() {
  tr -d '\n' | grep -Po '<article class="post.*?</article>'
}

# Succeeds if the HTML document on stdin has an alert box saying
# "No results found" (the normal end-of-results marker).
is_end_of_results() {
  tr -d '\n' \
    | grep -Po '<div class="alert".*?</div>' \
    | grep -q 'No results found'
}

main() {
  [[ -n "${1-}" && -n "${2-}" ]] || usage

  local domain=$1
  # A raw space is not valid inside a URL; send it percent-encoded.
  local q=${2// /%20}
  local index board base url content
  local -i page

  index=$(fetch "https://${domain}/") \
    || die "Error: cannot fetch board list from https://${domain}/"

  while IFS= read -r board; do
    base="https://${domain}/${board}/search/text/${q}"

    # The un-paged search page is only inspected for the truncation notice.
    if ! content=$(fetch "${base}/"); then
      echo "Error on ${base}/" >&2
      continue
    fi
    if grep -qP '<h3 class="section_title">.*Returning only' <<<"${content}"; then
      echo "Warning: only 5000 results!" >&2
    fi

    for (( page = 1; page <= max_pages; page++ )); do
      url="${base}/page/${page}/"
      echo "Grabbing ${url}" >&2

      # A transport failure yields no content; without this check the loop
      # would keep requesting every remaining page of the board.
      if ! content=$(fetch "${url}"); then
        echo "Error on ${url}" >&2
        break
      fi

      # Any alert box ends this board; only "No results found" is expected.
      if grep -qF '<div class="alert"' <<<"${content}"; then
        if ! is_end_of_results <<<"${content}"; then
          echo "Error on ${url}" >&2
        fi
        break
      fi

      extract_posts <<<"${content}"
    done
  done < <(list_boards "${domain}" <<<"${index}")
}

main "$@"
|