diff --git a/bing-scrape b/bing-scrape
new file mode 100644
index 0000000..c4719ba
--- /dev/null
+++ b/bing-scrape
@@ -0,0 +1,16 @@
+#!/bin/bash
+q="$1"
+declare -i max=10000
+if [[ $# -eq 2 ]]; then max=$2; fi
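+# Page through Bing results 10 at a time via the "first" offset parameter, up to ${max} results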
+{
+ declare -i first=1
+ queryStr="q=${q}"
+ while [[ ${first} -lt ${max} ]]
+ do
+ echo "http://www.bing.com/search?${queryStr}" >&2
+ curl -s -A 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0' "http://www.bing.com/search?${queryStr}"
+ first+=10
+ queryStr="q=${q}&go=Search&qs=ds&first=${first}&FORM=PORE"
+ sleep 2
+ done
+} | grep -Po '<li class="b_algo">.*?</li>' | grep -Po 'href="\Khttps?://(?!www\.microsofttranslator\.com/|view\.officeapps\.live\.com/)[^"]+' | awk '!seen[$0]++' # extract result links, skipping Bing's translator/viewer URLs; the b_algo result-item selector may need updating for current Bing markup
diff --git a/foolfuuka-search b/foolfuuka-search
new file mode 100644
index 0000000..8276d4e
--- /dev/null
+++ b/foolfuuka-search
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Search 4chan archives based on FoolFuuka
+# Searches each board individually to get as much content as possible, due to the 5000-result limit per search
+# Output: one post per line in HTML
+domain="$1"
+q="$2"
+curl -s "https://${domain}/" | grep -Po 'href="(https?://'"$(sed 's/[]\.|$(){}?+*^]/\\&/g' <<<"${domain}")"')?/\K[^/]+(?=/")' | awk '!seen[$0]++' | while read -r board
+do
+ content=$(curl -s "https://${domain}/${board}/search/text/${q}/")
+ if grep -qP 'Returning only' <<<"${content}"
+ then
+ echo "Warning: only 5000 results!" >&2
+ fi
+
+ declare -i page=1
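+ # The 5000-result cap works out to at most 200 pages per board (25 posts per page)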
+ while [[ ${page} -lt 201 ]]
+ do
+ echo "Grabbing https://${domain}/${board}/search/text/${q}/page/${page}/" >&2
+ content=$(curl -s "https://${domain}/${board}/search/text/${q}/page/${page}/")
+ if ! grep -qF '<article' <<<"${content}" # no posts on this page; the marker may need adjusting for the archive's markup
+ then
+  if ! grep -q 'No results found' <<<"${content}"
+  then
+   echo "Error on https://${domain}/${board}/search/text/${q}/page/${page}/" >&2
+  fi
+  break
+ fi
+ tr -d '\n' <<<"${content}" | grep -Po '<article[^>]*>.*?</article>' # one post per line in HTML; the <article> pattern may need adjusting for the archive's markup
+ page+=1
+ done
+done
"${pipe}"; rm "${pipe}"; unset pipe
+while :
+do
+ {
+ if [[ "${mode}" == "comment" ]]
+ then
+ curl -s "https://api.pushshift.io/reddit/search/comment/?q=${q}&size=500&fields=author,body,created_utc,link_id,parent_id,permalink&before=${before}" | python3 -c 'import json,sys'$'\n''for d in json.loads(sys.stdin.read())["data"]:'$'\n'' print("%d %s %r" % (d["created_utc"], d["permalink"] if "permalink" in d else d["parent_id"] + "/" + d["link_id"] + "/" + d["author"], d["body"]))'
+ else
+ curl -s "https://api.pushshift.io/reddit/search/submission/?q=${q}&size=500&fields=author,created_utc,id,is_self,permalink,selftext,url&before=${before}" | python3 -c 'import json,sys'$'\n''for d in json.loads(sys.stdin.read())["data"]:'$'\n'' print("%d %s %s %s" % (d["created_utc"], d["permalink"], d["url"] if not d["is_self"] else "None", repr(d["selftext"]) if "selftext" in d else "None"))'
+ fi
+ } | awk 'BEGIN { timestamp = 0; } { timestamp=$1; print; } END { print timestamp >"/dev/fd/3" }'
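+ # The last timestamp returned becomes the next "before" cursor, walking backwards in time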
+ before=$(head -1 <&3)
+ if [[ ${before} -eq 0 ]] # No data returned by Pushshift
+ then
+ break
+ fi
+done