@@ -0,0 +1,45 @@ | |||||
#!/usr/bin/env python3 | |||||
# The SHA1 UUID stuff in Ruby is actually more complicated. Everything's right until the `head -c32`, but then Ruby transforms it into an integer in a quite peculiar way: https://github.com/sporkmonger/uuidtools/blob/a10724236cefd922ee5cd3de7695fb6e5fd703f5/lib/uuidtools.rb#L480-L494 | |||||
# Ruby code: ArchiveBot lib/job.rb + https://github.com/sporkmonger/uuidtools/blob/a10724236cefd922ee5cd3de7695fb6e5fd703f5/lib/uuidtools.rb#L688-L691 | |||||
# Takes the SHA-1 hash of the namespace (as raw bytes) and the name, truncates it to 32 hex chars, creates a new UUID from it, transforms two fields, converts it to a bigint, and formats it in base-36 | |||||
# sed/sha1sum/head/bash-based version, which is missing the time_hi_and_version and clock_seq_hi_and_reserved modifications:
#{ echo -n '82244de1-c354-4c89-bf2b-f153ce23af43' | sed 's,-,,g' | xxd -r -p; echo -n 'https://transfer.notkiska.pw/sDu6C/marwilliamson-twitter.txt'; } | sha1sum | head -c32 | { read -r hash; BASE36=($(echo {0..9} {a..z})); for i in $(bc <<< "obase=32; ibase=16; ${hash^^}" | tr -d '\\\n'); do echo -n ${BASE36[$((10#$i))]}; done; }; echo | |||||
import hashlib | |||||
import sys | |||||
import uuid | |||||
# Namespace UUID that is hashed (as raw bytes) together with the URL.
URL_NAMESPACE = uuid.UUID('82244de1-c354-4c89-bf2b-f153ce23af43')


def job_uuid_int(url: str) -> int:
    """Return the 128-bit integer of the SHA-1 based UUID for *url*.

    The SHA-1 digest of the namespace's raw bytes followed by the URL is
    truncated to 16 bytes (32 hex characters) and turned into a UUID whose
    time_hi_and_version field carries version 5 and whose
    clock_seq_hi_and_reserved field carries the RFC 4122 variant bits.

    The URL is assumed to be normalised already; a non-ASCII URL raises
    UnicodeEncodeError.
    """
    digest = hashlib.sha1(URL_NAMESPACE.bytes + url.encode('ascii')).digest()
    # version=5 makes the UUID constructor apply exactly the two field
    # transformations that used to be done by hand:
    #   time_hi_version      = (time_hi_version & 0x0FFF) | (5 << 12)
    #   clock_seq_hi_variant = (clock_seq_hi_variant & 0x3F) | 0x80
    return uuid.UUID(bytes=digest[:16], version=5).int


def int_to_base36(num: int) -> str:
    """Return the lowercase base-36 representation of a non-negative int.

    Based on https://stackoverflow.com/a/31746873

    Raises:
        ValueError: if *num* is negative.
    """
    # An explicit exception instead of `assert`: assertions are stripped
    # under `python -O`, after which negative input would yield garbage.
    if num < 0:
        raise ValueError('num must be non-negative')
    digits = '0123456789abcdefghijklmnopqrstuvwxyz'
    res = ''
    # `not res` guarantees at least one digit, so 0 is rendered as '0'.
    while not res or num > 0:
        num, remainder = divmod(num, 36)
        res = digits[remainder] + res
    return res


def main() -> int:
    """Print the base-36 job ID for the URL given as the first argument."""
    if len(sys.argv) < 2:
        print('Usage:', sys.argv[0], 'URL', file=sys.stderr)
        return 1
    # Any further arguments are ignored, as before.
    print(int_to_base36(job_uuid_int(sys.argv[1])))
    return 0


if __name__ == '__main__':
    sys.exit(main())
@@ -1,6 +1,6 @@ | |||||
#!/bin/bash | #!/bin/bash | ||||
# Extract from stdin social media usernames suitable for snscrape, grouped by service | # Extract from stdin social media usernames suitable for snscrape, grouped by service | ||||
grep -Po '(https?://www\.\K(facebook|instagram)\.com/\S+(?=/)|https?://\Ktwitter\.com/\S+)' | | |||||
grep -Po '(https?://www\.\Kfacebook\.com/(?!pages/)\S+(?=/)|https?://www\.\Kinstagram\.com/\S+(?=/)|https?://\Ktwitter\.com/\S+)' | | |||||
sed 's,\.com/, ,' | | sed 's,\.com/, ,' | | ||||
sort | | sort | | ||||
awk ' | awk ' | ||||
@@ -0,0 +1,10 @@ | |||||
#!/bin/bash
# Feed stdin through snscrape-extract-usernames (located next to this script)
# and print, for each facebook/instagram/twitter line of its output, a shell
# loop that runs the matching snscrape-<service>-user script for every user.
# The printed loops are meant to be executed by a shell afterwards.

# Resolve the directory containing this script; bail out instead of silently
# falling back to the current directory if it cannot be entered.
scriptpath="$(cd "$(dirname "$0")" && pwd -P)" || exit 1
quotedScriptpath="$(printf '%q' "${scriptpath}")"

"${scriptpath}/snscrape-extract-usernames" | while read -r service line
do
	if [[ "${service}" == "facebook:" || "${service}" == "instagram:" || "${service}" == "twitter:" ]]
	then
		# The usernames originate from scraped text. Quote each one so that
		# shell metacharacters or glob patterns in a name cannot alter the
		# generated command; ordinary names are emitted unchanged.
		read -r -a users <<< "${line}"
		quotedUsers=''
		if [[ ${#users[@]} -gt 0 ]]
		then
			quotedUsers="$(printf '%q ' "${users[@]}")"
			quotedUsers="${quotedUsers% }"
		fi
		# ${service%:} strips the trailing colon (works on every bash version,
		# unlike a negative substring length).
		echo "for user in ${quotedUsers}; do ${quotedScriptpath}/snscrape-${service%:}-user "'"${user}"; done'
	fi
done
@@ -0,0 +1,23 @@ | |||||
#!/bin/bash
# Set up the working directory /tmp/snscrape and open a tmux session named
# "snscrape" inside it with one window per step: normalise, prepare, scrape
# (three vertically stacked panes for facebook, instagram and twitter),
# upload, merge and cleanup.
#
# Each window/pane first emits an OSC 2 escape sequence carrying its title and
# then starts bash. send-keys types `pyenv_setup` where needed plus a
# commented-out hint of the command to run in that window.
# NOTE(review): pyenv_setup is not defined here; presumably it is provided by
# the user's shell configuration.
# NOTE(review): the pane targets scrape.1 to scrape.3 look like they assume
# pane-base-index 1 in the tmux configuration; confirm.

# Stop if the working directory cannot be created; everything below needs it.
mkdir -p /tmp/snscrape || exit 1
echo 'snscrape-dev' > /tmp/snscrape/.python-version

# Make the sibling scripts callable by name; fail rather than guess the
# location if this script's directory cannot be entered.
scriptpath="$(cd "$(dirname "$0")" && pwd -P)" || exit 1
export PATH="${scriptpath}:${PATH}"

# Never start the session from an unintended directory.
cd /tmp/snscrape || exit 1

tmux new -s snscrape \
	-n "normalise" 'printf "\033]2;%s\033\\" "normalise"; bash' \; \
	send-keys -t 'snscrape:normalise' 'pyenv_setup' Enter '# xclip -selection c -o | snscrape-normalise' Enter \; \
	new-window -n "prepare" 'printf "\033]2;%s\033\\" "prepare"; bash' \; \
	send-keys -t 'snscrape:prepare' '# xclip -selection c -o | snscrape-prepare-commands' Enter \; \
	new-window -n "scrape" 'printf "\033]2;%s\033\\" "scrape-facebook"; bash' \; \
	send-keys -t 'snscrape:scrape.1' 'pyenv_setup' Enter '# facebook' Enter \; \
	split-window -v 'printf "\033]2;%s\033\\" "scrape-instagram"; bash' \; \
	send-keys -t 'snscrape:scrape.2' 'pyenv_setup' Enter '# instagram' Enter \; \
	split-window -v 'printf "\033]2;%s\033\\" "scrape-twitter"; bash' \; \
	send-keys -t 'snscrape:scrape.3' 'pyenv_setup' Enter '# twitter' Enter \; \
	select-layout -t 'snscrape:scrape' even-vertical \; \
	new-window -n "upload" 'printf "\033]2;%s\033\\" "upload"; bash' \; \
	send-keys -t 'snscrape:upload' 'pyenv_setup' Enter '# snscrape-upload' Enter \; \
	new-window -n "merge" 'printf "\033]2;%s\033\\" "merge"; bash' \; \
	send-keys -t 'snscrape:merge' '# snscrape-wiki-transfer-merge' Enter \; \
	new-window -n "cleanup" 'printf "\033]2;%s\033\\" "cleanup"; bash'
@@ -1,7 +1,7 @@ | |||||
#!/bin/bash | #!/bin/bash | ||||
scriptpath="$(cd "$(dirname "$0")"; pwd -P)" | scriptpath="$(cd "$(dirname "$0")"; pwd -P)" | ||||
insta= | |||||
instagramUrls=() | |||||
if [[ -e transfer ]] | if [[ -e transfer ]] | ||||
then | then | ||||
@@ -9,6 +9,12 @@ then | |||||
exit 1 | exit 1 | ||||
fi | fi | ||||
# Abort early unless a python3 interpreter can actually be executed.
python3 --version &>/dev/null || {
	echo "Error: python3 not found" >&2
	exit 1
}
# ArchiveBot | # ArchiveBot | ||||
for f in "$@" | for f in "$@" | ||||
do | do | ||||
@@ -23,12 +29,21 @@ do | |||||
elif [[ "${f}" == instagram-* ]] | elif [[ "${f}" == instagram-* ]] | ||||
then | then | ||||
echo "!a < ${upurl}" | echo "!a < ${upurl}" | ||||
insta=1 | |||||
instagramUrls+=("${upurl}") | |||||
else | else | ||||
echo "!ao < ${upurl}" | echo "!ao < ${upurl}" | ||||
fi | fi | ||||
done 3>transfer | done 3>transfer | ||||
# Instagram ignores
# For every uploaded Instagram list URL, print the ArchiveBot "!ig" command
# that adds the hl= ignore pattern to the job; the job ID is calculated from
# the URL by the archivebot-jobid-calculation helper next to this script.
if [[ ${#instagramUrls[@]} -gt 0 ]]
then
	for url in "${instagramUrls[@]}"
	do
		# Only emit the command when the helper succeeded and produced an ID;
		# otherwise a malformed "!ig" line without a job ID would be printed.
		if jobid="$("${scriptpath}/archivebot-jobid-calculation" "${url}")" && [[ -n "${jobid}" ]]
		then
			echo "!ig ${jobid} ^https?://www.instagram.com/.*[?&]hl="
		else
			echo "Error: could not calculate the ArchiveBot job ID for ${url}; add the Instagram ignore manually: ^https?://www.instagram.com/.*[?&]hl=" >&2
		fi
	done
fi
# chromebot | # chromebot | ||||
for f in "$@" | for f in "$@" | ||||
do | do | ||||
@@ -41,9 +56,4 @@ do | |||||
fi | fi | ||||
done | sed 's,^,chromebot: a ,' | done | sed 's,^,chromebot: a ,' | ||||
# Instagram ignore warning | |||||
if [[ "${insta}" ]] | |||||
then | |||||
echo "Don't forget to add the Instagram ignore! ^https?://www.instagram.com/.*[?&]hl=" >&2 | |||||
fi | |||||
echo "Wrote ./transfer, you can run snscrape-wiki-transfer-merge now if ./wiki exists." >&2 | echo "Wrote ./transfer, you can run snscrape-wiki-transfer-merge now if ./wiki exists." >&2 |