@@ -0,0 +1,45 @@ | |||
#!/usr/bin/env python3 | |||
# The SHA1 UUID stuff in Ruby is actually more complicated. Everything's right until the `head -c32`, but then Ruby transforms it into an integer in a quite peculiar way: https://github.com/sporkmonger/uuidtools/blob/a10724236cefd922ee5cd3de7695fb6e5fd703f5/lib/uuidtools.rb#L480-L494 | |||
# Ruby code: ArchiveBot lib/job.rb + https://github.com/sporkmonger/uuidtools/blob/a10724236cefd922ee5cd3de7695fb6e5fd703f5/lib/uuidtools.rb#L688-L691 | |||
# Takes the SHA-1 hash of the namespace (as raw bytes) and the name, truncates it to 32 hex chars, creates a new UUID from it, transforms two fields, converts it to a bigint, and formats it in base-36 | |||
# sed/sha1sum/head/bash-based version missing the time_hi_and_version and clock_seq_hi_and_reserved modification | |||
#{ echo -n '82244de1-c354-4c89-bf2b-f153ce23af43' | sed 's,-,,g' | xxd -r -p; echo -n 'https://transfer.notkiska.pw/sDu6C/marwilliamson-twitter.txt'; } | sha1sum | head -c32 | { read -r hash; BASE36=($(echo {0..9} {a..z})); for i in $(bc <<< "obase=32; ibase=16; ${hash^^}" | tr -d '\\\n'); do echo -n ${BASE36[$((10#$i))]}; done; }; echo | |||
import hashlib | |||
import sys | |||
import uuid | |||
# Namespace UUID that is hashed (as raw bytes) in front of the URL.
JOB_ID_NAMESPACE = '82244de1-c354-4c89-bf2b-f153ce23af43'

BASE36_DIGITS = '0123456789abcdefghijklmnopqrstuvwxyz'


def calculate_job_uuid_int(url):
    """Return the job UUID for *url* as an integer.

    The SHA-1 hash of the namespace (as raw bytes) followed by the URL is
    truncated to 32 hex characters and turned into a UUID. The version and
    variant fields are then overwritten, mirroring what the Ruby uuidtools
    code does, and the resulting UUID is returned as a 128-bit integer.

    The URL is assumed to be normalised already. It must be pure ASCII;
    otherwise UnicodeEncodeError is raised.
    """
    sha1 = hashlib.sha1()
    sha1.update(bytes.fromhex(JOB_ID_NAMESPACE.replace('-', '')))
    sha1.update(url.encode('ascii'))
    fields = list(uuid.UUID(sha1.hexdigest()[:32]).fields)
    # time_hi_and_version: keep the low 12 bits and set the top nibble to 5.
    fields[2] = (fields[2] & 0x0FFF) | (5 << 12)
    # clock_seq_hi_and_reserved: keep the low 6 bits and set the top two
    # bits to 0b10.
    fields[3] = (fields[3] & 0x3F) | 0x80
    return uuid.UUID(fields=tuple(fields)).int


def int_to_base36(num):
    """Return the base-36 representation (digits 0-9, a-z) of *num*.

    Raises ValueError if *num* is negative.
    """
    # Adapted from https://stackoverflow.com/a/31746873
    # An explicit exception is used instead of `assert` because assertions
    # are stripped when Python runs with -O.
    if num < 0:
        raise ValueError('num must be non-negative')
    if num == 0:
        return '0'
    chars = []
    while num > 0:
        num, remainder = divmod(num, 36)
        chars.append(BASE36_DIGITS[remainder])
    # Digits were collected least-significant first.
    return ''.join(reversed(chars))


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('Usage: {} URL'.format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)
    url = sys.argv[1]  # Assume that it's normalised already
    print(int_to_base36(calculate_job_uuid_int(url)))
@@ -1,6 +1,6 @@ | |||
#!/bin/bash | |||
# Extract from stdin social media usernames suitable for snscrape, grouped by service | |||
grep -Po '(https?://www\.\K(facebook|instagram)\.com/\S+(?=/)|https?://\Ktwitter\.com/\S+)' | | |||
grep -Po '(https?://www\.\Kfacebook\.com/(?!pages/)\S+(?=/)|https?://www\.\Kinstagram\.com/\S+(?=/)|https?://\Ktwitter\.com/\S+)' | | |||
sed 's,\.com/, ,' | | |||
sort | | |||
awk ' | |||
@@ -0,0 +1,10 @@ | |||
#!/bin/bash
# Feed stdin through snscrape-extract-usernames (located next to this script) and, for every
# "facebook:", "instagram:" or "twitter:" line it prints, emit a shell `for` loop that runs the
# matching snscrape-<service>-user script once per listed user. Nothing is executed here; the
# loops are only printed to stdout.

# Resolve the directory containing this script; abort if it cannot be determined.
scriptpath="$(cd "$(dirname "$0")" && pwd -P)" || exit 1

"${scriptpath}/snscrape-extract-usernames" | while read -r service line
do
	if [[ "${service}" == "facebook:" || "${service}" == "instagram:" || "${service}" == "twitter:" ]]
	then
		# The usernames originate from arbitrary URLs, so shell-quote each one before embedding it
		# in the generated command. Ordinary usernames are left unchanged by %q.
		read -r -a users <<< "${line}"
		quotedUsers=""
		for user in "${users[@]}"
		do
			quotedUsers+="$(printf '%q' "${user}") "
		done
		# ${service%:} strips the trailing colon to get the service name.
		echo "for user in ${quotedUsers% }; do $(printf "%q" "${scriptpath}")/snscrape-${service%:}-user "'"${user}"; done'
	fi
done
@@ -0,0 +1,23 @@ | |||
#!/bin/bash
# Prepare the working directory /tmp/snscrape and start a tmux session named "snscrape" in it.
# The session gets the windows "normalise", "prepare", "scrape" (split into three panes for
# facebook, instagram and twitter), "upload", "merge" and "cleanup". Each pane prints an
# escape sequence carrying its title and then starts an interactive bash; send-keys types
# `pyenv_setup` and/or a commented-out hint command into the relevant panes.
# NOTE(review): the send-keys targets 'snscrape:scrape.1' to '.3' look like they assume tmux's
# pane-base-index is 1 (the tmux default is 0) -- confirm against the user's tmux configuration.

# Abort on any setup failure so that tmux is never started in the wrong directory.
mkdir -p /tmp/snscrape || exit 1
# Presumably read by pyenv to select the Python environment -- verify.
echo 'snscrape-dev' > /tmp/snscrape/.python-version || exit 1
# Must be resolved before changing directory because $0 may be a relative path.
scriptpath="$(cd "$(dirname "$0")" && pwd -P)" || exit 1
# Make the sibling scripts callable by name inside the session.
export PATH="${scriptpath}:${PATH}"
cd /tmp/snscrape || exit 1
tmux new -s snscrape \
-n "normalise" 'printf "\033]2;%s\033\\" "normalise"; bash' \; \
send-keys -t 'snscrape:normalise' 'pyenv_setup' Enter '# xclip -selection c -o | snscrape-normalise' Enter \; \
new-window -n "prepare" 'printf "\033]2;%s\033\\" "prepare"; bash' \; \
send-keys -t 'snscrape:prepare' '# xclip -selection c -o | snscrape-prepare-commands' Enter \; \
new-window -n "scrape" 'printf "\033]2;%s\033\\" "scrape-facebook"; bash' \; \
send-keys -t 'snscrape:scrape.1' 'pyenv_setup' Enter '# facebook' Enter \; \
split-window -v 'printf "\033]2;%s\033\\" "scrape-instagram"; bash' \; \
send-keys -t 'snscrape:scrape.2' 'pyenv_setup' Enter '# instagram' Enter \; \
split-window -v 'printf "\033]2;%s\033\\" "scrape-twitter"; bash' \; \
send-keys -t 'snscrape:scrape.3' 'pyenv_setup' Enter '# twitter' Enter \; \
select-layout -t 'snscrape:scrape' even-vertical \; \
new-window -n "upload" 'printf "\033]2;%s\033\\" "upload"; bash' \; \
send-keys -t 'snscrape:upload' 'pyenv_setup' Enter '# snscrape-upload' Enter \; \
new-window -n "merge" 'printf "\033]2;%s\033\\" "merge"; bash' \; \
send-keys -t 'snscrape:merge' '# snscrape-wiki-transfer-merge' Enter \; \
new-window -n "cleanup" 'printf "\033]2;%s\033\\" "cleanup"; bash'
@@ -1,7 +1,7 @@ | |||
#!/bin/bash | |||
scriptpath="$(cd "$(dirname "$0")"; pwd -P)" | |||
insta= | |||
instagramUrls=() | |||
if [[ -e transfer ]] | |||
then | |||
@@ -9,6 +9,12 @@ then | |||
exit 1 | |||
fi | |||
# Bail out early unless `python3 --version` runs successfully; its output is discarded.
python3 --version &>/dev/null || {
	echo "Error: python3 not found" >&2
	exit 1
}
# ArchiveBot | |||
for f in "$@" | |||
do | |||
@@ -23,12 +29,21 @@ do | |||
elif [[ "${f}" == instagram-* ]] | |||
then | |||
echo "!a < ${upurl}" | |||
insta=1 | |||
instagramUrls+=("${upurl}") | |||
else | |||
echo "!ao < ${upurl}" | |||
fi | |||
done 3>transfer | |||
# Instagram ignores | |||
# For every collected Instagram upload URL, print an "!ig" line consisting of the job ID
# computed by the archivebot-jobid-calculation helper and the hl= ignore pattern.
if [[ ${#instagramUrls[@]} -gt 0 ]]
then
	for url in "${instagramUrls[@]}"
	do
		# Only print the command when the helper succeeded and returned an ID; otherwise a
		# malformed "!ig" line with an empty job ID would be emitted.
		if jobid="$("${scriptpath}/archivebot-jobid-calculation" "${url}")" && [[ -n "${jobid}" ]]
		then
			echo "!ig ${jobid} ^https?://www.instagram.com/.*[?&]hl="
		else
			echo "Error: could not calculate the ArchiveBot job ID for ${url}" >&2
		fi
	done
fi
# chromebot | |||
for f in "$@" | |||
do | |||
@@ -41,9 +56,4 @@ do | |||
fi | |||
done | sed 's,^,chromebot: a ,' | |||
# Instagram ignore warning | |||
if [[ "${insta}" ]] | |||
then | |||
echo "Don't forget to add the Instagram ignore! ^https?://www.instagram.com/.*[?&]hl=" >&2 | |||
fi | |||
echo "Wrote ./transfer, you can run snscrape-wiki-transfer-merge now if ./wiki exists." >&2 |