Browse Source

Get rid of post-processing now that snscrape (dev version) has clean URLs

Keep the dirty URLs on Instagram because they're not that dirty and are linked from the profile pages. I usually throw it into ArchiveBot anyway, which then grabs the non-"taken-by" URLs as well.
master
JustAnotherArchivist 5 years ago
parent
commit
138c2a2d39
2 changed files with 2 additions and 5 deletions
  1. +1
    -4
      snscrape-facebook-user
  2. +1
    -1
      snscrape-instagram-user

+ 1
- 4
snscrape-facebook-user View File

@@ -6,9 +6,6 @@ then
if [[ "${user}" != "${origUser}" ]]; then echo "Username fix: ${origUser} -> ${user}" >&2; fi
{
echo "https://www.facebook.com/${user}/"
snscrape -v facebook-user "${user}" | \
perl -pe 's,[?&]\K__xts__%5B0%5D=[^&]+?(&|$),,; s,[?&]\K__tn__=[^&]+?(&|$),,; s,[?&]\Keid=[^&]+?(&|$),,; s,[?&]$,,;' | \
awk '{print} /\?type=/ {print substr($0, 1, index($0, "?type=") - 1)}' | \
awk '!seen[$0]++'
snscrape -v facebook-user "${user}"
} > "facebook-@${user}"
fi

+ 1
- 1
snscrape-instagram-user View File

@@ -1,2 +1,2 @@
#!/bin/bash
user="$1"; { echo "https://www.instagram.com/${user}/"; snscrape -v instagram-user "${user}" | awk '{print} /\?taken-by/ {print substr($0, 1, index($0, "?taken-by") - 1)}'; } > "instagram-@${user}"
user="$1"; { echo "https://www.instagram.com/${user}/"; snscrape -v --format '{dirtyUrl}' instagram-user "${user}"; } > "instagram-@${user}"

Loading…
Cancel
Save