From 138c2a2d39420a304babee2b2cd64e06d0a4af95 Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist
Date: Thu, 18 Apr 2019 17:09:24 +0000
Subject: [PATCH] Get rid of post-processing now that snscrape (dev version) has clean URLs

Keep the dirty URLs on Instagram because they're not that dirty and are
linked from the profile pages. I usually throw it into ArchiveBot anyway
such that it grabs the non-"taken-by" URLs as well.
---
 snscrape-facebook-user  | 5 +----
 snscrape-instagram-user | 2 +-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/snscrape-facebook-user b/snscrape-facebook-user
index 0fcbbca..df2144d 100755
--- a/snscrape-facebook-user
+++ b/snscrape-facebook-user
@@ -6,9 +6,6 @@ then
 	if [[ "${user}" != "${origUser}" ]]; then echo "Username fix: ${origUser} -> ${user}" >&2; fi
 	{
 		echo "https://www.facebook.com/${user}/"
-		snscrape -v facebook-user "${user}" | \
-			perl -pe 's,[?&]\K__xts__%5B0%5D=[^&]+?(&|$),,; s,[?&]\K__tn__=[^&]+?(&|$),,; s,[?&]\Keid=[^&]+?(&|$),,; s,[?&]$,,;' | \
-			awk '{print} /\?type=/ {print substr($0, 1, index($0, "?type=") - 1)}' | \
-			awk '!seen[$0]++'
+		snscrape -v facebook-user "${user}"
 	} > "facebook-@${user}"
 fi
diff --git a/snscrape-instagram-user b/snscrape-instagram-user
index 35a9277..e314d2f 100755
--- a/snscrape-instagram-user
+++ b/snscrape-instagram-user
@@ -1,2 +1,2 @@
 #!/bin/bash
-user="$1"; { echo "https://www.instagram.com/${user}/"; snscrape -v instagram-user "${user}" | awk '{print} /\?taken-by/ {print substr($0, 1, index($0, "?taken-by") - 1)}'; } > "instagram-@${user}"
+user="$1"; { echo "https://www.instagram.com/${user}/"; snscrape -v --format '{dirtyUrl}' instagram-user "${user}"; } > "instagram-@${user}"