From e385c1d30285b3dd00e0492f63f6336b0c13000a Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Mon, 18 Nov 2019 01:40:42 +0000 Subject: [PATCH] Limit curl to 10 seconds --- social-media-extract-profile-link | 2 +- url-normalise | 6 +++--- website-extract-social-media | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/social-media-extract-profile-link b/social-media-extract-profile-link index be29f80..0896f44 100755 --- a/social-media-extract-profile-link +++ b/social-media-extract-profile-link @@ -10,7 +10,7 @@ function verbose_echo { function fetch { verbose_echo "Fetching $1" >&2 - curl -sL -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' "$1" + curl -sL --max-time 10 -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' "$1" } function fetch_n_extract { diff --git a/url-normalise b/url-normalise index 982ae37..66ad505 100755 --- a/url-normalise +++ b/url-normalise @@ -63,7 +63,7 @@ do else url="${url%%\?*}" fi - page="$(curl -sL -A "${userAgent}" -H 'Accept-Language: en-US,en;q=0.5' "https://www.facebook.com/${url#*facebook.com/}")" + page="$(curl -sL --max-time 10 -A "${userAgent}" -H 'Accept-Language: en-US,en;q=0.5' "https://www.facebook.com/${url#*facebook.com/}")" user="$(grep -Po ']*(?<=\s)data-key\s*=\s*"tab_home".*?' <<< "${page}" | grep -Po ']*(?<=\s)href="/\K[^/]+')" if [[ "${user}" ]] then @@ -94,7 +94,7 @@ do url="${url%%\?*}" url="${url%/}" unnormalisedUser="${url##*/}" - user="$(curl -sL -A "${userAgent}" "https://twitter.com/${unnormalisedUser}" | grep -Po '&2 { - curl -sSL -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' "${url}" | \ + curl -sSL --max-time 10 -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' "${url}" | \ grep -Fi -e 'facebook' -e 'flickr' -e 'instagram' -e 'twitter' -e 't.me' -e 'youtube' -e 'youtu.be' -e 'vk.com' | \ tee \ >(