
Limit curl to 10 seconds

Branch: master
JustAnotherArchivist, 4 years ago
commit e385c1d302
3 changed files with 5 additions and 5 deletions:
  1. social-media-extract-profile-link (+1 -1)
  2. url-normalise (+3 -3)
  3. website-extract-social-media (+1 -1)

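For context: curl's --max-time flag caps the entire operation (name resolution, connect, and transfer) at the given number of seconds, unlike --connect-timeout, which only bounds the connection phase. When the limit expires, curl aborts and exits with code 28. A minimal sketch of the behaviour, using a hypothetical URL:

    # Give up if the whole request takes longer than 10 seconds.
    curl -sL --max-time 10 'https://example.com/' -o /dev/null
    status=$?
    # curl reports an expired --max-time with exit code 28.
    if [[ ${status} -eq 28 ]]; then
        echo 'request timed out' >&2
    fi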
social-media-extract-profile-link (+1 -1)

@@ -10,7 +10,7 @@ function verbose_echo {

 function fetch {
 	verbose_echo "Fetching $1" >&2
-	curl -sL -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' "$1"
+	curl -sL --max-time 10 -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' "$1"
 }
 
 function fetch_n_extract {


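Since the fetch wrapper forwards curl's exit status, a caller could in principle distinguish time-outs from other failures. A purely hypothetical extension (not part of this commit or repository) that retries only on time-out might look like:

    # Hypothetical helper: retry a timed-out fetch up to three times.
    function fetch_with_retry {
        local output status attempt
        for attempt in 1 2 3; do
            output="$(fetch "$1")"
            status=$?
            if [[ ${status} -eq 0 ]]; then
                printf '%s' "${output}"
                return 0
            fi
            # Only exit code 28 (time-out) is worth retrying.
            [[ ${status} -eq 28 ]] || return "${status}"
        done
        return 28
    }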
url-normalise (+3 -3)

@@ -63,7 +63,7 @@ do
 	else
 		url="${url%%\?*}"
 	fi
-	page="$(curl -sL -A "${userAgent}" -H 'Accept-Language: en-US,en;q=0.5' "https://www.facebook.com/${url#*facebook.com/}")"
+	page="$(curl -sL --max-time 10 -A "${userAgent}" -H 'Accept-Language: en-US,en;q=0.5' "https://www.facebook.com/${url#*facebook.com/}")"
 	user="$(grep -Po '<div\s[^>]*(?<=\s)data-key\s*=\s*"tab_home".*?</div>' <<< "${page}" | grep -Po '<a\s[^>]*(?<=\s)href="/\K[^/]+')"
 	if [[ "${user}" ]]
 	then
@@ -94,7 +94,7 @@ do
 	url="${url%%\?*}"
 	url="${url%/}"
 	unnormalisedUser="${url##*/}"
-	user="$(curl -sL -A "${userAgent}" "https://twitter.com/${unnormalisedUser}" | grep -Po '<a class="([^"]*\s)?ProfileHeaderCard-screennameLink(\s[^"]*)?" href="/\K[^/"]+(?=")')"
+	user="$(curl -sL --max-time 10 -A "${userAgent}" "https://twitter.com/${unnormalisedUser}" | grep -Po '<a class="([^"]*\s)?ProfileHeaderCard-screennameLink(\s[^"]*)?" href="/\K[^/"]+(?=")')"
 	if [[ "${user}" ]]
 	then
 		echo "${prefix}https://twitter.com/${user}"
@@ -123,7 +123,7 @@ do
 	else
 		rurl="${url}?disable_polymer=1"
 	fi
-	page="$(curl -4sL -A "${userAgent}" -H 'Accept-Language: en-US,en;q=0.5' "${rurl}")"
+	page="$(curl -4sL --max-time 10 -A "${userAgent}" -H 'Accept-Language: en-US,en;q=0.5' "${rurl}")"
 	canonical="$(grep -Po '<link itemprop="url" href="http://www\.youtube\.com/\Kuser/[^"]+' <<< "${page}")"
 	if [[ "${canonical}" ]]
 	then


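The parameter expansions around these curl calls do the URL surgery: ${url%%\?*} strips the longest suffix starting at '?', ${url#*facebook.com/} strips the shortest prefix through 'facebook.com/', and ${url##*/} keeps only the last path component. A quick illustration with made-up values:

    url='https://www.facebook.com/SomePage?ref=bookmarks'
    url="${url%%\?*}"              # -> https://www.facebook.com/SomePage
    echo "${url#*facebook.com/}"   # -> SomePage

    url='https://twitter.com/SomeUser/'
    url="${url%/}"                 # drop the trailing slash
    echo "${url##*/}"              # -> SomeUser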
website-extract-social-media (+1 -1)

@@ -5,7 +5,7 @@ function fetch_n_extract {
 	local url="$1"
 	verbose_echo "Fetching ${url}" >&2
 	{
-		curl -sSL -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' "${url}" | \
+		curl -sSL --max-time 10 -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' "${url}" | \
 		grep -Fi -e 'facebook' -e 'flickr' -e 'instagram' -e 'twitter' -e 't.me' -e 'youtube' -e 'youtu.be' -e 'vk.com' | \
 		tee \
 		>(


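The pipeline in this script filters the fetched page for social-media hostnames and then fans it out to several per-site extractors via tee and process substitution, so each >(...) block receives a full copy of the filtered stream. A reduced, self-contained sketch of that pattern, with made-up patterns and output files:

    # Each >(...) runs as its own process reading a copy of the stream.
    printf '%s\n' 'see twitter.com/alice' 'see youtube.com/bob' | \
        tee \
            >(grep -o 'twitter\.com/[A-Za-z0-9_]*' > twitter-links.txt) \
            >(grep -o 'youtube\.com/[A-Za-z0-9_/]*' > youtube-links.txt) \
            > /dev/null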