
Limit curl to 10 seconds

Commit e385c1d302 by JustAnotherArchivist on master, 4 years ago

3 changed files with 5 additions and 5 deletions:
1. social-media-extract-profile-link (+1, -1)
2. url-normalise (+3, -3)
3. website-extract-social-media (+1, -1)
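The change is the same in all three files: --max-time 10 caps the entire curl operation (connecting, following redirects, and transferring) at 10 seconds, so a hung or slow server can no longer stall the scripts indefinitely. A minimal sketch of the behaviour, using httpbin.org's delay endpoint as an assumed test server:

    # curl gives up after 10 seconds and exits with code 28
    # (CURLE_OPERATION_TIMEDOUT); /delay/15 holds the response for 15 s.
    curl -sL --max-time 10 'https://httpbin.org/delay/15'
    echo "exit code: $?"   # 28 on timeout, 0 if the server answers in time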

social-media-extract-profile-link (+1, -1)

@@ -10,7 +10,7 @@ function verbose_echo {
 
 
 function fetch {
     verbose_echo "Fetching $1" >&2
-    curl -sL -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' "$1"
+    curl -sL --max-time 10 -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' "$1"
 }
 
 
 function fetch_n_extract {
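Since the curl call is the last command in fetch, the function's exit status is curl's, so a caller can tell a timeout apart from a successful fetch. A hypothetical caller check, not part of this commit:

    # Sketch: fetch propagates curl's exit status; 28 means --max-time expired.
    if ! page="$(fetch 'https://example.org/')"; then
        verbose_echo "fetch failed or timed out" >&2
    fi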


url-normalise (+3, -3)

@@ -63,7 +63,7 @@ do
     else
         url="${url%%\?*}"
     fi
-    page="$(curl -sL -A "${userAgent}" -H 'Accept-Language: en-US,en;q=0.5' "https://www.facebook.com/${url#*facebook.com/}")"
+    page="$(curl -sL --max-time 10 -A "${userAgent}" -H 'Accept-Language: en-US,en;q=0.5' "https://www.facebook.com/${url#*facebook.com/}")"
     user="$(grep -Po '<div\s[^>]*(?<=\s)data-key\s*=\s*"tab_home".*?</div>' <<< "${page}" | grep -Po '<a\s[^>]*(?<=\s)href="/\K[^/]+')"
     if [[ "${user}" ]]
     then
@@ -94,7 +94,7 @@ do
     url="${url%%\?*}"
     url="${url%/}"
     unnormalisedUser="${url##*/}"
-    user="$(curl -sL -A "${userAgent}" "https://twitter.com/${unnormalisedUser}" | grep -Po '<a class="([^"]*\s)?ProfileHeaderCard-screennameLink(\s[^"]*)?" href="/\K[^/"]+(?=")')"
+    user="$(curl -sL --max-time 10 -A "${userAgent}" "https://twitter.com/${unnormalisedUser}" | grep -Po '<a class="([^"]*\s)?ProfileHeaderCard-screennameLink(\s[^"]*)?" href="/\K[^/"]+(?=")')"
     if [[ "${user}" ]]
     then
         echo "${prefix}https://twitter.com/${user}"
@@ -123,7 +123,7 @@ do
     else
         rurl="${url}?disable_polymer=1"
     fi
-    page="$(curl -4sL -A "${userAgent}" -H 'Accept-Language: en-US,en;q=0.5' "${rurl}")"
+    page="$(curl -4sL --max-time 10 -A "${userAgent}" -H 'Accept-Language: en-US,en;q=0.5' "${rurl}")"
     canonical="$(grep -Po '<link itemprop="url" href="http://www\.youtube\.com/\Kuser/[^"]+' <<< "${page}")"
     if [[ "${canonical}" ]]
     then
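Note that --max-time bounds the whole transfer, unlike --connect-timeout, which only limits the connection phase. To fail fast on unreachable hosts while still capping slow transfers, the two flags can be combined; a sketch, not part of this commit:

    # Give up after 5 s if no connection is established, 10 s overall.
    curl -sL --connect-timeout 5 --max-time 10 -A "${userAgent}" "${rurl}"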


website-extract-social-media (+1, -1)

@@ -5,7 +5,7 @@ function fetch_n_extract {
     local url="$1"
     verbose_echo "Fetching ${url}" >&2
     {
-        curl -sSL -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' "${url}" | \
+        curl -sSL --max-time 10 -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' "${url}" | \
         grep -Fi -e 'facebook' -e 'flickr' -e 'instagram' -e 'twitter' -e 't.me' -e 'youtube' -e 'youtu.be' -e 'vk.com' | \
         tee \
             >(
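In this pipeline, a timeout aborts curl mid-stream, but grep still processes whatever bytes arrived before the cutoff; with -sS the timeout message goes to stderr rather than into the pipe. A sketch of that partial-output behaviour, assuming httpbin.org's drip endpoint as a slow test server:

    # The server drips 100 bytes over 20 s; curl stops at 10 s, so wc -c sees
    # only the bytes received so far while curl reports the timeout on stderr.
    curl -sSL --max-time 10 'https://httpbin.org/drip?duration=20&numbytes=100' | wc -c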

