
Limit curl to 10 seconds

master
JustAnotherArchivist 4 years ago
parent
commit e385c1d302
3 changed files with 5 additions and 5 deletions
  1. social-media-extract-profile-link (+1, -1)
  2. url-normalise (+3, -3)
  3. website-extract-social-media (+1, -1)
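
The --max-time flag caps the whole operation (name lookup, connect, and transfer), and curl gives up with exit code 28 once the limit is hit. A minimal sketch of the behaviour, using the public httpbin.org test service as an assumed-available slow endpoint and a deliberately short 2-second cap so the timeout triggers quickly; the scripts below use the same flag with a 10-second budget:

    # /delay/10 responds only after 10 s, so the 2 s cap aborts the transfer.
    curl -sL --max-time 2 'https://httpbin.org/delay/10' -o /dev/null
    echo "curl exit code: $?"   # 28 = operation timed out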

social-media-extract-profile-link (+1, -1)

@@ -10,7 +10,7 @@ function verbose_echo {
 
 function fetch {
 verbose_echo "Fetching $1" >&2
-curl -sL -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' "$1"
+curl -sL --max-time 10 -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' "$1"
 }
 
 function fetch_n_extract {
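
Since curl is the last command in fetch, its exit status (28 on a timeout) becomes the function's return value, so callers can detect a stalled host instead of waiting forever. A small usage sketch under that assumption, with example.org as a placeholder target:

    if page="$(fetch 'https://example.org/')"; then
        printf '%s' "${page}" | wc -c                      # got a response within 10 s
    else
        echo "fetch failed or timed out (exit $?)" >&2     # 28 indicates the 10 s cap was hit
    fi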


url-normalise (+3, -3)

@@ -63,7 +63,7 @@ do
 else
 url="${url%%\?*}"
 fi
-page="$(curl -sL -A "${userAgent}" -H 'Accept-Language: en-US,en;q=0.5' "https://www.facebook.com/${url#*facebook.com/}")"
+page="$(curl -sL --max-time 10 -A "${userAgent}" -H 'Accept-Language: en-US,en;q=0.5' "https://www.facebook.com/${url#*facebook.com/}")"
 user="$(grep -Po '<div\s[^>]*(?<=\s)data-key\s*=\s*"tab_home".*?</div>' <<< "${page}" | grep -Po '<a\s[^>]*(?<=\s)href="/\K[^/]+')"
 if [[ "${user}" ]]
 then
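
For reference, the two parameter expansions just above this call do the URL surgery in pure bash: ${url%%\?*} strips the query string and ${url#*facebook.com/} keeps only the path after the domain. A standalone trace with a made-up profile URL:

    url='https://www.facebook.com/SomePage/?ref=br_rs'
    url="${url%%\?*}"                # cut from the first '?':            https://www.facebook.com/SomePage/
    echo "${url#*facebook.com/}"     # keep what follows 'facebook.com/': SomePage/
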
@@ -94,7 +94,7 @@ do
 url="${url%%\?*}"
 url="${url%/}"
 unnormalisedUser="${url##*/}"
-user="$(curl -sL -A "${userAgent}" "https://twitter.com/${unnormalisedUser}" | grep -Po '<a class="([^"]*\s)?ProfileHeaderCard-screennameLink(\s[^"]*)?" href="/\K[^/"]+(?=")')"
+user="$(curl -sL --max-time 10 -A "${userAgent}" "https://twitter.com/${unnormalisedUser}" | grep -Po '<a class="([^"]*\s)?ProfileHeaderCard-screennameLink(\s[^"]*)?" href="/\K[^/"]+(?=")')"
 if [[ "${user}" ]]
 then
 echo "${prefix}https://twitter.com/${user}"
@@ -123,7 +123,7 @@ do
 else
 rurl="${url}?disable_polymer=1"
 fi
-page="$(curl -4sL -A "${userAgent}" -H 'Accept-Language: en-US,en;q=0.5' "${rurl}")"
+page="$(curl -4sL --max-time 10 -A "${userAgent}" -H 'Accept-Language: en-US,en;q=0.5' "${rurl}")"
 canonical="$(grep -Po '<link itemprop="url" href="http://www\.youtube\.com/\Kuser/[^"]+' <<< "${page}")"
 if [[ "${canonical}" ]]
 then
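
The canonical-URL extraction here relies on grep -P's \K, which discards everything matched before it so only the user/... tail is printed. A self-contained check against a sample <link> tag (SomeChannel is made up):

    page='<link itemprop="url" href="http://www.youtube.com/user/SomeChannel">'
    grep -Po '<link itemprop="url" href="http://www\.youtube\.com/\Kuser/[^"]+' <<< "${page}"
    # prints: user/SomeChannel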


website-extract-social-media (+1, -1)

@@ -5,7 +5,7 @@ function fetch_n_extract {
 local url="$1"
 verbose_echo "Fetching ${url}" >&2
 {
-curl -sSL -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' "${url}" | \
+curl -sSL --max-time 10 -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' "${url}" | \
 grep -Fi -e 'facebook' -e 'flickr' -e 'instagram' -e 'twitter' -e 't.me' -e 'youtube' -e 'youtu.be' -e 'vk.com' | \
 tee \
 >(
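
The enclosing pipeline downloads each page once and fans the filtered HTML out to several per-platform extractors via tee and process substitution. A reduced sketch of the same pattern, with example.org as a placeholder and simplified greps standing in for the script's real extractors:

    curl -sSL --max-time 10 'https://example.org/' | \
        grep -Fi -e 'facebook' -e 'twitter' | \
        tee \
            >(grep -Eoi 'https?://(www\.)?facebook\.com/[A-Za-z0-9_.-]+') \
            >(grep -Eoi 'https?://(www\.)?twitter\.com/[A-Za-z0-9_]+') \
            >/dev/null
    # Each >() extractor reads the same stream concurrently; their output may interleave.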

