Browse Source

Separate names in stderr annotations for the various url-normalise processes

master
JustAnotherArchivist 4 years ago
parent
commit
869ade27eb
1 changed file with 7 additions and 5 deletions
  1. +7
    -5
      wiki-recursive-extract-normalise

+ 7
- 5
wiki-recursive-extract-normalise View File

@@ -26,13 +26,15 @@ function verbose_echo {
} }


# NOTE(review): this span is a side-by-side diff rendering, not runnable shell.
# Lines the commit left unchanged appear twice on one line (old column, then
# new column); removed and added lines follow each other without +/- markers.
# Presumably the first `name=` assignment below is the removed line and the
# three lines after it are its replacement -- confirm against the repository.
#
# stderr_annotate (form using the explicit label):
#   $1    label to put in front of each stderr line; if it is the empty
#         string, the label is derived from the command path instead
#   $2..  command to run, followed by its arguments
#   Runs the command and re-emits every line it writes to stderr on stderr as
#   "[label] line". Only file descriptor 2 of the command is redirected.
#   NOTE(review): `name` is assigned without `local`, so it is not scoped to
#   the function.
function stderr_annotate { function stderr_annotate {
# Label = first argument with everything up to its last '/' stripped.
name="${1##*/}"
# Label = first argument taken verbatim.
name="$1"
# Drop the label so that "$@" is the command followed by its arguments.
shift
# Empty label: fall back to the command's path with the directory part stripped.
if [[ "${name}" == '' ]]; then name="${1##*/}"; fi
# The command's stderr feeds a process substitution that reads it line by line
# (`read -r` leaves backslashes uninterpreted) and echoes each line, prefixed
# with "[label] ", back to stderr via `>&2`.
"$@" 2> >(while read -r line; do echo "[${name}] ${line}"; done >&2) "$@" 2> >(while read -r line; do echo "[${name}] ${line}"; done >&2)
} }


scriptpath="$(cd "$(dirname "$0")"; pwd -P)" scriptpath="$(cd "$(dirname "$0")"; pwd -P)"
declare -A sectionUrls declare -A sectionUrls
stderr_annotate "${scriptpath}/url-normalise" ${verbose} | while read -r line
stderr_annotate 'url-normalise/before' "${scriptpath}/url-normalise" ${verbose} | while read -r line
do do
echo "${line}" echo "${line}"
if [[ "${line}" == '=='* ]] if [[ "${line}" == '=='* ]]
@@ -58,9 +60,9 @@ do


if grep -Pq '//([^/]+\.)?(facebook\.com|flickr\.com|instagram\.com|twitter\.com|vk\.com|youtube\.com|youtu\.be)/' <<<"${curUrl}" if grep -Pq '//([^/]+\.)?(facebook\.com|flickr\.com|instagram\.com|twitter\.com|vk\.com|youtube\.com|youtu\.be)/' <<<"${curUrl}"
then then
mapfile -t outUrls < <(stderr_annotate "${scriptpath}/social-media-extract-profile-link" ${verbose} "${curUrl}" < <(:) | stderr_annotate "${scriptpath}/url-normalise" ${verbose})
mapfile -t outUrls < <(stderr_annotate '' "${scriptpath}/social-media-extract-profile-link" ${verbose} "${curUrl}" < <(:) | stderr_annotate 'url-normalise/post-social' "${scriptpath}/url-normalise" ${verbose})
else else
mapfile -t outUrls < <(stderr_annotate "${scriptpath}/website-extract-social-media" ${verbose} "${curUrl}" < <(:) | stderr_annotate "${scriptpath}/url-normalise" ${verbose})
mapfile -t outUrls < <(stderr_annotate '' "${scriptpath}/website-extract-social-media" ${verbose} "${curUrl}" < <(:) | stderr_annotate 'url-normalise/post-web' "${scriptpath}/url-normalise" ${verbose})
fi fi


for outUrl in "${outUrls[@]}" for outUrl in "${outUrls[@]}"
@@ -78,4 +80,4 @@ do
done done
done done
fi fi
done | stderr_annotate "${scriptpath}/url-normalise" ${verbose}
done | stderr_annotate 'url-normalise/after' "${scriptpath}/url-normalise" ${verbose}

Loading…
Cancel
Save