Browse Source

Add moinmoin-url-list

master
JustAnotherArchivist 4 months ago
parent
commit
2ccf28eb43
1 changed files with 28 additions and 0 deletions
  1. +28
    -0
      moinmoin-url-list

+ 28
- 0
moinmoin-url-list View File

@@ -0,0 +1,28 @@
#!/bin/bash
#
# moinmoin-url-list: list the page URLs found on a MoinMoin wiki's TitleIndex,
# together with a fixed set of per-page action URLs.
#
# Usage: moinmoin-url-list FRONT_PAGE_URL
#
#   FRONT_PAGE_URL  Must start with "http" and must not contain a query
#                   string. Its last path segment is dropped to form the base
#                   URL; "<base>/TitleIndex" is then fetched.
#
# Output (stdout): for every distinct link found between the
# 'href="/TitleIndex?allpages=1"' marker and the last 'id="pagebottom"' line,
# five URLs: the page itself, ?action=info, ?action=info&general=1,
# ?action=raw and ?action=AttachFile.
#
# Exit status: 1 on bad usage; otherwise the status of the fetch/extract
# pipeline, which is non-zero when curl fails or no links are found.
#
# NOTE(review): scraped hrefs are appended verbatim to the base URL and the
# start marker is a root-relative href, so this presumably expects the wiki
# to be served from the root of the host -- confirm before using it on a wiki
# mounted under a sub-path.
#
# Requires: curl, GNU grep (for -P), sed, awk.

# Without pipefail the script's status would be that of the final awk stage
# only, hiding curl failures behind an empty, "successful" output.
set -o pipefail

if [[ $# -ne 1 || "$1" != http* ]]; then
  printf 'Usage: %q FRONT_PAGE_URL\n' "$0" >&2
  exit 1
fi

url="$1"
if [[ "${url}" == *\?* ]]; then
  printf 'Error: URLs with query strings not supported\n' >&2
  exit 1
fi

# Drop the last path segment (the front page name) to get the base URL.
# A bare "scheme://host" has no path segment; stripping it anyway would eat
# the host and leave "scheme:/", so only strip when a path is present.
after_scheme="${url#*://}"
if [[ "${after_scheme}" == */* ]]; then
  url="${url%/*}"
fi

# Pipeline stages:
#   1. Fetch the title index; --fail turns HTTP errors into a non-zero exit
#      instead of feeding an error page to the scraper.
#   2. Keep everything from the "all pages" link onwards ...
#   3. ... and up to the last page-bottom marker (the huge context sizes are
#      a "rest of file" / "start of file" stand-in).
#   4. Extract the value of every href attribute.
#   5. Drop the "all pages" link itself.
#   6. Normalise attachment links to their page; [?] is a literal question
#      mark (a backslash-escaped one is a quantifier in GNU basic regexes).
#   7. Emit each distinct page once, in first-seen order, with its variants.
curl --fail --silent --show-error "${url}/TitleIndex" \
  | grep -A 1000000 -F 'href="/TitleIndex?allpages=1"' \
  | grep -B 1000000 -F 'id="pagebottom"' \
  | grep -Po 'href="\K[^"]+' \
  | grep -Fxv '/TitleIndex?allpages=1' \
  | sed 's,[?]action=AttachFile$,,' \
  | awk -v url="${url}" '
      !seen[$0]++ {
        print url $0
        print url $0 "?action=info"
        print url $0 "?action=info&general=1"
        print url $0 "?action=raw"
        print url $0 "?action=AttachFile"
      }'

Loading…
Cancel
Save