#!/bin/bash
#
# ia-cdx-search-subdomains: print the unique hostnames seen under a domain.
#
# Usage: ia-cdx-search-subdomains [OPTIONS] DOMAIN
#   OPTIONS  every argument except the last; forwarded unchanged to
#            ia-cdx-search
#   DOMAIN   the last argument; placed in the `url=` parameter of the query
#
# Requires the executables `ia-cdx-search` and `uniqify` in the same directory
# as this script. Their behaviour is not defined here; per the usage text,
# the combined output is the set of unique subdomains, written to stdout.
#
# Exit status: 1 for the usage message; otherwise the status of the pipeline
# (non-zero if any stage fails, because of `pipefail`). An empty result set is
# not treated as a failure.
#
# Provenance: recovered from commit 1830d67283d275cc665f62f35ae885f55a302c38
# ("Add ia-cdx-search-subdomains", JustAnotherArchivist, 2022-06-01).
set -euo pipefail

if [[ $# -eq 0 || "$1" == '-h' || "$1" == '--help' ]]; then
  echo "Usage: ia-cdx-search-subdomains [OPTIONS] DOMAIN" >&2
  echo "Extracts all known subdomains from IA's CDX API, printing unique subdomains to stdout" >&2
  echo "Any options are passed through to ia-cdx-search; use this to customise parallelism or resume after errors" >&2
  exit 1
fi

# The domain is the last positional argument ($# >= 1 is guaranteed by the
# check above); everything before it stays in "$@" for ia-cdx-search.
domain="${!#}"
set -- "${@:1:$(($# - 1))}"

# Directory containing this script, used to locate the sibling helpers.
# `&&` (not `;`) makes a failed cd abort the script instead of silently
# resolving helpers relative to the caller's working directory. CDPATH is
# cleared so cd can neither jump elsewhere nor print the directory to stdout.
scriptpath="$(CDPATH= cd -- "$(dirname -- "$0")" && pwd -P)"

# Pipeline stages:
#   1. ia-cdx-search runs the query (forwarded options first, query last).
#   2. grep keeps the leading "scheme://authority" part of each line. grep
#      exits 1 when nothing matched; that only means "no results", so it is
#      mapped to success. Real grep errors (status >= 2) still fail the
#      pipeline.
#   3. sed drops everything up to the last slash, then a trailing ":port".
#      `[0-9][0-9]*` is the portable spelling of the GNU-only `[0-9]\+`.
#   4. uniqify post-processes the host list (presumably de-duplication).
"${scriptpath}/ia-cdx-search" "$@" "url=${domain}&collapse=urlkey&fl=original&matchType=domain" \
  | { grep -o '^[^/]*//[^/]*' || [[ $? -eq 1 ]]; } \
  | sed 's,^.*/,,; s,:[0-9][0-9]*$,,' \
  | "${scriptpath}/uniqify"