# Patch for the shell pipeline script html-extract-stupid (single hunk at line 32).
#
# What changes: the `python3 -c` one-liner under "# img srcset splitting" gets its
# per-line loop body wrapped in `try: ... except BrokenPipeError: break`, so the
# loop stops when a print raises BrokenPipeError. The unchanged context line under
# "# Decode HTML references" already uses the same try/except/break form.
#
# What the filter does (unchanged by this patch): each stdin line is stripped and
# split once on the first space into tag and value. Lines whose lowercased tag is
# not "imgsrcset" are printed as-is; for "imgsrcset" the value is split with the
# regex shown and every non-empty piece is printed as "img <url>".
#
# NOTE(review): the whitespace in this record looks collapsed (hunk lines joined
# onto one line, single spaces inside the quoted Python where nested indentation
# is required) -- take the patch from the repository rather than applying this text.
# NOTE(review): `tag, value = l.split(" ", 1)` raises ValueError on a line without
# a space, and only BrokenPipeError is caught -- presumably earlier pipeline stages
# always emit "tag value" lines; confirm.
diff --git a/html-extract-stupid b/html-extract-stupid index b0b2ea1..3629676 100755 --- a/html-extract-stupid +++ b/html-extract-stupid @@ -32,7 +32,7 @@ perl -pe "s,^([a-zA-Z]+) (['\"])(.*)\2$,\1 \3," | # img srcset splitting - python3 -c 'import re, sys'$'\n''for l in map(str.strip, sys.stdin):'$'\n'' tag, value = l.split(" ", 1)'$'\n'' tag = tag.lower()'$'\n'' if tag != "imgsrcset":'$'\n'' print(l); continue'$'\n'' for url in re.split(r"\s+\d+[wx]\s*(?:,\s*|$)|,\s+", value.strip()):'$'\n'' if url: print(f"img {url}")' | + python3 -c 'import re, sys'$'\n''for l in map(str.strip, sys.stdin):'$'\n'' try:'$'\n'' tag, value = l.split(" ", 1)'$'\n'' tag = tag.lower()'$'\n'' if tag != "imgsrcset":'$'\n'' print(l); continue'$'\n'' for url in re.split(r"\s+\d+[wx]\s*(?:,\s*|$)|,\s+", value.strip()):'$'\n'' if url: print(f"img {url}")'$'\n'' except BrokenPipeError: break' | # Decode HTML references python3 -c 'import html, sys'$'\n''for l in sys.stdin:'$'\n'' try: print(html.unescape(l.strip()))'$'\n'' except BrokenPipeError: break' |