Browse Source

Handle 
 and 

master
JustAnotherArchivist 10 months ago
parent
commit
ddc9dc6b44
1 changed files with 1 additions and 1 deletions
  1. +1
    -1
      html-extract-stupid

+ 1
- 1
html-extract-stupid View File

@@ -41,7 +41,7 @@
python3 -c 'import os, re, sys'$'\n''try:'$'\n'' for l in map(str.strip, sys.stdin):'$'\n'' tag, value = l.split(" ", 1)'$'\n'' tag = tag.lower()'$'\n'' if tag != "imgsrcset":'$'\n'' print(l); continue'$'\n'' for url in re.split(r"\s+\d+[wx]\s*(?:,\s*|$)|,\s+", value.strip()):'$'\n'' if url: print(f"img {url}")'$'\n''except BrokenPipeError:'$'\n'' os.dup2(os.open(os.devnull, os.O_WRONLY), sys.stdout.fileno()); sys.exit(1)' |

# Decode HTML references
python3 -c 'import html, os, sys'$'\n''try:'$'\n'' for l in sys.stdin:'$'\n'' print(html.unescape(l.strip()))'$'\n''except BrokenPipeError:'$'\n'' os.dup2(os.open(os.devnull, os.O_WRONLY), sys.stdout.fileno()); sys.exit(1)' |
python3 -c 'import html, os, sys'$'\n''try:'$'\n'' for l in sys.stdin:'$'\n'' print(html.unescape(l.strip()).split("\r", 1)[0].split("\n", 1)[0])'$'\n''except BrokenPipeError:'$'\n'' os.dup2(os.open(os.devnull, os.O_WRONLY), sys.stdout.fileno()); sys.exit(1)' |

# Combine base and values to get absolute URLs
# If multiple base tags are present, they all get respected. This violates the HTML specs.


Loading…
Cancel
Save