From 511405bbca511bd895132d90c83e5976e41aa4c7 Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com>
Date: Tue, 24 Jan 2023 07:41:34 +0000
Subject: [PATCH] Fix case sensitivity on img srcset processing

---
 html-extract-stupid | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/html-extract-stupid b/html-extract-stupid
index deb288a..b0b2ea1 100755
--- a/html-extract-stupid
+++ b/html-extract-stupid
@@ -14,7 +14,7 @@
 	perl -pe 's,\\,,g' |
 
 	# Split img tags with src and srcset
-	perl -pe "s,^img(?=\s(?:(?:[^>'\"]|\"[^\"]*\"|'[^']*')*\s)?src\s*=\s*([^>'\"\s]*|\"[^\"]*\"|'[^']*')(?:\s|>))\s(?:(?:[^>'\"]|\"[^\"]*\"|'[^']*')*\s)?srcset\s*=\s*([^>'\"\s]*|\"[^\"]*\"|'[^']*')(\s|>).*,img src=\1\nimg srcset=\2," |
+	perl -pe "s,^img(?=\s(?:(?:[^>'\"]|\"[^\"]*\"|'[^']*')*\s)?src\s*=\s*([^>'\"\s]*|\"[^\"]*\"|'[^']*')(?:\s|>))\s(?:(?:[^>'\"]|\"[^\"]*\"|'[^']*')*\s)?srcset\s*=\s*([^>'\"\s]*|\"[^\"]*\"|'[^']*')(\s|>).*,img src=\1\nimg srcset=\2,i" |
 
 	# Extract interesting tags/attributes
 	perl -pe "s,^(a|base)\s(?:(?:[^>'\"]|\"[^\"]*\"|'[^']*')*\s)?href\s*=\s*([^>'\"\s]*|\"[^\"]*\"|'[^']*')(\$|\s|>).*,+\1 \2,i;
@@ -32,7 +32,7 @@
 	perl -pe "s,^([a-zA-Z]+) (['\"])(.*)\2$,\1 \3," |
 
 	# img srcset splitting
-	python3 -c 'import re, sys'$'\n''for l in map(str.strip, sys.stdin):'$'\n'' tag, value = l.split(" ", 1)'$'\n'' if tag != "imgsrcset":'$'\n''  print(l); continue'$'\n'' for url in re.split(r"\s+\d+[wx]\s*(?:,\s*|$)|,\s+", value.strip()):'$'\n''  if url: print(f"img {url}")' |
+	python3 -c 'import re, sys'$'\n''for l in map(str.strip, sys.stdin):'$'\n'' tag, value = l.split(" ", 1)'$'\n'' tag = tag.lower()'$'\n'' if tag != "imgsrcset":'$'\n''  print(l); continue'$'\n'' for url in re.split(r"\s+\d+[wx]\s*(?:,\s*|$)|,\s+", value.strip()):'$'\n''  if url: print(f"img {url}")' |
 
 	# Decode HTML references
 	python3 -c 'import html, sys'$'\n''for l in sys.stdin:'$'\n'' try: print(html.unescape(l.strip()))'$'\n'' except BrokenPipeError: break' |