Fix 'binary' lines

1 年之前 · e3380e6e2a
--- a/+ 3
+++ b/+ 3
@@ -8,7 +8,7 @@
 	tr '\r\n' '  ' | tr '<' '\n' |

 	# Extract tags of interest
-	grep -i '^\(a\|base\|img\|link\|script\)\s' |
+	grep -ai '^\(a\|base\|img\|link\|script\)\s' |

 	# Fix scripty backslash nonsense
 	perl -pe 's,\\,,g' |
@@ -26,13 +26,13 @@
 	         " |

 	# Filter out unprocessed lines
-	grep '^+' | sed 's,^+,,' |
+	grep -a '^+' | sed 's,^+,,' |

 	# Remove quotes from attribute values
 	perl -pe "s,^([a-zA-Z]+) (['\"])(.*)\2$,\1 \3," |

 	# Filter out lines without an attribute value
-	grep -Pv '^[a-zA-Z]+ $' |
+	grep -Pva '^[a-zA-Z]+ $' |

 	# img srcset splitting
 	python3 -c 'import re, sys'$'\n''for l in map(str.strip, sys.stdin):'$'\n'' try:'$'\n''  tag, value = l.split(" ", 1)'$'\n''  tag = tag.lower()'$'\n''  if tag != "imgsrcset":'$'\n''   print(l); continue'$'\n''  for url in re.split(r"\s+\d+[wx]\s*(?:,\s*|$)|,\s+", value.strip()):'$'\n''   if url: print(f"img {url}")'$'\n'' except BrokenPipeError: break' |