Fix handling of invalid UTF-8 input

1 year ago · a07c2b2374
--- a/+ 4
+++ b/+ 4
@@ -113,9 +113,7 @@ for e in matchers:
 	e[0] = re.compile(e[0])

 for origLine in sys.stdin.buffer:
 	origLine = origLine.decode('utf-8', 'surrogateescape')
 	origLine = origLine.strip()
 	line = re.sub(r'https?://', '//', origLine)
 	line = re.sub(r'https?://', '//', origLine.strip().decode('utf-8', 'surrogateescape'))
 	line = domainPattern.sub('/www.youtube.com/', line)
 	decodedLine = percentdecode(line)
 	hadMatches = False
@@ -129,8 +127,9 @@ for origLine in sys.stdin.buffer:
 			results.add(r)
 			if r is None:
 				break
 			print(r)
 			sys.stdout.buffer.write(r.encode('utf-8', 'surrogateescape'))
 			sys.stdout.buffer.write(b'\n')
 		if None in results:
 			break
 	if not hadMatches:
 		print(origLine, file = sys.stderr)
 		sys.stderr.buffer.write(origLine)