Explorar el Código

Fix handling of invalid UTF-8 input

master
JustAnotherArchivist hace 1 año
padre
commit
a07c2b2374
Se ha modificado 1 fichero con 4 adiciones y 5 borrados
  1. +4
    -5
      youtube-extract

+ 4
- 5
youtube-extract Ver fichero

@@ -113,9 +113,7 @@ for e in matchers:
 e[0] = re.compile(e[0])

 for origLine in sys.stdin.buffer:
-origLine = origLine.decode('utf-8', 'surrogateescape')
-origLine = origLine.strip()
-line = re.sub(r'https?://', '//', origLine)
+line = re.sub(r'https?://', '//', origLine.strip().decode('utf-8', 'surrogateescape'))
 line = domainPattern.sub('/www.youtube.com/', line)
 decodedLine = percentdecode(line)
 hadMatches = False
@@ -129,8 +127,9 @@ for origLine in sys.stdin.buffer:
 results.add(r)
 if r is None:
 break
-print(r)
+sys.stdout.buffer.write(r.encode('utf-8', 'surrogateescape'))
+sys.stdout.buffer.write(b'\n')
 if None in results:
 break
 if not hadMatches:
-print(origLine, file = sys.stderr)
+sys.stderr.buffer.write(origLine)

Cargando…
Cancelar
Guardar