Shield values containing colons with angle brackets

3 jaren geleden · 4c90bacaed
--- a/+ 5
+++ b/+ 5
@@ -4,8 +4,9 @@
 # Operating modes:
 #  warc-tiny colour FILES  --  coloured output of the WARCs for easier reading
 #  warc-tiny dump-responses [-m|--meta] FILES  --  dump the HTTP response bodies to stdout
 #    With --meta, prefix every line with the filename, record offset, record ID, and target URI; e.g. 'file.warc.gz:123:<urn:uuid:41b76f1f-f946-4723-91f8-cee6491e92f3>:https://example.org/:    foobar'
 #    With --meta, prefix every line with the filename, record offset, record ID, and target URI; e.g. 'file.warc.gz:123:<urn:uuid:41b76f1f-f946-4723-91f8-cee6491e92f3>:<https://example.org/>:    foobar'
 #      The record offset may be -1 if it is not known.
 #      The filename is wrapped in angle brackets if it contains a colon; the target URI is always wrapped in angle brackets (since it virtually always contains a colon).
 #  warc-tiny verify FILES  --  verify the integrity of a WARC by comparing the digests

 import base64
@@ -309,13 +310,15 @@ class DumpResponsesMode(ProcessMode):
 		lines = buf.split(b'\n')
 		self._buffer = lines.pop() # Since there's an explicit `_write(b'\r\n')` at the end of the record, this implicitly resets the buffer as well
 		for line in lines:
 			sys.stdout.buffer.write(':'.join((self._filename, '-1', self._recordID, self._targetURI, '')).encode('utf-8'))
 			sys.stdout.buffer.write(':'.join((self._filename, '-1', self._recordID, '<' + self._targetURI + '>', '')).encode('utf-8'))
 			sys.stdout.buffer.write(line)
 			sys.stdout.buffer.write(b'\n')

 	def process_event(self, event):
 		if type(event) is NewFile:
 			self._filename = event.filename
 			if ':' in self._filename:
 				self._filename = '<' + self._filename + '>'
 		elif type(event) is BeginOfRecord:
 			warcContentType = next(x[1] for x in event.warcHeaders if x[0] == b'Content-Type')
 			warcType = next(x[1] for x in event.warcHeaders if x[0] == b'WARC-Type')