Browse Source

Add record ID to log messages

master
JustAnotherArchivist 4 years ago
parent
commit
d4826abde2
1 changed file with 4 additions and 3 deletions
  1. +4
    -3
      warc-tiny

+ 4
- 3
warc-tiny View File

@@ -109,6 +109,7 @@ def iter_warc(f):
warcContentLength = int(next(x[1] for x in warcHeaders if x[0] == b'Content-Length'))
warcType = next(x[1] for x in warcHeaders if x[0] == b'WARC-Type')
yield BeginOfRecord(warcHeaders, warcHeaderBuf)
+ recordID = next(x[1] for x in warcHeaders if x[0] == b'WARC-Record-ID')

# Read WARC block (and skip CRLFCRLF at the end of the record)
if len(buf) < warcContentLength + 4:
@@ -158,7 +159,7 @@ def iter_warc(f):
try:
chunkLineEnd = httpBody.index(b'\r\n')
except ValueError:
- print('Error: could not find chunk line end, skipping', file = sys.stderr)
+ print('Error: could not find chunk line end in record {}, skipping'.format(recordID), file = sys.stderr)
break
chunkLine = httpBody[:chunkLineEnd]
if b';' in chunkLine:
@@ -166,7 +167,7 @@ def iter_warc(f):
else:
chunkLength = chunkLine.strip()
if chunkLength.lstrip(b'0123456789abcdef') != b'':
- print('Error: malformed chunk length, skipping', file = sys.stderr)
+ print('Error: malformed chunk length {!r} in record {}, skipping'.format(chunkLength, recordID), file = sys.stderr)
break
chunkLength = int(chunkLength, base = 16)
if chunkLength == 0:
@@ -177,7 +178,7 @@ def iter_warc(f):
else:
yield HTTPResponseBodyChunk(httpDecompressor.decompress(httpBody))
else:
- print('Warning: malformed HTTP response, skipping', file = sys.stderr)
+ print('Warning: malformed HTTP response in record {}, skipping'.format(recordID), file = sys.stderr)
else:
yield WARCBlockChunk(warcContent)
yield EndOfRecord()


Loading…
Cancel
Save