@@ -275,6 +305,7 @@ class VerifyMode(ProcessMode):
self._brokenPayloadDigester = None
self._recordedPayloadDigest = None
self._printedBrokenPayloadWarning = False
self._verificationFailed = False
def parse_digest(self, digest):
if not digest.startswith(b'sha1:'):
@@ -289,6 +320,7 @@ class VerifyMode(ProcessMode):
# Handle one parser event and update the verification state for the current file/record.
# Dispatches on the exact type of `event`; digest mismatches are reported on stderr and
# remembered in self._verificationFailed, which is turned into a VerificationError at EndOfFile.
def process_event(self, event):
if type(event) is NewFile:
# Per-file state: start each file with no warning printed and no failure recorded.
self._printedBrokenPayloadWarning = False
self._verificationFailed = False
elif type(event) is BeginOfRecord:
# Only set up a block digester when the record carries a WARC-Block-Digest header.
if any(x[0] == b'WARC-Block-Digest' for x in event.warcHeaders):
self._blockDigester = hashlib.sha1()
@@ -315,10 +347,13 @@ class VerifyMode(ProcessMode):
elif type(event) is RawHTTPBodyChunk:
# Raw HTTP body bytes feed the "broken" payload digester, when one is active.
if self._brokenPayloadDigester:
self._brokenPayloadDigester.update(event.data)
elif type(event) is WARCParsingIssueEvent:
# Any parsing issue counts as a verification failure for the current file.
self._verificationFailed = True
elif type(event) is EndOfRecord:
# Block digest: compared only when both a digester and a recorded digest are present.
if self._blockDigester and self._recordedBlockDigest:
if not self._recordedBlockDigest.equals(self._blockDigester.digest()):
# NOTE(review): this message says 'v' while the payload mismatch message below says 'vs.' — likely a typo.
print('Block digest mismatch for record {}: recorded {} v calculated {}'.format(self._recordID, self._recordedBlockDigest.format(), self._recordedBlockDigest.format(self._blockDigester.digest())), file = sys.stderr)
self._verificationFailed = True
# Payload digest: checked for request and response records only.
# NOTE(review): only _payloadDigester is tested here, yet _recordedPayloadDigest and
# _brokenPayloadDigester are dereferenced below — presumably all three are set together; confirm.
if self._payloadDigester and self._recordType in (b'request', b'response'): #TODO: Support revisit
if not self._recordedPayloadDigest.equals(self._payloadDigester.digest()):
# The recorded digest matching the "broken" digester's result is handled separately from a plain mismatch.
if self._recordedPayloadDigest.equals(self._brokenPayloadDigester.digest()):
@@ -327,6 +362,9 @@ class VerifyMode(ProcessMode):
self._printedBrokenPayloadWarning = True
else:
# Neither the calculated nor the "broken" digest matches the recorded one: report and mark failure.
print('Payload digest mismatch for record {}: recorded {} vs. calculated {} (calculated broken {})'.format(self._recordID, self._recordedPayloadDigest.format(), self._recordedPayloadDigest.format(self._payloadDigester.digest()), self._recordedPayloadDigest.format(self._brokenPayloadDigester.digest())), file = sys.stderr)
self._verificationFailed = True
# Failures are accumulated per file and surfaced once, when the file ends.
elif type(event) is EndOfFile and self._verificationFailed:
raise VerificationError('one or more errors encountered while verifying {}'.format(event.filename))