From 660f315d434902fef5cf248b93981d75e4c88d73 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Mon, 6 Jul 2020 15:43:41 +0000 Subject: [PATCH] Fix trailing percent-encoded equals signs on initial extraction --- comments.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/comments.py b/comments.py index 57681d2..7ed76e6 100644 --- a/comments.py +++ b/comments.py @@ -56,7 +56,7 @@ class Comments(qwarc.Item): if not sessionToken: self.logger.error('Could not find session token') return - if sessionToken.lstrip(b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-') not in (b'', b'=', b'=='): + if sessionToken.lstrip(b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-') not in (b'', b'=', b'==', b'%3D', b'%3D%3D'): self.logger.error(f'Unexpected session token value: {sessionToken!r}') return sessionToken = sessionToken.decode('ascii') @@ -71,12 +71,12 @@ class Comments(qwarc.Item): return section = content[continuationStartPos:sectionIdentifierPos] continuationToken = qwarc.utils.str_get_between(section, b'"continuation":"', b'"') - if continuationToken.lstrip(b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-') not in (b'', b'=', b'=='): + if continuationToken.lstrip(b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-') not in (b'', b'=', b'==', b'%3D', b'%3D%3D'): self.logger.error(f'Unexpected continuation token value: {continuationToken!r}') return continuationToken = continuationToken.decode('ascii') itct = qwarc.utils.str_get_between(section, b'"clickTrackingParams":"', b'"') - if itct.lstrip(b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-') not in (b'', b'=', b'=='): + if itct.lstrip(b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-') not in (b'', b'=', b'==', b'%3D', b'%3D%3D'): self.logger.error(f'Unexpected itct value: {itct!r}') return itct = itct.decode('ascii')