瀏覽代碼

Fix trailing percent-encoded equals signs on initial extraction

master
JustAnotherArchivist 3 年之前
父節點
當前提交
660f315d43
共有 1 個檔案被更改,包括 3 行新增及 3 行删除
  1. +3
    -3
      comments.py

+ 3
- 3
comments.py 查看文件

@@ -56,7 +56,7 @@ class Comments(qwarc.Item):
if not sessionToken:
self.logger.error('Could not find session token')
return
if sessionToken.lstrip(b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-') not in (b'', b'=', b'=='):
if sessionToken.lstrip(b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-') not in (b'', b'=', b'==', b'%3D', b'%3D%3D'):
self.logger.error(f'Unexpected session token value: {sessionToken!r}')
return
sessionToken = sessionToken.decode('ascii')
@@ -71,12 +71,12 @@ class Comments(qwarc.Item):
return
section = content[continuationStartPos:sectionIdentifierPos]
continuationToken = qwarc.utils.str_get_between(section, b'"continuation":"', b'"')
if continuationToken.lstrip(b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-') not in (b'', b'=', b'=='):
if continuationToken.lstrip(b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-') not in (b'', b'=', b'==', b'%3D', b'%3D%3D'):
self.logger.error(f'Unexpected continuation token value: {continuationToken!r}')
return
continuationToken = continuationToken.decode('ascii')
itct = qwarc.utils.str_get_between(section, b'"clickTrackingParams":"', b'"')
if itct.lstrip(b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-') not in (b'', b'=', b'=='):
if itct.lstrip(b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-') not in (b'', b'=', b'==', b'%3D', b'%3D%3D'):
self.logger.error(f'Unexpected itct value: {itct!r}')
return
itct = itct.decode('ascii')


Loading…
取消
儲存