"""qwarc spec that retrieves the live chat replay of a single YouTube video.

The video ID is read from the ``YOUTUBE_VIDEOID`` environment variable.
"""

import itertools
import os
import qwarc
import qwarc.utils


# Response handler shared by every request this spec makes.
# NOTE(review): presumably retries up to 5 times on errors -- confirm against
# qwarc.utils.handle_response_limit_error_retries.
responseHandler = qwarc.utils.handle_response_limit_error_retries(5)

# Start of the conversation bar object as it appears in the watch page. The
# quotes are matched together with a preceding backslash; the final byte of
# the marker is the opening brace of the object.
_CONVERSATION_BAR_MARKER = b'\\"conversationBar\\":{'

# A continuation token is accepted when it consists only of these characters,
# optionally followed by one or two percent-encoded '=' signs.
_CONTINUATION_CHARS = b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-'
_CONTINUATION_SUFFIXES = (b'', b'%3D', b'%3D%3D')


def _is_valid_continuation(value):
    """Return True if *value* is a non-empty, well-formed continuation token."""
    return bool(value) and value.lstrip(_CONTINUATION_CHARS) in _CONTINUATION_SUFFIXES


def _find_block_end(data, start):
    """Return the index at which the bracketed block opening at *start* closes.

    Scans *data* from *start*, counting ``{``/``[`` as openers and ``}``/``]``
    as closers, and returns the index of the first position where the depth is
    back to zero. Returns ``None`` if the end of *data* is reached first.

    Brackets inside string values are not treated specially.
    """
    depth = 0
    for pos in itertools.count(start=start):
        char = data[pos:pos + 1]
        if not char:
            # Slicing past the end yields b''; without this check the scan
            # would never terminate on truncated or unbalanced input.
            return None
        if char in (b'{', b'['):
            depth += 1
        elif char in (b'}', b']'):
            depth -= 1
        if depth == 0:
            return pos


class LiveChatReplay(qwarc.Item):
    """Item that fetches a video's watch page and walks its chat replay pages."""

    itemType = 'chat-replay'
    # itemValue = '{videoId}'

    @classmethod
    def generate(cls):
        """Yield the single item value: the video ID from ``YOUTUBE_VIDEOID``.

        Raises:
            KeyError: if the environment variable is not set.
        """
        yield os.environ['YOUTUBE_VIDEOID']

    async def process(self):
        """Fetch the watch page, then follow every chat replay continuation.

        Each continuation token found in the page's conversation bar is used
        as the start of a chain: the replay page for the token is fetched, the
        next token is read from it, and so on until a page yields no further
        token, a fetch fails, or a token fails validation.
        """
        response, _ = await self.fetch(
            f'https://www.youtube.com/watch?v={self.itemValue}&disable_polymer=1',
            responseHandler=responseHandler,
        )
        if not response or response.status != 200:
            self.logger.error('Could not fetch video page')
            return
        contents = await response.read()

        conversationBarPos = contents.find(_CONVERSATION_BAR_MARKER)
        if conversationBarPos < 0:
            self.logger.error('Could not find conversation bar')
            return

        # The object is cut out by bracket counting rather than by parsing the
        # page. The scan begins on the marker's own opening brace.
        blockStart = conversationBarPos + len(_CONVERSATION_BAR_MARKER) - 1
        blockEnd = _find_block_end(contents, blockStart)
        if blockEnd is None:
            self.logger.error('Could not find end of conversation bar')
            return
        conversationBar = contents[blockStart:blockEnd]

        for continuation in qwarc.utils.str_get_all_between(
            conversationBar, b'\\"continuation\\":\\"', b'\\"'
        ):
            if not _is_valid_continuation(continuation):
                self.logger.warning(f'Skipping unexpected continuation value: {continuation!r}')
                continue

            cont = continuation
            while True:
                page, _ = await self.fetch(
                    f'https://www.youtube.com/live_chat_replay?continuation={cont.decode("ascii")}',
                    responseHandler=responseHandler,
                )
                if not page or page.status != 200:
                    self.logger.error(f'Could not fetch continuation {cont!r}')
                    break
                pageContents = await page.read()

                contBlock = qwarc.utils.str_get_between(
                    pageContents, b'"liveChatReplayContinuationData":', b'}'
                )
                if not contBlock:
                    # No continuation data on this page: the chain has ended.
                    break
                cont = qwarc.utils.str_get_between(contBlock, b'"continuation":"', b'"')
                if not cont:
                    break
                if not _is_valid_continuation(cont):
                    self.logger.warning(f'Skipping unexpected cont value: {cont!r}')
                    # Stop here so the unvalidated value never reaches a URL.
                    break


# Passes the video ID to qwarc as an extra spec dependency.
specDependencies = qwarc.utils.SpecDependencies(extra=(('videoId', os.environ['YOUTUBE_VIDEOID']),))