A method to grab the live chat replay from YouTube
Não pode escolher mais do que 25 tópicos. Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

62 linhas
2.2 KiB

  1. import itertools
  2. import os
  3. import qwarc
  4. import qwarc.utils
# Response handler passed to every fetch issued by this spec.
# NOTE(review): judging by the helper's name, this presumably caps the number of
# retries after response errors at 5 — confirm against qwarc.utils.
responseHandler = qwarc.utils.handle_response_limit_error_retries(5)
  6. class LiveChatReplay(qwarc.Item):
  7. itemType = 'chat-replay'
  8. # itemValue = '{videoId}'
  9. @classmethod
  10. def generate(cls):
  11. yield os.environ['YOUTUBE_VIDEOID']
  12. async def process(self):
  13. response, _ = await self.fetch(f'https://www.youtube.com/watch?v={self.itemValue}&disable_polymer=1', responseHandler = responseHandler)
  14. if not response or response.status != 200:
  15. self.logger.error('Could not fetch video page')
  16. return
  17. contents = await response.read()
  18. conversationBarPos = contents.find(b'\\"conversationBar\\":{')
  19. if conversationBarPos < 0:
  20. self.logger.error('Could not find conversation bar')
  21. return
  22. # No regerts
  23. openParens = 0
  24. for pos in itertools.count(start = conversationBarPos + 20):
  25. char = contents[pos:pos+1]
  26. if char in (b'{', b'['):
  27. openParens += 1
  28. elif char in (b'}', b']'):
  29. openParens -= 1
  30. if openParens == 0:
  31. break
  32. conversationBar = contents[conversationBarPos + 20 : pos]
  33. for continuation in qwarc.utils.str_get_all_between(conversationBar, b'\\"continuation\\":\\"', b'\\"'):
  34. if not continuation or continuation.lstrip(b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-') not in (b'', b'%3D', b'%3D%3D'):
  35. self.logger.warning('Skipping unexpected continuation value: {continuation!r}')
  36. continue
  37. cont = continuation
  38. while True:
  39. page, _ = await self.fetch(f'https://www.youtube.com/live_chat_replay?continuation={cont.decode("ascii")}', responseHandler = responseHandler)
  40. if not page or page.status != 200:
  41. self.logger.error(f'Could not fetch continuation {cont!r}')
  42. break
  43. pageContents = await page.read()
  44. contBlock = qwarc.utils.str_get_between(pageContents, b'"liveChatReplayContinuationData":', b'}')
  45. if not contBlock:
  46. break
  47. cont = qwarc.utils.str_get_between(contBlock, b'"continuation":"', b'"')
  48. if not cont:
  49. break
  50. if cont.lstrip(b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-') not in (b'', b'%3D', b'%3D%3D'):
  51. self.logger.warning(f'Skipping unexpected cont value: {cont!r}')
# Passes the target video ID (read from the YOUTUBE_VIDEOID environment variable;
# a KeyError is raised here if it is unset) to qwarc as an extra spec dependency.
# NOTE(review): presumably qwarc records these extras with the crawl's metadata — confirm.
specDependencies = qwarc.utils.SpecDependencies(extra = (('videoId', os.environ['YOUTUBE_VIDEOID']),))