A method to grab the live chat from YouTube
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.
 
 

96 lignes
3.2 KiB

  1. import asyncio
  2. import itertools
  3. import os
  4. import qwarc
  5. import qwarc.utils
  6. responseHandler = qwarc.utils.handle_response_limit_error_retries(5)
  7. class LiveChat(qwarc.Item):
  8. itemType = 'chat'
  9. # itemValue = '{videoId}'
  10. @classmethod
  11. def generate(cls):
  12. yield os.environ['YOUTUBE_VIDEOID']
  13. async def recurse(self, continuation):
  14. cont = continuation
  15. extra = ''
  16. while True:
  17. page, _ = await self.fetch(f'https://www.youtube.com/live_chat?continuation={cont.decode("ascii")}{extra}', responseHandler = responseHandler)
  18. if not page or page.status != 200:
  19. self.logger.error(f'Could not fetch continuation {cont!r}')
  20. break
  21. pageContents = await page.read()
  22. invalidationContDataPos = pageContents.find(b'"invalidationContinuationData":')
  23. if invalidationContDataPos < 0:
  24. self.logger.error(f'Could not find continuation data on continuation {cont!r}')
  25. break
  26. openParens = 0
  27. for pos in itertools.count(start = invalidationContDataPos + 31):
  28. char = pageContents[pos:pos+1]
  29. if char in (b'{', b'['):
  30. openParens += 1
  31. elif char in (b'}', b']'):
  32. openParens -= 1
  33. if openParens == 0:
  34. break
  35. contBlock = pageContents[invalidationContDataPos + 31 : pos]
  36. if not contBlock:
  37. break
  38. timeout = qwarc.utils.str_get_between(contBlock, b'"timeoutMs":', b',')
  39. if not timeout:
  40. self.logger.warning(f'Could not find timeout in {contBlock!r}')
  41. break
  42. if timeout.lstrip(b'0123456789') != b'':
  43. self.logger.warning(f'Invalid timeout value: {timeout!r}')
  44. break
  45. cont = qwarc.utils.str_get_between(contBlock, b'"continuation":"', b'"')
  46. if not cont:
  47. break
  48. if cont.lstrip(b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-') not in (b'', b'%3D', b'%3D%3D'):
  49. self.logger.warning(f'Skipping unexpected cont value: {cont!r}')
  50. break
  51. extra = '&isInvalidationTimeoutRequest=true&hidden=false&pbj=1'
  52. await asyncio.sleep(int(timeout) / 1000)
  53. async def process(self):
  54. response, _ = await self.fetch(f'https://www.youtube.com/watch?v={self.itemValue}&disable_polymer=1', responseHandler = responseHandler)
  55. if not response or response.status != 200:
  56. self.logger.error('Could not fetch video page')
  57. return
  58. contents = await response.read()
  59. conversationBarPos = contents.find(b'\\"conversationBar\\":{')
  60. if conversationBarPos < 0:
  61. self.logger.error('Could not find conversation bar')
  62. return
  63. # No regerts
  64. openParens = 0
  65. for pos in itertools.count(start = conversationBarPos + 20):
  66. char = contents[pos:pos+1]
  67. if char in (b'{', b'['):
  68. openParens += 1
  69. elif char in (b'}', b']'):
  70. openParens -= 1
  71. if openParens == 0:
  72. break
  73. conversationBar = contents[conversationBarPos + 20 : pos]
  74. tasks = []
  75. for continuation in qwarc.utils.str_get_all_between(conversationBar, b'\\"continuation\\":\\"', b'\\"'):
  76. if not continuation or continuation.lstrip(b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-') not in (b'', b'%3D', b'%3D%3D'):
  77. self.logger.warning('Skipping unexpected continuation value: {continuation!r}')
  78. continue
  79. tasks.append(asyncio.ensure_future(self.recurse(continuation)))
  80. await asyncio.gather(*tasks)
  81. specDependencies = qwarc.utils.SpecDependencies(extra = (('videoId', os.environ['YOUTUBE_VIDEOID']),))