archiving community contributions on YouTube: unpublished captions, title and description translations and caption credits
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.
 
 

149 lignes
6.8 KiB

  1. from requests import session
  2. from youtube_util import getinitialdata, fullyexpand, getapikey, getlver
  3. from time import sleep
  4. mysession = session()
  5. #extract latest version automatically
  6. homepage = mysession.get("https://www.youtube.com/").text
  7. API_KEY = getapikey(homepage)
  8. params = (
  9. ('key', API_KEY),
  10. )
  11. API_VERSION = getlver(getinitialdata(homepage))
  12. continuationheaders = {"x-youtube-client-name": "1", "x-youtube-client-version": API_VERSION, "Accept-Language": "en-US"}
  13. del homepage
  14. def process_channel(channelid: str):
  15. playlists = set()
  16. shelfres = set()
  17. channellist = set()
  18. # PLAYLISTS
  19. data = {"context":{"client":{"hl":"en","gl":"US","clientName":"WEB","clientVersion":API_VERSION}},"browseId":channelid,"params":"EglwbGF5bGlzdHM%3D"}
  20. while True:
  21. initdata = mysession.post("https://www.youtube.com/youtubei/v1/browse", params=params, json=data)
  22. if initdata.status_code == 200:
  23. initdata = initdata.json()
  24. break
  25. else:
  26. print("Non-200 API status code, waiting 30 seconds before retrying...")
  27. sleep(30)
  28. CHANNELS_ID = 0
  29. PLAYLISTS_ID = 0
  30. current = 0
  31. for tab in initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]:
  32. if "tabRenderer" in tab.keys():
  33. if tab["tabRenderer"]["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"].rsplit("/", 1)[-1] == "playlists":
  34. PLAYLISTS_ID = current
  35. elif tab["tabRenderer"]["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"].rsplit("/", 1)[-1] == "channels":
  36. CHANNELS_ID = current
  37. current += 1
  38. del current
  39. shelflist = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][PLAYLISTS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"]
  40. for item in shelflist:
  41. itemint = item["itemSectionRenderer"]["contents"][0]
  42. if "shelfRenderer" in itemint.keys():
  43. shelfres.add(itemint["shelfRenderer"]["title"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"])
  44. elif "gridRenderer" in itemint.keys():
  45. playlistsint = fullyexpand(itemint["gridRenderer"], mysession, continuationheaders)["items"]
  46. for playlist in playlistsint:
  47. playlists.add(playlist["gridPlaylistRenderer"]["playlistId"])
  48. if "shortBylineText" in playlist["gridPlaylistRenderer"].keys():
  49. channellist.add(playlist["gridPlaylistRenderer"]["shortBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"])
  50. for item in shelfres:
  51. while True:
  52. shelfintp = mysession.get("https://www.youtube.com/"+str(item))
  53. if not """</div><div id="content" class=" content-alignment" role="main"><p class='largeText'>Sorry for the interruption. We have been receiving a large volume of requests from your network.</p>
  54. <p>To continue with your YouTube experience, please fill out the form below.</p>""" in shelfintp.text and shelfintp.status_code == 200:
  55. break
  56. else:
  57. print("Non-200 status code, waiting 30 seconds before retrying...")
  58. sleep(30)
  59. shelfiteminitdata = getinitialdata(shelfintp.text)
  60. playlistsint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][PLAYLISTS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"], mysession, continuationheaders)["items"]
  61. for playlist in playlistsint:
  62. playlists.add(playlist["gridPlaylistRenderer"]["playlistId"])
  63. if "shortBylineText" in playlist["gridPlaylistRenderer"].keys():
  64. channellist.add(playlist["gridPlaylistRenderer"]["shortBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"])
  65. # CHANNELS
  66. cshelfres = set()
  67. # PLAYLISTS
  68. data = {"context":{"client":{"hl":"en","gl":"US","clientName":"WEB","clientVersion":API_VERSION}},"browseId":channelid,"params":"EghjaGFubmVscw%3D%3D"}
  69. while True:
  70. initdata = mysession.post("https://www.youtube.com/youtubei/v1/browse", params=params, json=data)
  71. if initdata.status_code == 200:
  72. initdata = initdata.json()
  73. break
  74. else:
  75. print("Non-200 API status code, waiting 30 seconds before retrying...")
  76. sleep(30)
  77. shelflist = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][CHANNELS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"]
  78. for item in shelflist:
  79. itemint = item["itemSectionRenderer"]["contents"][0]
  80. if "shelfRenderer" in itemint.keys():
  81. cshelfres.add(itemint["shelfRenderer"]["title"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"])
  82. elif "gridRenderer" in itemint.keys():
  83. chanlistint = fullyexpand(itemint["gridRenderer"], mysession, continuationheaders)["items"]
  84. for channel in chanlistint:
  85. channellist.add(channel["gridChannelRenderer"]["channelId"])
  86. for item in cshelfres:
  87. while True:
  88. shelfintc = mysession.get("https://www.youtube.com/"+str(item))
  89. if not """</div><div id="content" class=" content-alignment" role="main"><p class='largeText'>Sorry for the interruption. We have been receiving a large volume of requests from your network.</p>
  90. <p>To continue with your YouTube experience, please fill out the form below.</p>""" in shelfintc.text and shelfintc.status_code == 200:
  91. break
  92. else:
  93. print("Non-200 status code, waiting 30 seconds before retrying...")
  94. sleep(30)
  95. shelfiteminitdata = getinitialdata(shelfintc.text)
  96. chanlistint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][CHANNELS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"], mysession, continuationheaders)["items"]
  97. for channel in chanlistint:
  98. channellist.add(channel["gridChannelRenderer"]["channelId"])
  99. return {"playlists": playlists, "channels": channellist}
  100. if __name__ == "__main__":
  101. from sys import argv
  102. chanl = argv
  103. chanl.pop(0)
  104. for channel in chanl:
  105. print(process_channel(channel))
  106. # SAMPLES:
  107. # UCqj7Cz7revf5maW9g5pgNcg lots of playlists
  108. # UCRwczJ_nk1t9IGHyHfHbXRQ Nathaniel Bandy - created playlists only, featured channels only
  109. # UCo8bcnLyZH8tBIH9V1mLgqQ the odd 1 is out - shelf, way too many subscriptions
  110. # UCfXIV2vThxEF8Hq2OE17AeQ no playlists or channels featured
  111. # UCJqV2-l0jqAa7uYN8IGJW7w TONS OF SUBSCRIPTIONS, no featured channels
  112. # UC_1nZUpPS6jFv5Pn3f85CaA TONS OF SUBSCRIPTIONS, some featured channels
  113. # UCJOh5FKisc0hUlEeWFBlD-w no subscriptions, plenty of featured channels
  114. # UC7fjJERoGTs_eOKk-nn7RMw fair number of featured channels