archiving community contributions on YouTube: unpublished captions, title and description translations and caption credits
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

149 lines
6.8 KiB

  1. from requests import session
  2. from youtube_util import getinitialdata, fullyexpand, getapikey, getlver
  3. from time import sleep
  # One HTTP session is shared by every request made in this module.
  mysession = session()

  # Download the YouTube homepage once and derive the API key and the client
  # version from it, so neither value has to be hard-coded here.
  homepage = mysession.get("https://www.youtube.com/").text
  API_KEY = getapikey(homepage)
  API_VERSION = getlver(getinitialdata(homepage))

  # Query-string parameters sent along with every browse API request.
  params = (('key', API_KEY),)

  # Headers handed to fullyexpand() for its follow-up requests.
  continuationheaders = {
      "x-youtube-client-name": "1",
      "x-youtube-client-version": API_VERSION,
      "Accept-Language": "en-US",
  }

  # The page text is no longer needed once both values have been extracted.
  del homepage
  # Opaque "params" tokens that select a channel tab in the browse API.
  # They are URL-encoded base64; the decoded bytes contain the tab name.
  _PLAYLISTS_TAB_PARAMS = "EglwbGF5bGlzdHM%3D"
  _CHANNELS_TAB_PARAMS = "EghjaGFubmVscw%3D%3D"

  # HTML fragment whose presence means YouTube served its "large volume of
  # requests" form instead of the requested page.
  # NOTE(review): the two lines are joined with a bare newline; this assumes
  # the second line of the original literal had no leading whitespace.
  _RATE_LIMIT_MARKER = (
      """</div><div id="content" class=" content-alignment" role="main"><p class='largeText'>Sorry for the interruption. We have been receiving a large volume of requests from your network.</p>\n"""
      """<p>To continue with your YouTube experience, please fill out the form below.</p>"""
  )


  def _browse_tab(channelid, tab_params):
      """POST a browse request for one channel tab and return the decoded JSON.

      Retries every 30 seconds, without limit, until the API answers 200.
      """
      data = {
          "context": {
              "client": {
                  "hl": "en",
                  "gl": "US",
                  "clientName": "WEB",
                  "clientVersion": API_VERSION,
              }
          },
          "browseId": channelid,
          "params": tab_params,
      }
      while True:
          response = mysession.post(
              "https://www.youtube.com/youtubei/v1/browse", params=params, json=data
          )
          if response.status_code == 200:
              return response.json()
          print("Non-200 API status code, waiting 30 seconds before retrying...")
          sleep(30)


  def _fetch_shelf_page(url):
      """GET a shelf page and return the initial data parsed from its HTML.

      Retries every 30 seconds, without limit, until the response is a 200
      that does not contain the rate-limit form.
      """
      # Strip leading slashes so a site-relative URL ("/channel/...") does not
      # produce "https://www.youtube.com//channel/...".
      full_url = "https://www.youtube.com/" + str(url).lstrip("/")
      while True:
          response = mysession.get(full_url)
          if response.status_code == 200 and _RATE_LIMIT_MARKER not in response.text:
              return getinitialdata(response.text)
          print("Non-200 status code, waiting 30 seconds before retrying...")
          sleep(30)


  def _find_tab_index(tabs, tab_name):
      """Return the position of the tab whose URL ends in tab_name, or None."""
      for index, tab in enumerate(tabs):
          if "tabRenderer" not in tab:
              continue
          url = tab["tabRenderer"]["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"]
          if url.rsplit("/", 1)[-1] == tab_name:
              return index
      return None


  def _tab_sections(initdata, tab_index):
      """Return the section list stored under the tab at tab_index."""
      tabs = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]
      return tabs[tab_index]["tabRenderer"]["content"]["sectionListRenderer"]["contents"]


  def _expanded_tab_items(initdata, tab_index):
      """Return every grid item listed under one tab.

      Grids found directly in the tab are expanded in place. Sections that are
      shelves only carry a link, so each distinct shelf URL is fetched
      afterwards and the grid on that page is expanded as well.
      """
      items = []
      shelf_urls = set()
      for section in _tab_sections(initdata, tab_index):
          first = section["itemSectionRenderer"]["contents"][0]
          if "shelfRenderer" in first:
              shelf_urls.add(first["shelfRenderer"]["title"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"])
          elif "gridRenderer" in first:
              items.extend(fullyexpand(first["gridRenderer"], mysession, continuationheaders)["items"])

      for shelf_url in shelf_urls:
          shelf_data = _fetch_shelf_page(shelf_url)
          # The shelf page is read at the same tab position as the API response.
          grid = _tab_sections(shelf_data, tab_index)[0]["itemSectionRenderer"]["contents"][0]["gridRenderer"]
          items.extend(fullyexpand(grid, mysession, continuationheaders)["items"])
      return items


  def process_channel(channelid: str) -> dict:
      """Collect the playlists and channels listed on a channel's tabs.

      Args:
          channelid: value sent as "browseId" in the browse API requests.

      Returns:
          A dict with two sets:
          "playlists" - the playlistId of every playlist found on the
          playlists tab, including those behind shelves;
          "channels" - the channelId of every channel found on the channels
          tab, plus the browseId from the byline of each listed playlist
          that has one.

      A tab that cannot be located is skipped with a printed notice rather
      than silently reading whichever tab happens to be first. Network
      requests are retried every 30 seconds with no upper bound, so this call
      can block for as long as YouTube keeps answering with errors.
      """
      playlists = set()
      channellist = set()

      # PLAYLISTS
      initdata = _browse_tab(channelid, _PLAYLISTS_TAB_PARAMS)
      tabs = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]
      # Both tab positions are taken from this first response and reused below.
      playlists_index = _find_tab_index(tabs, "playlists")
      channels_index = _find_tab_index(tabs, "channels")

      if playlists_index is None:
          print("No playlists tab found for " + str(channelid) + ", skipping playlists.")
      else:
          for playlist in _expanded_tab_items(initdata, playlists_index):
              renderer = playlist["gridPlaylistRenderer"]
              playlists.add(renderer["playlistId"])
              if "shortBylineText" in renderer:
                  channellist.add(renderer["shortBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"])

      # CHANNELS
      if channels_index is None:
          print("No channels tab found for " + str(channelid) + ", skipping channels.")
      else:
          initdata = _browse_tab(channelid, _CHANNELS_TAB_PARAMS)
          for channel in _expanded_tab_items(initdata, channels_index):
              channellist.add(channel["gridChannelRenderer"]["channelId"])

      return {"playlists": playlists, "channels": channellist}
  if __name__ == "__main__":
      from sys import argv

      # Every command-line argument after the script name is treated as a
      # channel ID. Slicing (rather than popping from argv) leaves sys.argv
      # itself unmodified.
      for channel in argv[1:]:
          print(process_channel(channel))
  106. # SAMPLES:
  107. # UCqj7Cz7revf5maW9g5pgNcg lots of playlists
  108. # UCRwczJ_nk1t9IGHyHfHbXRQ Nathaniel Bandy - created playlists only, featured channels only
  109. # UCo8bcnLyZH8tBIH9V1mLgqQ the odd 1 is out - shelf, way too many subscriptions
  110. # UCfXIV2vThxEF8Hq2OE17AeQ no playlists or channels featured
  111. # UCJqV2-l0jqAa7uYN8IGJW7w TONS OF SUBSCRIPTIONS, no featured channels
  112. # UC_1nZUpPS6jFv5Pn3f85CaA TONS OF SUBSCRIPTIONS, some featured channels
  113. # UCJOh5FKisc0hUlEeWFBlD-w no subscriptions, plenty of featured channels
  114. # UC7fjJERoGTs_eOKk-nn7RMw fair number of featured channels