archiving community contributions on YouTube: unpublished captions, title and description translations and caption credits
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

36 lines
1.4 KiB

  1. from json import loads
  2. from urllib.parse import unquote
  3. from time import sleep
  4. import requests
  5. def getinitialdata(html: str):
  6. for line in html.splitlines():
  7. if line.strip().startswith('window["ytInitialData"] = '):
  8. return loads(line.split('window["ytInitialData"] = ', 1)[1].strip()[:-1])
  9. return {}
  10. def getapikey(html: str):
  11. return html.split('"INNERTUBE_API_KEY":"', 1)[-1].split('"', 1)[0]
  12. #extract latest version automatically
  13. def getlver(initialdata: dict):
  14. try:
  15. return initialdata["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"]
  16. except:
  17. return "2.20201016.02.00"
  18. def fullyexpand(inputdict: dict, mysession: requests.session, continuationheaders: dict):
  19. lastrequestj = inputdict
  20. while "continuations" in lastrequestj.keys():
  21. while True:
  22. lastrequest = mysession.get("https://www.youtube.com/browse_ajax?continuation="+unquote(lastrequestj["continuations"][0]["nextContinuationData"]["continuation"]), headers=continuationheaders)
  23. if lastrequest.status_code == 200:
  24. break
  25. else:
  26. print("Non-200 API status code, waiting 30 seconds before retrying...")
  27. sleep(30)
  28. lastrequestj = lastrequest.json()[1]["response"]["continuationContents"]["gridContinuation"]
  29. inputdict["items"].extend(lastrequestj["items"])
  30. return inputdict