瀏覽代碼

Retrieve all channels and playlists on channel items

master
tech234a 3 年之前
父節點
當前提交
3ed2f2178b
共有 4 個檔案被更改,包括 141 行新增5 行删除
  1. +1
    -1
      tracker.py
  2. +8
    -4
      worker.py
  3. +106
    -0
      youtube_channel.py
  4. +26
    -0
      youtube_util.py

+ 1
- 1
tracker.py 查看文件

@@ -9,7 +9,7 @@ from os.path import isfile
from json import loads from json import loads


# https://github.com/ArchiveTeam/tencent-weibo-grab/blob/9bae5f9747e014db9227821a9c11557267967023/pipeline.py # https://github.com/ArchiveTeam/tencent-weibo-grab/blob/9bae5f9747e014db9227821a9c11557267967023/pipeline.py
VERSION = "20201001.01"
VERSION = "20201002.01"


TRACKER_ID = "ext-yt-communitycontribs" TRACKER_ID = "ext-yt-communitycontribs"
TRACKER_HOST = "trackerproxy.meo.ws" TRACKER_HOST = "trackerproxy.meo.ws"


+ 8
- 4
worker.py 查看文件

@@ -5,6 +5,8 @@ from os import mkdir, rmdir, listdir, system, environ
from os.path import isdir, isfile, getsize from os.path import isdir, isfile, getsize
from json import dumps, loads from json import dumps, loads


from youtube_channel import main

import signal import signal


import tracker import tracker
@@ -176,10 +178,12 @@ def threadrunner():
jobs.put(("submitdiscovery", itemyv["id"], tracker.ItemType.Video)) jobs.put(("submitdiscovery", itemyv["id"], tracker.ItemType.Video))


#channel created playlists #channel created playlists
y = ydl.extract_info("https://www.youtube.com/channel/"+desit.split(":", 1)[1]+"/playlists?view=1", download=False)
for itemyv in y["entries"]:
jobs.put(("submitdiscovery", itemyv["url"].split("?list=", 1)[1], tracker.ItemType.Playlist)) #[38:]
#TODO: saved playlists, featured channels
y = main(desit.split(":", 1)[1])
for itemyv in y["playlists"]:
jobs.put(("submitdiscovery", itemyv, tracker.ItemType.Playlist))
for itemyv in y["channels"]:
jobs.put(("submitdiscovery", itemyv, tracker.ItemType.Channel))

jobs.put(("complete", None, "channel:"+args)) jobs.put(("complete", None, "channel:"+args))
except: except:
print("YouTube-DL error, ignoring but not marking as complete...", "https://www.youtube.com/channel/"+desit.split(":", 1)[1]) print("YouTube-DL error, ignoring but not marking as complete...", "https://www.youtube.com/channel/"+desit.split(":", 1)[1])


+ 106
- 0
youtube_channel.py 查看文件

@@ -0,0 +1,106 @@
from requests import session
from youtube_util import getinitialdata, fullyexpand

# TODO: Rate limit detection, HTTP3?

mysession = session()
#extract latest version automatically
try:
lver = getinitialdata(mysession.get("https://www.youtube.com/").text)["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"]
except:
lver = "2.20201002.02.01"

#print(lver)
mysession.headers.update({"x-youtube-client-name": "1", "x-youtube-client-version": lver, "Accept-Language": "en-US"})

def main(channelid: str):
playlists = set()
shelfres = set()
channellist = set()

# PLAYLISTS
initdata = getinitialdata(mysession.get("https://www.youtube.com/channel/"+str(channelid)+"/playlists").text)

CHANNELS_ID = 0
PLAYLISTS_ID = 0

current = 0
for tab in initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]:
if "tabRenderer" in tab.keys():
if tab["tabRenderer"]["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"].rsplit("/", 1)[-1] == "playlists":
PLAYLISTS_ID = current
elif tab["tabRenderer"]["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"].rsplit("/", 1)[-1] == "channels":
CHANNELS_ID = current
current += 1

del current

shelflist = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][PLAYLISTS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"]

for item in shelflist:
itemint = item["itemSectionRenderer"]["contents"][0]
if "shelfRenderer" in itemint.keys():
shelfres.add(itemint["shelfRenderer"]["title"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"])
elif "gridRenderer" in itemint.keys():
playlistsint = fullyexpand(itemint["gridRenderer"])["items"]

for playlist in playlistsint:
playlists.add(playlist["gridPlaylistRenderer"]["playlistId"])
if "shortBylineText" in playlist["gridPlaylistRenderer"].keys():
channellist.add(playlist["gridPlaylistRenderer"]["shortBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"])

for item in shelfres:
shelfiteminitdata = getinitialdata(mysession.get("https://www.youtube.com/"+str(item)).text)
playlistsint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][PLAYLISTS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"])["items"]

for playlist in playlistsint:
playlists.add(playlist["gridPlaylistRenderer"]["playlistId"])
if "shortBylineText" in playlist["gridPlaylistRenderer"].keys():
channellist.add(playlist["gridPlaylistRenderer"]["shortBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"])

# CHANNELS
cshelfres = set()

initdata = getinitialdata(mysession.get("https://www.youtube.com/channel/"+str(channelid)+"/channels").text)

shelflist = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][CHANNELS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"]

for item in shelflist:
itemint = item["itemSectionRenderer"]["contents"][0]
if "shelfRenderer" in itemint.keys():
cshelfres.add(itemint["shelfRenderer"]["title"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"])
elif "gridRenderer" in itemint.keys():
chanlistint = fullyexpand(itemint["gridRenderer"])["items"]

for channel in chanlistint:
channellist.add(channel["gridChannelRenderer"]["channelId"])

for item in cshelfres:
shelfiteminitdata = getinitialdata(mysession.get("https://www.youtube.com/"+str(item)).text)
chanlistint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][CHANNELS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"])["items"]

for channel in chanlistint:
channellist.add(channel["gridChannelRenderer"]["channelId"])

return {"playlists": playlists, "channels": channellist}

if __name__ == "__main__":
from sys import argv
chanl = argv
chanl.pop(0)
for channel in chanl:
print(main(channel))

# SAMPLES:
# UCqj7Cz7revf5maW9g5pgNcg lots of playlists
# UCRwczJ_nk1t9IGHyHfHbXRQ Nathaniel Bandy - created playlists only, featured channels only
# UCo8bcnLyZH8tBIH9V1mLgqQ the odd 1 is out - shelf, way too many subscriptions
# UCfXIV2vThxEF8Hq2OE17AeQ no playlists or channels featured

# UCJqV2-l0jqAa7uYN8IGJW7w TONS OF SUBSCRIPTIONS, no featured channels

# UC_1nZUpPS6jFv5Pn3f85CaA TONS OF SUBSCRIPTIONS, some featured channels

# UCJOh5FKisc0hUlEeWFBlD-w no subscriptions, plenty of featured channels

# UC7fjJERoGTs_eOKk-nn7RMw fair number of featured channels

+ 26
- 0
youtube_util.py 查看文件

@@ -0,0 +1,26 @@
from requests import session
from json import loads
from urllib.parse import unquote

def getinitialdata(html: str):
for line in html.splitlines():
if line.strip().startswith('window["ytInitialData"] = '):
return loads(line.split('window["ytInitialData"] = ', 1)[1].strip()[:-1])
return {}

mysession = session()
#extract latest version automatically
try:
lver = getinitialdata(mysession.get("https://www.youtube.com/").text)["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"]
except:
lver = "2.20201002.02.01"
mysession.headers.update({"x-youtube-client-name": "1", "x-youtube-client-version": lver, "Accept-Language": "en-US"})

def fullyexpand(inputdict: dict):
lastrequestj = inputdict
while "continuations" in lastrequestj.keys():
lastrequest = mysession.get("https://www.youtube.com/browse_ajax?continuation="+unquote(lastrequestj["continuations"][0]["nextContinuationData"]["continuation"]))
lastrequestj = lastrequest.json()[1]["response"]["continuationContents"]["gridContinuation"]
inputdict["items"].extend(lastrequestj["items"])

return inputdict

Loading…
取消
儲存