Ver código fonte

Filenames

microtasks
tech234a 3 anos atrás
pai
commit
dadd93f53f
1 arquivo alterado com 31 adições e 8 exclusões
  1. +31
    -8
      export.py

+ 31
- 8
export.py Ver arquivo

@@ -42,6 +42,8 @@ class MyHTMLParser(HTMLParser):
self.captions = [] self.captions = []
self.title = "" self.title = ""
self.description = "" self.description = ""
self.inittitle = ""
self.initdescription = ""




def check_attr(self, attrs, attr, value): def check_attr(self, attrs, attr, value):
@@ -63,6 +65,10 @@ class MyHTMLParser(HTMLParser):
self.captions[len(self.captions)-1]["endTime"] = int(self.get_attr(attrs, "data-end-ms")) self.captions[len(self.captions)-1]["endTime"] = int(self.get_attr(attrs, "data-end-ms"))
elif tag == "input" and self.check_attr(attrs, "id", "metadata-title"): elif tag == "input" and self.check_attr(attrs, "id", "metadata-title"):
self.title = self.get_attr(attrs, "value") self.title = self.get_attr(attrs, "value")
elif tag == "textarea" and self.check_attr(attrs, "id", "metadata-description"):
self.initdescription = self.get_attr(attrs, "data-original-description")
elif tag == "input" and self.check_attr(attrs, "id", "metadata-title"):
self.inittitle = self.get_attr(attrs, "data-original-title")


def handle_data(self, data): def handle_data(self, data):
if self.get_starttag_text() and self.get_starttag_text().startswith("<textarea "): if self.get_starttag_text() and self.get_starttag_text().startswith("<textarea "):
@@ -114,18 +120,24 @@ def subprrun(jobs, mysession):


page = mysession.get("https://www.youtube.com/timedtext_editor", params=pparams) page = mysession.get("https://www.youtube.com/timedtext_editor", params=pparams)


assert not "accounts.google.com" in page.url, "Please supply authentication cookie information in config.json. See README.md for more information."
assert not "accounts.google.com" in page.url, "Please supply authentication cookie information in config.json or environment variables. See README.md for more information."


inttext = page.text inttext = page.text

try:
initlang = page.text.split("'metadataLanguage': \"", 1)[1].split('"', 1)[0]
except:
initlang = ""

del page del page


filestring = "_community"
filestring = "_community_draft"
if '<li id="captions-editor-nav-captions" role="tab" data-state="published" class="published">' in inttext: if '<li id="captions-editor-nav-captions" role="tab" data-state="published" class="published">' in inttext:
filestring = "_published"
filestring = "_community_published"


if mode == "forceedit-captions": if mode == "forceedit-captions":
filestring = "_community_revised"
filestring = "_community_draft"


if 'title="The video owner already provided subtitles/CC"' in inttext: if 'title="The video owner already provided subtitles/CC"' in inttext:
filestring = "_uploader_provided" filestring = "_uploader_provided"
@@ -164,22 +176,33 @@ def subprrun(jobs, mysession):


del captiontext del captiontext


if parser.title or parser.description[:-16] and (mode == "default" or mode == "forceedit-metadata"):
if (parser.title or parser.description[:-16]) and (mode == "default" or mode == "forceedit-metadata"):
metadata = {} metadata = {}
metadata["title"] = parser.title metadata["title"] = parser.title
if metadata["title"] == False: if metadata["title"] == False:
metadata["title"] = "" metadata["title"] = ""
metadata["description"] = parser.description[:-16] metadata["description"] = parser.description[:-16]


filestring = "_community"
filestring = "_community_draft"
if '<li id="captions-editor-nav-metadata" role="tab" data-state="published" class="published">' in inttext: if '<li id="captions-editor-nav-metadata" role="tab" data-state="published" class="published">' in inttext:
filestring = "_published"
filestring = "_community_published"


if mode == "forceedit-metadata": if mode == "forceedit-metadata":
filestring = "_community_revised"
filestring = "_community_draft"
open("out/"+vid+"/"+vid+"_"+langcode+filestring+".json", "w", encoding="utf-8").write(dumps(metadata)) open("out/"+vid+"/"+vid+"_"+langcode+filestring+".json", "w", encoding="utf-8").write(dumps(metadata))
del metadata del metadata


if (parser.inittitle or parser.initdescription) and (mode == "default" or mode == "forceedit-metadata" and initlang):
metadata = {}
metadata["title"] = parser.inittitle
if metadata["title"] == False:
metadata["title"] = ""
metadata["description"] = parser.initdescription

filestring = "_uploader_provided"
open("out/"+vid+"/"+vid+"_"+initlang+filestring+".json", "w", encoding="utf-8").write(dumps(metadata))
del metadata

del inttext del inttext


del langcode del langcode


Carregando…
Cancelar
Salvar