Преглед на файлове

Significant optimization

pull/3/head
tech234a преди 3 години
родител
ревизия
7fd09ed2ae
променен е 1 файл, в който са добавени 38 реда и са изтрити 33 реда
  1. +38
    -33
      export.py

+ 38
- 33
export.py Целия файл

@@ -88,41 +88,46 @@ def subprrun(jobs, headers):


assert not "accounts.google.com" in page.url, "Please supply authentication cookie information in config.json. See README.md for more information." assert not "accounts.google.com" in page.url, "Please supply authentication cookie information in config.json. See README.md for more information."


parser = MyHTMLParser()
parser.feed(page.text)
inttext = page.text
del page del page


captiontext = False
for item in parser.captions:
if item["text"][:-9]:
captiontext = True

if captiontext:
myfs = open("out/"+vid+"/"+vid+"_"+langcode+".sbv", "w", encoding="utf-8")
captions = parser.captions
captions.pop(0) #get rid of the fake one
while captions:
item = captions.pop(0)

myfs.write(timedelta_to_sbv_timestamp(timedelta(milliseconds=item["startTime"])) + "," + timedelta_to_sbv_timestamp(timedelta(milliseconds=item["endTime"])) + "\n" + item["text"][:-9] + "\n")
del item
if captions:
myfs.write("\n")
del captions
myfs.close()
del myfs

del captiontext

if parser.title or parser.description[:-16]:
metadata = {}
metadata["title"] = parser.title
if metadata["title"] == False:
metadata["title"] = ""
metadata["description"] = parser.description[:-16]
open("out/"+vid+"/"+vid+"_"+langcode+".json", "w", encoding="utf-8").write(dumps(metadata))
del metadata
if 'id="reject-captions-button"' in inttext or 'id="reject-metadata-button"' in inttext: #quick way of checking if this page is worth parsing
parser = MyHTMLParser()
parser.feed(inttext)

captiontext = False
for item in parser.captions:
if item["text"][:-9]:
captiontext = True

if captiontext:
myfs = open("out/"+vid+"/"+vid+"_"+langcode+".sbv", "w", encoding="utf-8")
captions = parser.captions
captions.pop(0) #get rid of the fake one
while captions:
item = captions.pop(0)

myfs.write(timedelta_to_sbv_timestamp(timedelta(milliseconds=item["startTime"])) + "," + timedelta_to_sbv_timestamp(timedelta(milliseconds=item["endTime"])) + "\n" + item["text"][:-9] + "\n")
del item
if captions:
myfs.write("\n")
del captions
myfs.close()
del myfs

del captiontext

if parser.title or parser.description[:-16]:
metadata = {}
metadata["title"] = parser.title
if metadata["title"] == False:
metadata["title"] = ""
metadata["description"] = parser.description[:-16]
open("out/"+vid+"/"+vid+"_"+langcode+".json", "w", encoding="utf-8").write(dumps(metadata))
del metadata

del inttext


del langcode del langcode
del vid del vid


Зареждане…
Отказ
Запис