Browse Source

Handle youtube-nocookie.com and fix removenonyt mode not recognising CC domains

master
JustAnotherArchivist 3 years ago
parent
commit
362f66eb26
1 changed file with 4 additions and 4 deletions
  1. +4
    -4
      youtube-extract

+ 4
- 4
youtube-extract View File

@@ -17,10 +17,13 @@ if any(x in sys.argv for x in ['--help', '-h', '-?', 'help']):

# Operating mode: the first command-line argument, or 'massage' when none is given.
mode = sys.argv[1] if len(sys.argv) >= 2 else 'massage'

# Only one slash before so it still matches inside URLs when slashes were collapsed.
# Matches, between two slashes, an optional "www." followed by either youtube.<TLD>
# (for the TLDs listed in the alternation) or youtube-nocookie.com.
domainPattern = re.compile(r'/(www\.)?(youtube\.(com|de|fr|co\.uk|it|es|at|pt|gr|hu|ro|pl|dk|no|se|fi|ee|lt|lv|ru|by|cz|sk|si|rs|hr|ca)|youtube-nocookie\.com)/')


if mode == 'removenonyt':
    # Anything in here could never be as fast as grep, so just delegate to that...
    # grep -P receives a single alternation made of: the domain pattern as-is, the
    # same pattern with every '/' replaced by '%2F', and youtu.be in both its plain
    # and %2F-delimited forms.
    # The youtu.be fragment is a raw string so that `\.` reaches grep unchanged
    # without being an invalid escape sequence in a normal string literal.
    os.execlp('grep', 'grep', '-P', domainPattern.pattern + '|' + domainPattern.pattern.replace('/', '%2F') + r'|/youtu\.be/|%2Fyoutu\.be%2F')
    # Not reached on success: execlp replaces the current process, and raises OSError on failure.
    sys.exit(0)
assert mode == 'massage'

@@ -104,9 +107,6 @@ matchers = [
# Compile every matcher in place: element 0 of each entry (the pattern source)
# is replaced by its compiled regex object.
for e in matchers:
    e[0] = re.compile(e[0])

# NOTE: domainPattern is intentionally not (re)defined here. It is defined once
# near the top of the script, before the mode dispatch, so that the 'removenonyt'
# and 'massage' modes share the same pattern (including youtube-nocookie.com).

for origLine in sys.stdin:
origLine = origLine.strip()
line = re.sub(r'^https?://', '//', origLine)


Loading…
Cancel
Save