diff --git a/youtube-extract b/youtube-extract
index d4acfe8..fbe3dd8 100755
--- a/youtube-extract
+++ b/youtube-extract
@@ -17,10 +17,13 @@ if any(x in sys.argv for x in ['--help', '-h', '-?', 'help']):
 
 mode = sys.argv[1] if len(sys.argv) >= 2 else 'massage'
 
+# Only one leading slash so it still matches inside URLs when slashes were collapsed; defined before the mode check because removenonyt builds its grep pattern from it.
+domainPattern = re.compile(r'/(www\.)?(youtube\.(com|de|fr|co\.uk|it|es|at|pt|gr|hu|ro|pl|dk|no|se|fi|ee|lt|lv|ru|by|cz|sk|si|rs|hr|ca)|youtube-nocookie\.com)/')
+
 
 if mode == 'removenonyt':
     # Anything in here could never be as fast as grep, so just delegate to that...
-    os.execlp('grep', 'grep', '-F', '-e', '/www.youtube.com/', '-e', '/youtu.be/', '-e', '%2Fwww.youtube.com%2F', '-e', '%2Fyoutu.be%2F')
+    os.execlp('grep', 'grep', '-P', domainPattern.pattern + '|' + domainPattern.pattern.replace('/', '%2F') + r'|/youtu\.be/|%2Fyoutu\.be%2F')
     sys.exit(0)
 
 assert mode == 'massage'
@@ -104,9 +107,6 @@ matchers = [
 for e in matchers:
     e[0] = re.compile(e[0])
 
-# Only one slash before so it still matches inside URLs when slashes were collapsed.
-domainPattern = re.compile(r'/(www\.)?youtube\.(com|de|fr|co\.uk|it|es|at|pt|gr|hu|ro|pl|dk|no|se|fi|ee|lt|lv|ru|by|cz|sk|si|rs|hr|ca)/')
-
 for origLine in sys.stdin:
     origLine = origLine.strip()
     line = re.sub(r'^https?://', '//', origLine)