diff --git a/youtube-extract b/youtube-extract index 614716e..3cef573 100755 --- a/youtube-extract +++ b/youtube-extract @@ -53,10 +53,10 @@ noisePattern = '|'.join([ ]) channelPattern = '|'.join([ - r'/www\.youtube\.com/c/[^/?&=.\s]+', - r'/www\.youtube\.com/user/[^/?&=.\s]+', + r'''/www\.youtube\.com/c/[^/?&=."'>\s]+''', + r'/www\.youtube\.com/user/[A-Za-z0-9]{1,20}', r'/www\.youtube\.com/channel/UC[0-9A-Za-z_-]{22}', - r'/www\.youtube\.com/[^/?&=.\s]+(?=/?(\s|$))', + r'''/www\.youtube\.com/[^/?&=."'>\s]+(?=/?(\s|["'>]|$))''', ]) # Make sure that the last 11 chars of the match are always the video ID (because Python's re doesn't support \K). @@ -98,9 +98,9 @@ matchers = [ [videoPattern, True, lambda m: f'https://www.youtube.com/watch?v={m[0][-11:]}'], [r'/www\.youtube\.com/(?:playlist|watch|embed(?:/videoseries|/\+lastest|/playlist)?/?)\?(?:\S*&)?list=UU([0-9A-Za-z_-]+)', True, lambda m: f'https://www.youtube.com/channel/UC{m[1]}'], [r'/www\.youtube\.com/(?:playlist|watch|embed(?:/videoseries|/\+lastest|/playlist)?/?)\?(?:\S*&)?list=((PL|FL|RD|OL)[0-9A-Za-z_-]+)', True, lambda m: f'https://www.youtube.com/playlist?list={m[1]}'], - [r'/www\.youtube\.com/embed/?\?(?=(?:\S*&)?listType=user_uploads(?:&|$))(?:\S*&)?list=([^&\s]+)', True, lambda m: f'https://www.youtube.com/user/{m[1]}'], - [r'/www\.youtube\.com/rss/user/([^/?\s]+)', True, lambda m: f'https://www.youtube.com/user/{m[1]}'], - [r'/www\.youtube\.com/(?:subscription_center\?(?:\S*&)?add_user=|subscribe_widget\?(?:\S*&)?p=|profile\?(?:\S*&)?user=)([^/=&\s]+)(?=(&|\s|$))', True, lambda m: f'https://www.youtube.com/user/{m[1]}'], + [r'/www\.youtube\.com/embed/?\?(?=(?:\S*&)?listType=user_uploads(?:&|$))(?:\S*&)?list=([A-Za-z0-9]{1,20})', True, lambda m: f'https://www.youtube.com/user/{m[1]}'], + [r'/www\.youtube\.com/rss/user/([A-Za-z0-9]{1,20})', True, lambda m: f'https://www.youtube.com/user/{m[1]}'], + [r'/www\.youtube\.com/(?:subscription_center\?(?:\S*&)?add_user=|subscribe_widget\?(?:\S*&)?p=|profile\?(?:\S*&)?user=)([A-Za-z0-9]{1,20})', True, lambda m: f'https://www.youtube.com/user/{m[1]}'], [r'/www\.youtube\.com/feeds/videos\.xml\?(?:\S*&)?channel_id=(UC[0-9A-Za-z_-]+)', True, lambda m: f'https://www.youtube.com/channel/{m[1]}'], [r'/www\.youtube\.com(?:/view_play_list\?(?:\S*&)?p=|/playlist\?(?:.*&)?list=)([0-9A-F]{16})(?=(&|\s|$))', True, lambda m: f'https://www.youtube.com/playlist?list=PL{m[1]}'], [r'/(?i:i\.ytimg\.com|img\.youtube\.com)(?::\d+)?/vi/([0-9A-Za-z_-]{11})/', True, lambda m: f'https://www.youtube.com/watch?v={m[1]}'],