Compare commits
2 Commits
master
...
df-invid-r
Author | SHA1 | Date | |
---|---|---|---|
88b767d8e2 | |||
fc92c793d2 |
@ -89,8 +89,10 @@ class FakeYDL(YoutubeDL):
|
|||||||
self.report_warning = types.MethodType(report_warning, self)
|
self.report_warning = types.MethodType(report_warning, self)
|
||||||
|
|
||||||
|
|
||||||
def gettestcases(include_onlymatching=False, include_pagesuitable=True):
    """Yield every test case declared by the generated extractors.

    include_onlymatching is handed on unchanged to each extractor's
    get_testcases().  When include_pagesuitable is false, any extractor
    that has a callable page_suitable attribute is left out completely.
    """
    for extractor in youtube_dl.extractor.gen_extractors():
        if not include_pagesuitable:
            # extractors selected by page content, not by URL, are excluded
            if callable(getattr(extractor, 'page_suitable', None)):
                continue
        for testcase in extractor.get_testcases(include_onlymatching):
            yield testcase
|
||||||
|
|
||||||
|
@ -76,7 +76,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
|
|
||||||
def test_no_duplicates(self):
|
def test_no_duplicates(self):
|
||||||
ies = gen_extractors()
|
ies = gen_extractors()
|
||||||
for tc in gettestcases(include_onlymatching=True):
|
for tc in gettestcases(include_onlymatching=True, include_pagesuitable=False):
|
||||||
url = tc['url']
|
url = tc['url']
|
||||||
for ie in ies:
|
for ie in ies:
|
||||||
if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
|
if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
|
||||||
|
@ -1613,6 +1613,7 @@ from .youtube import (
|
|||||||
YoutubeYtBeIE,
|
YoutubeYtBeIE,
|
||||||
YoutubeYtUserIE,
|
YoutubeYtUserIE,
|
||||||
YoutubeWatchLaterIE,
|
YoutubeWatchLaterIE,
|
||||||
|
InvidiousIE,
|
||||||
)
|
)
|
||||||
from .zapiks import ZapiksIE
|
from .zapiks import ZapiksIE
|
||||||
from .zattoo import (
|
from .zattoo import (
|
||||||
|
@ -7,7 +7,11 @@ import re
|
|||||||
import sys
|
import sys
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .youtube import YoutubeIE
|
from .youtube import (
|
||||||
|
YoutubeIE,
|
||||||
|
InvidiousIE,
|
||||||
|
)
|
||||||
|
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
compat_str,
|
compat_str,
|
||||||
@ -2633,6 +2637,10 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
|
youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
|
||||||
|
|
||||||
|
# Invidious YT front-end
|
||||||
|
if InvidiousIE.page_suitable(self, url, webpage):
|
||||||
|
return InvidiousIE.page_url_result(url, video_id, video_title, webpage)
|
||||||
|
|
||||||
matches = DailymotionIE._extract_urls(webpage)
|
matches = DailymotionIE._extract_urls(webpage)
|
||||||
if matches:
|
if matches:
|
||||||
return self.playlist_from_matches(matches, video_id, video_title)
|
return self.playlist_from_matches(matches, video_id, video_title)
|
||||||
|
@ -63,7 +63,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
# If True it will raise an error if no login info is provided
|
# If True it will raise an error if no login info is provided
|
||||||
_LOGIN_REQUIRED = False
|
_LOGIN_REQUIRED = False
|
||||||
|
|
||||||
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)'
|
_VIDEO_ID_RE = r'[0-9A-Za-z_-]{11}'
|
||||||
|
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z_-]{10,}|RDMM)'
|
||||||
|
_CHANNEL_ID_RE = r'(?:UC[0-9A-Za-z_-]{10,})'
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
"""
|
"""
|
||||||
@ -342,65 +344,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
class YoutubeIE(YoutubeBaseInfoExtractor):
|
class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
IE_DESC = 'YouTube.com'
|
IE_DESC = 'YouTube.com'
|
||||||
_INVIDIOUS_SITES = (
|
_CANONICAL_VIDEO_RE = r'https://www\.youtube\.com/watch\?v=%s' % YoutubeBaseInfoExtractor._VIDEO_ID_RE
|
||||||
# invidious-redirect websites
|
|
||||||
r'(?:www\.)?redirect\.invidious\.io',
|
|
||||||
r'(?:(?:www|dev)\.)?invidio\.us',
|
|
||||||
# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
|
|
||||||
r'(?:(?:www|no)\.)?invidiou\.sh',
|
|
||||||
r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
|
|
||||||
r'(?:www\.)?invidious\.kabi\.tk',
|
|
||||||
r'(?:www\.)?invidious\.13ad\.de',
|
|
||||||
r'(?:www\.)?invidious\.mastodon\.host',
|
|
||||||
r'(?:www\.)?invidious\.zapashcanon\.fr',
|
|
||||||
r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
|
|
||||||
r'(?:www\.)?invidious\.tinfoil-hat\.net',
|
|
||||||
r'(?:www\.)?invidious\.himiko\.cloud',
|
|
||||||
r'(?:www\.)?invidious\.reallyancient\.tech',
|
|
||||||
r'(?:www\.)?invidious\.tube',
|
|
||||||
r'(?:www\.)?invidiou\.site',
|
|
||||||
r'(?:www\.)?invidious\.site',
|
|
||||||
r'(?:www\.)?invidious\.xyz',
|
|
||||||
r'(?:www\.)?invidious\.nixnet\.xyz',
|
|
||||||
r'(?:www\.)?invidious\.048596\.xyz',
|
|
||||||
r'(?:www\.)?invidious\.drycat\.fr',
|
|
||||||
r'(?:www\.)?inv\.skyn3t\.in',
|
|
||||||
r'(?:www\.)?tube\.poal\.co',
|
|
||||||
r'(?:www\.)?tube\.connect\.cafe',
|
|
||||||
r'(?:www\.)?vid\.wxzm\.sx',
|
|
||||||
r'(?:www\.)?vid\.mint\.lgbt',
|
|
||||||
r'(?:www\.)?vid\.puffyan\.us',
|
|
||||||
r'(?:www\.)?yewtu\.be',
|
|
||||||
r'(?:www\.)?yt\.elukerio\.org',
|
|
||||||
r'(?:www\.)?yt\.lelux\.fi',
|
|
||||||
r'(?:www\.)?invidious\.ggc-project\.de',
|
|
||||||
r'(?:www\.)?yt\.maisputain\.ovh',
|
|
||||||
r'(?:www\.)?ytprivate\.com',
|
|
||||||
r'(?:www\.)?invidious\.13ad\.de',
|
|
||||||
r'(?:www\.)?invidious\.toot\.koeln',
|
|
||||||
r'(?:www\.)?invidious\.fdn\.fr',
|
|
||||||
r'(?:www\.)?watch\.nettohikari\.com',
|
|
||||||
r'(?:www\.)?invidious\.namazso\.eu',
|
|
||||||
r'(?:www\.)?invidious\.silkky\.cloud',
|
|
||||||
r'(?:www\.)?invidious\.exonip\.de',
|
|
||||||
r'(?:www\.)?invidious\.riverside\.rocks',
|
|
||||||
r'(?:www\.)?invidious\.blamefran\.net',
|
|
||||||
r'(?:www\.)?invidious\.moomoo\.de',
|
|
||||||
r'(?:www\.)?ytb\.trom\.tf',
|
|
||||||
r'(?:www\.)?yt\.cyberhost\.uk',
|
|
||||||
r'(?:www\.)?kgg2m7yk5aybusll\.onion',
|
|
||||||
r'(?:www\.)?qklhadlycap4cnod\.onion',
|
|
||||||
r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
|
|
||||||
r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
|
|
||||||
r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
|
|
||||||
r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
|
|
||||||
r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
|
|
||||||
r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
|
|
||||||
r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
|
|
||||||
r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
|
|
||||||
r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
|
|
||||||
r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
|
|
||||||
)
|
|
||||||
_VALID_URL = r"""(?x)^
|
_VALID_URL = r"""(?x)^
|
||||||
(
|
(
|
||||||
(?:https?://|//) # http(s):// or protocol-independent URL
|
(?:https?://|//) # http(s):// or protocol-independent URL
|
||||||
@ -410,7 +354,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
(?:www\.)?hooktube\.com|
|
(?:www\.)?hooktube\.com|
|
||||||
(?:www\.)?yourepeat\.com|
|
(?:www\.)?yourepeat\.com|
|
||||||
tube\.majestyc\.net|
|
tube\.majestyc\.net|
|
||||||
%(invidious)s|
|
|
||||||
youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
|
youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
|
||||||
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
||||||
(?: # the various things that can precede the ID:
|
(?: # the various things that can precede the ID:
|
||||||
@ -425,16 +368,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
|(?:
|
|(?:
|
||||||
youtu\.be| # just youtu.be/xxxx
|
youtu\.be| # just youtu.be/xxxx
|
||||||
vid\.plus| # or vid.plus/xxxx
|
vid\.plus| # or vid.plus/xxxx
|
||||||
zwearz\.com/watch| # or zwearz.com/watch/xxxx
|
zwearz\.com/watch # or zwearz.com/watch/xxxx
|
||||||
%(invidious)s
|
|
||||||
)/
|
)/
|
||||||
|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
|
|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
|
||||||
)
|
)
|
||||||
)? # all until now is optional -> you can pass the naked ID
|
)? # all until now is optional -> you can pass the naked ID
|
||||||
(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
|
(?P<id>%(video_id)s) # here is it! the YouTube video ID
|
||||||
(?(1).+)? # if we found the ID, everything can follow
|
(?(1).+)? # if we found the ID, everything can follow
|
||||||
$""" % {
|
$""" % {
|
||||||
'invidious': '|'.join(_INVIDIOUS_SITES),
|
'video_id': YoutubeBaseInfoExtractor._VIDEO_ID_RE,
|
||||||
}
|
}
|
||||||
_PLAYER_INFO_RE = (
|
_PLAYER_INFO_RE = (
|
||||||
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
|
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
|
||||||
@ -944,19 +886,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'url': 'sJL6WA-aGkQ',
|
'url': 'sJL6WA-aGkQ',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
'url': 'https://invidio.us/watch?v=BaW_jenozKc',
|
|
||||||
'only_matching': True,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
|
|
||||||
'only_matching': True,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
|
|
||||||
'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
|
|
||||||
'only_matching': True,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
# DRM protected
|
# DRM protected
|
||||||
'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
|
'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
|
||||||
@ -2013,10 +1942,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:\w+\.)?
|
(?:\w+\.)?
|
||||||
(?:
|
youtube(?:kids)?\.com
|
||||||
youtube(?:kids)?\.com|
|
/
|
||||||
invidio\.us
|
|
||||||
)/
|
|
||||||
(?:
|
(?:
|
||||||
(?:channel|c|user|feed|hashtag)/|
|
(?:channel|c|user|feed|hashtag)/|
|
||||||
(?:playlist|watch)\?.*?\blist=|
|
(?:playlist|watch)\?.*?\blist=|
|
||||||
@ -2139,9 +2066,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
|
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 138,
|
'playlist_mincount': 138,
|
||||||
}, {
|
|
||||||
'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
|
'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -2192,9 +2116,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
'uploader': 'Computerphile',
|
'uploader': 'Computerphile',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 11,
|
'playlist_mincount': 11,
|
||||||
}, {
|
|
||||||
'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
}, {
|
||||||
# Playlist URL that does not actually serve a playlist
|
# Playlist URL that does not actually serve a playlist
|
||||||
'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
|
'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
|
||||||
@ -2870,7 +2791,6 @@ class YoutubePlaylistIE(InfoExtractor):
|
|||||||
(?:
|
(?:
|
||||||
(?:
|
(?:
|
||||||
youtube(?:kids)?\.com|
|
youtube(?:kids)?\.com|
|
||||||
invidio\.us
|
|
||||||
)
|
)
|
||||||
/.*?\?.*?\blist=
|
/.*?\?.*?\blist=
|
||||||
)?
|
)?
|
||||||
@ -3255,3 +3175,222 @@ class YoutubeTruncatedIDIE(InfoExtractor):
|
|||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
|
'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
|
||||||
expected=True)
|
expected=True)
|
||||||
|
|
||||||
|
|
||||||
|
class InvidiousIE(YoutubeIE):
    # Invidious Instances (#29885) per https://github.com/iv-org/invidious/pull/1730
    # Thanks: https://github.com/yt-dlp/yt-dlp/commit/df0c81513e0bb37986d00c532a5ad8cef31a24ea
    #
    # A page is recognised from its content by page_suitable(), not from its
    # host name; page_url_result() then passes the page URL to this extractor
    # smuggled inside a ytdlie:// URL, which is all that _VALID_URL matches.
    IE_NAME = 'invidious'
    IE_DESC = 'Invidious YT front-end videos, playlists, channels, searches'
    _VALID_URL = r'ytdlie://Invidious#(?P<id>.+)'
    # Pattern for the real page URL, on any host.  Numbered groups select the
    # form of the ID: 1 = video URL prefix, 2 = playlist, 3 = channel/user,
    # 4 = feed/hashtag, 5 = search (the ID is then the whole query string).
    _REAL_VALID_URL = r"""(?x)^
        (?:(
            (?:https?:)?//                                   # http(s):// or protocol-independent URL
            (?:[a-zA-Z\d-]+\.)+[a-zA-Z\d-]+/                 # any domain
            (?:.*?\#/)?                                      # handle anchor (#/) redirect urls
            (?:                                              # the various things that can precede the ID:
                (?:(?:v|embed|e)/(?!videoseries))?           # v/ or embed/ or e/, or nothing
                |(?:                                         # or the v= param in all its forms
                    (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                    (?:\?|\#!?)                              # the params delimiter ? or # or #!
                    (?:.*?[&;])??                            # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
                    v=
                )
            )
        )?|(?:                                               # all until now is optional -> you can pass the naked ID
            (?:https?:)?//                                   # http(s):// or protocol-independent URL
            (?:[a-zA-Z\d-]+\.)+[a-zA-Z\d-]+/                 # any domain
            (?:(
                (?:playlist|watch)\?.*?\blist=
                )|(
                (?:feed/)?(?:channel|c|user)/
                )|(
                (?:feed|hashtag)/
                )|(search\?(?=q=)
                )
            )
        )
        )
        (?(2)IV)?(?P<id>(?(2)%(playlist_id)s|
                    (?(3)%(channel_id)s|
                    (?(4)%(feed_id)s|
                    (?(5).+|
                    %(video_id)s))))
        )
        (?(1).*)                                             # if we found the ID, everything can follow
        $""" % {
        'video_id': YoutubeBaseInfoExtractor._VIDEO_ID_RE,
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'channel_id': YoutubeBaseInfoExtractor._CHANNEL_ID_RE,
        'feed_id': r'[0-9a-zA-Z_-]{10,}',
    }
    # Template for a <link> tag; %s is replaced by the attribute pattern sought
    _LINK_RE = r'<link\s[^>]*?%s[^>]*>'
    # <link title="Invidious">
    _LINK_TITLE_RE = _LINK_RE % r'title\s*=\s*(?P<q>"|\'|\b)Invidious(?P=q)'
    # Page title with its trailing "- Invidious" removed
    _TITLE_RE = r'<title\b[^>]*?>(.+)\s*-\s*Invidious</title>'
    _TESTS = [{
        # Invidious video page with standard link to YT
        'url': 'https://invidious.snopyta.org/watch?v=aU_jWooBxzI',
        'md5': 'fad656e510b491dcbefba0b0065ceb37',
        'info_dict': {
            'id': 'aU_jWooBxzI',
            'ext': 'mp4',
            'title': 'PCs are TOO Powerful… and it’s a problem',
            'thumbnail': r're:https?://i.ytimg.com/.+\.jpg',
            'upload_date': '20210818',
            'uploader': 'Linus Tech Tips',
            'uploader_id': 'LinusTechTips',
            'description': 'md5:749b04d3931048628191889dfb14c5ba',
            'duration': 708,
        },
    }, {
        # Invidious video page whose URL also carries an IV-prefixed list= parameter
        'url': 'https://invidious-us.kavin.rocks/watch?v=15TvLqK29PU&list=IVPLxy40xZSaui6mZCrEUbd-MeMQD41-k6D',
        'md5': '7a7ab808f6cee434361463161c046d25',
        'info_dict': {
            'id': '15TvLqK29PU',
            'ext': 'mp4',
            'title': 'md5:5130b529083cd4a692c4917beb059428',
            'thumbnail': 're:https?://i.ytimg.com/.+',
            'upload_date': '20120921',
            'uploader': 'md5:42326ad7441688122b035175a51de385',
            'uploader_id': 'UCuel_9Lg9WH9P5dFnXZ0zKQ',
            'description': 'md5:541ed05829043b077d920029641ad831',
            'duration': 366,
        },
        'params': {
            # Cloudflare breaks HTTP if Chrome is mentioned in the UA (2021-08)
            'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/6533.18.5 (KHTML, like Gecko) Safari/6533.18.5',
        },
        'skip': 'test_download doesn\'t respect the user_agent option',
    }, {
        # Invidious playlist, more than one page
        # Results may be fewer than YoutubePlaylistIE because of global deduplication
        'url': 'https://invidious.snopyta.org/playlist?list=PL143B3D4078ECAD35',
        'info_dict': {
            'id': 'PL143B3D4078ECAD35',
            'title': 'yum yum gimme sum',
        },
        'playlist_mincount': 95,
    }, {
        # Invidious channel, more than one page
        'url': 'https://invidious.snopyta.org/channel/UCFEFodsnfvA2diJOn7xxd0g',
        'info_dict': {
            'id': 'UCFEFodsnfvA2diJOn7xxd0g',
            'title': 'waaghalsrecords',
        },
        'playlist_mincount': 90,
    }, {
        # Invidious search, more than one page
        'url': 'https://invidious.snopyta.org/search?q=dale+"hawkins"',
        'info_dict': {
            'id': 'q=dale+"hawkins"',
            'title': 'dale "hawkins"',
        },
        'playlist_mincount': 410,
    }]

    @classmethod
    def page_suitable(cls, caller, url, webpage):
        '''Return truthy iff the webpage at the URL is suitable for the extractor

        The page is suitable when it contains a <link> tag whose title
        attribute is "Invidious"; the matched tag text is returned, or
        False when there is none.

        Arguments:
        cls     -- the InfoExtractor class being tested
        caller  -- the calling InfoExtractor instance
        url     -- compat_str url to test against the class
        webpage -- compat_str text of the page at the url
        '''

        # group=0: return the whole match, since the first capture group (the
        # quote character) is an empty, falsy, string for an unquoted title
        return caller._search_regex(
            cls._LINK_TITLE_RE, webpage, 'Invidious title', default=False, group=0)

    @classmethod
    def page_url_result(cls, url, video_id=None, video_title=None, webpage=None):
        '''Pass the URL to an extractor using the custom ytdlie:// scheme

        The real URL is smuggled into a ytdlie://<ie_key> URL, which is the
        form that _VALID_URL accepts.  webpage is accepted but not used.
        '''
        ie_key = cls.ie_key()
        url = smuggle_url('ytdlie://' + ie_key, {'url': url, })
        return cls.url_result(url, ie=ie_key, video_id=video_id, video_title=video_title)

    @classmethod
    def _real_match_id(cls, url):
        '''Return ID from url matched against _REAL_VALID_URL

        Raises ExtractorError if the url does not match, which can happen
        for a page of an Invidious site that is not one of the supported
        kinds (video, playlist, channel, feed, search).
        '''
        # compile once and cache on this class (not on a parent class)
        if '_REAL_VALID_URL_RE' not in cls.__dict__:
            cls._REAL_VALID_URL_RE = re.compile(cls._REAL_VALID_URL)
        m = cls._REAL_VALID_URL_RE.match(url)
        if not m:
            # an explicit error, rather than an assert that python -O removes
            raise ExtractorError(
                'Unsupported Invidious URL: %s' % url, expected=True)
        return compat_str(m.group('id'))

    def _real_extract(self, url):
        '''Return the result for the Invidious page whose URL is smuggled in url

        A single video page gives a url_result for the canonical YouTube
        URL found in the page's <link rel="alternate"> tag.  Any other page
        is treated as a listing: the /watch?v= links in it, and in the
        pages reached by following its page=N links, are returned as a
        playlist of YouTube video URLs.

        Arguments:
        url -- compat_str ytdlie:// URL as made by page_url_result()
        '''
        real_url = unsmuggle_url(url, {})[1].get('url')
        if not real_url:
            raise ExtractorError(
                'No Invidious page URL was passed in %s' % url, expected=True)
        url = real_url
        video_id = self._real_match_id(url)
        webpage = self._download_webpage(url, video_id)

        # single video page?
        # rel="alternate" may come before (n=1) or after (n=2) the href
        REL_ALT_RE = r'(?P<rel%(n)d>rel\s*=\s*(?P<q%(n)d>"|\'|\b)alternate(?P=q%(n)d))'
        LINK_REL_ALT_TEMPL = (
            r'''
                %(rel_alt1)s                            # rel="alternate"
                \s[^>]*?
                href\s*=\s*(?P<q0>"|\'|\b)              # href="invid URL"
                (?P<invid_url>%(canonical_video_url)s)(?P=q0)
                (?(rel1)|\s[^>]*?%(rel_alt2)s)          # rel="alternate" if following
            ''')
        LINK_REL_ALT_RE = '(?x)' + self._LINK_RE % LINK_REL_ALT_TEMPL
        yt_url = self._search_regex(
            LINK_REL_ALT_RE
            % {
                'rel_alt1': REL_ALT_RE % {'n': 1, },
                'canonical_video_url': YoutubeIE._CANONICAL_VIDEO_RE,
                'rel_alt2': REL_ALT_RE % {'n': 2, },
            },
            webpage, 'youtube link', default=None, group='invid_url')
        if yt_url:
            return self.url_result(yt_url, ie=YoutubeIE.ie_key(), video_id=video_id)

        # perhaps it's a playlist or a channel?
        title = self._html_search_regex(self._TITLE_RE, webpage, 'page title', default=None)

        # link to a further page; %s is filled with the page number pattern
        NEXT_PAGE_RE = r'''(?x)
            <a\s[^>]*?href\s*=\s*(?P<q>"|\'|\b)
            (?P<next_page>.+?[&?]page=(?P<page_num>%s))
            (?P=q)>
        '''

        # generate all video links from page and further pages
        def gen_extract(url, webpage, video_id):
            # number of the first page link found in the page, else 2
            first_page_num = (int_or_none(
                self._search_regex(
                    NEXT_PAGE_RE % r'\d+',
                    webpage, 'next page num', default=None, group='page_num'))
                or 2)
            # now a template to be filled with each specific page number
            next_page_re = NEXT_PAGE_RE % '%d'
            # video links, but not their audio-only (listen=1) variants
            VIDEO_LINK_RE = r'''(?x)
                <a\s[^>]*?
                href\s*=\s*(?P<q>"|\'|\b)
                /watch\?v=(%s)(?!.+\blisten=1.*).*?
                (?P=q)
            ''' % self._VIDEO_ID_RE
            for n in itertools.count(first_page_num):
                # each match is (quote, video ID)
                for _, vid in re.findall(VIDEO_LINK_RE, webpage):
                    yield 'https://www.youtube.com/watch?v=%s' % vid
                next_page_url = self._search_regex(
                    next_page_re % n,
                    webpage, 'next page', default=None, group='next_page')
                webpage = (
                    next_page_url
                    and self._download_webpage(urljoin(url, next_page_url), video_id, fatal=False))

                # stop when there is no link to page n, or it can't be fetched
                if not webpage:
                    break

        return self.playlist_from_matches(
            gen_extract(url, webpage, video_id),
            playlist_id=video_id, playlist_title=title,
            ie=YoutubeIE.ie_key())
||||||
|
Loading…
x
Reference in New Issue
Block a user