From 65712d99c46454a475f02337911a2cccaf7ece8a Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 7 Apr 2022 23:08:42 +0100 Subject: [PATCH] Support Series page --- youtube_dl/extractor/rtve.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index f060ffdc9..638eddd4d 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -9,7 +9,9 @@ import sys from .common import InfoExtractor from ..compat import ( compat_b64decode, + compat_parse_qs, compat_struct_unpack, + compat_urllib_parse_urlparse, ) from ..utils import ( determine_ext, @@ -25,8 +27,8 @@ _bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(ch class RTVEALaCartaIE(InfoExtractor): - IE_NAME = 'rtve.es:alacarta' - IE_DESC = 'RTVE a la carta' + IE_NAME = 'rtve.es:play' + IE_DESC = 'RTVE Play' _VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?((alacarta|playz?)/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)' _TESTS = [{ @@ -90,6 +92,12 @@ class RTVEALaCartaIE(InfoExtractor): }, { 'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/', 'only_matching': True, + }, { + 'url': 'https://www.rtve.es/play/videos/modulos/capitulos/11332/?currentpage=pf_serie', + 'info_dict': { + 'id': '11332', + }, + 'playlist_mincount': 20, }] def _real_initialize(self): @@ -164,8 +172,21 @@ class RTVEALaCartaIE(InfoExtractor): self._sort_formats(formats) return formats + def _extract_playlist(self, url, playlist_id): + + webpage = self._download_webpage(url, playlist_id) + + matches = re.findall(r'''<a\b[^>]*\bhref\s*=\s*["'](%s)''' % (self._VALID_URL, ), webpage) + + return self.playlist_from_matches(matches, playlist_id=playlist_id, getter=lambda x: x[0], ie=self.ie_key()) + def _real_extract(self, url): video_id = self._match_id(url) + + qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + if 'pf_serie' == qs.get('currentpage', [None])[-1]: +
return self._extract_playlist(url, video_id) + info = self._download_json( 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id, video_id)['page']['items'][0]