From 7cc35d10511f4ead5e92a3949a444d81ecadb119 Mon Sep 17 00:00:00 2001 From: Alba Mendez Date: Sat, 17 Sep 2022 17:21:24 +0200 Subject: [PATCH 01/10] fix decoding logic --- youtube_dl/extractor/rtve.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index d2fb754cf..cecea5812 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -86,8 +86,12 @@ class RTVEALaCartaIE(InfoExtractor): break data = encrypted_data.read(length) if chunk_type == b'tEXt': - alphabet_data, text = data.split(b'\0') - quality, url_data = text.split(b'%%') + alphabet_data, text = data.replace(b'\0', b'').split(b'#') + components = text.split(b'%%') + if len(components) < 2: + components.insert(0, b'') + quality, url_data = components + alphabet = [] e = 0 d = 0 From 33650c8eb69e87ef49d463e050dc4a8082d23ece Mon Sep 17 00:00:00 2001 From: Alba Mendez Date: Sat, 17 Sep 2022 17:27:57 +0200 Subject: [PATCH 02/10] fix URLs change to ztnr.rtve.es domain doesn't seem to be required, but switch to it just in case the old route is dropped someday --- youtube_dl/extractor/rtve.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index cecea5812..eb3c1e13f 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -27,7 +27,7 @@ _bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(ch class RTVEALaCartaIE(InfoExtractor): IE_NAME = 'rtve.es:alacarta' IE_DESC = 'RTVE a la carta' - _VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/(playz?/videos|filmoteca)/[^/]+/[^/]+/(?P\d+)' _TESTS = [{ 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', @@ -124,7 +124,7 @@ class RTVEALaCartaIE(InfoExtractor): def _extract_png_formats(self, 
video_id): png = self._download_webpage( - 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id), + 'http://ztnr.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id), video_id, 'Downloading url information', query={'q': 'v2'}) q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL']) formats = [] @@ -205,7 +205,7 @@ class RTVEInfantilIE(RTVEALaCartaIE): class RTVELiveIE(RTVEALaCartaIE): IE_NAME = 'rtve.es:live' IE_DESC = 'RTVE.es live streams' - _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P[a-zA-Z0-9-]+)' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/play/videos/directo/(?P.+)' _TESTS = [{ 'url': 'http://www.rtve.es/directo/la-1/', @@ -263,7 +263,7 @@ class RTVETelevisionIE(InfoExtractor): webpage = self._download_webpage(url, page_id) alacarta_url = self._search_regex( - r'data-location="alacarta_videos"[^<]+url":"(http://www\.rtve\.es/alacarta.+?)&', + r'data-location="alacarta_videos"[^<]+url":"(https?://www\.rtve\.es/play.+?)&', webpage, 'alacarta url', default=None) if alacarta_url is None: raise ExtractorError( From 11bd5b961283f2761669d6714ca4f2ac06d6ea4e Mon Sep 17 00:00:00 2001 From: Alba Mendez Date: Sat, 17 Sep 2022 19:08:04 +0200 Subject: [PATCH 03/10] fix rtve.es:live --- youtube_dl/extractor/rtve.py | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index eb3c1e13f..9fd591cee 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -16,8 +16,6 @@ from ..utils import ( ExtractorError, float_or_none, qualities, - remove_end, - remove_start, std_headers, ) @@ -147,7 +145,9 @@ class RTVEALaCartaIE(InfoExtractor): return formats def _real_extract(self, url): - video_id = self._match_id(url) + return self._real_extract_from_id(self._match_id(url)) + + def _real_extract_from_id(self, video_id): info = self._download_json( 
'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id, video_id)['page']['items'][0] @@ -220,25 +220,11 @@ class RTVELiveIE(RTVEALaCartaIE): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - webpage = self._download_webpage(url, video_id) - title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es') - title = remove_start(title, 'Estoy viendo ') - - vidplayer_id = self._search_regex( - (r'playerId=player([0-9]+)', - r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)', - r'data-id=["\'](\d+)'), + webpage = self._download_webpage(url, self._match_id(url)) + asset_id = self._search_regex( + r'class=["\'].*?\bvideoPlayer\b.*?["\'][^>]+data-setup=[^>]+?(?:"|")idAsset(?:"|")\s*:\s*(?:"|")(\d+)(?:"|")', webpage, 'internal video ID') - - return { - 'id': video_id, - 'title': self._live_title(title), - 'formats': self._extract_png_formats(vidplayer_id), - 'is_live': True, - } + return self._real_extract_from_id(asset_id) class RTVETelevisionIE(InfoExtractor): From 050b52baf9e90f0b25b8354182389119f59e3d4a Mon Sep 17 00:00:00 2001 From: Alba Mendez Date: Sat, 17 Sep 2022 17:26:02 +0200 Subject: [PATCH 04/10] rename rtve.es:alacarta to rtve.es:play --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/rtve.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 751fc38b6..c0fb3815b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1044,7 +1044,7 @@ from .rtl2 import ( ) from .rtp import RTPIE from .rts import RTSIE -from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE +from .rtve import RTVEPlayIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE from .rtvnh import RTVNHIE from .rtvs import RTVSIE from .ruhd import RUHDIE diff --git a/youtube_dl/extractor/rtve.py 
b/youtube_dl/extractor/rtve.py index 9fd591cee..2686b6e37 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -22,9 +22,9 @@ from ..utils import ( _bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(chr, x)) -class RTVEALaCartaIE(InfoExtractor): - IE_NAME = 'rtve.es:alacarta' - IE_DESC = 'RTVE a la carta' +class RTVEPlayIE(InfoExtractor): + IE_NAME = 'rtve.es:play' + IE_DESC = 'RTVE Play' _VALID_URL = r'https?://(?:www\.)?rtve\.es/(playz?/videos|filmoteca)/[^/]+/[^/]+/(?P\d+)' _TESTS = [{ @@ -183,7 +183,7 @@ class RTVEALaCartaIE(InfoExtractor): for s in subs) -class RTVEInfantilIE(RTVEALaCartaIE): +class RTVEInfantilIE(RTVEPlayIE): IE_NAME = 'rtve.es:infantil' IE_DESC = 'RTVE infantil' _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P[0-9]+)/' @@ -202,7 +202,7 @@ class RTVEInfantilIE(RTVEALaCartaIE): }] -class RTVELiveIE(RTVEALaCartaIE): +class RTVELiveIE(RTVEPlayIE): IE_NAME = 'rtve.es:live' IE_DESC = 'RTVE.es live streams' _VALID_URL = r'https?://(?:www\.)?rtve\.es/play/videos/directo/(?P.+)' @@ -255,4 +255,4 @@ class RTVETelevisionIE(InfoExtractor): raise ExtractorError( 'The webpage doesn\'t contain any video', expected=True) - return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key()) + return self.url_result(alacarta_url, ie=RTVEPlayIE.ie_key()) From 52858d5879067e15f095da51336c33bb7080c9ef Mon Sep 17 00:00:00 2001 From: Alba Mendez Date: Sat, 17 Sep 2022 17:27:03 +0200 Subject: [PATCH 05/10] generalize to other URLs --- youtube_dl/extractor/rtve.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 2686b6e37..4cdfe18c0 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -229,7 +229,8 @@ class RTVELiveIE(RTVEPlayIE): class RTVETelevisionIE(InfoExtractor): IE_NAME = 'rtve.es:television' - _VALID_URL = 
r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P\d+).shtml' + # https://www.rtve.es/SECTION/YYYYMMDD/CONTENT_SLUG/CONTENT_ID.shtml + _VALID_URL = r'https?://(?:www\.)?rtve\.es/[^/]+/\d{8}/[^/]+/(?P\d+)\.shtml' _TEST = { 'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml', From 4a48a17eea32d8fa8d30a822d54173cd7f4dc49a Mon Sep 17 00:00:00 2001 From: Alba Mendez Date: Sat, 17 Sep 2022 18:13:44 +0200 Subject: [PATCH 06/10] make tests up to date --- youtube_dl/extractor/rtve.py | 85 +++++++++++++++++++++++++++++------- 1 file changed, 69 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 4cdfe18c0..d0f148cd9 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -29,7 +29,7 @@ class RTVEPlayIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', - 'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43', + 'md5': '2c70aacf8a415d1b4e7fcc0525951162', 'info_dict': { 'id': '2491869', 'ext': 'mp4', @@ -52,7 +52,7 @@ class RTVEPlayIE(InfoExtractor): }, }, { 'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/', - 'md5': 'd850f3c8731ea53952ebab489cf81cbf', + 'md5': '30b8827cba25f39d1af5a7c482cc8ac5', 'info_dict': { 'id': '4236788', 'ext': 'mp4', @@ -189,14 +189,14 @@ class RTVEInfantilIE(RTVEPlayIE): _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P[0-9]+)/' _TESTS = [{ - 'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/', - 'md5': '5747454717aedf9f9fdf212d1bcfc48d', + 'url': 'https://www.rtve.es/infantil/serie/dino-ranch/video/pequeno-gran-ayudante/6693248/', + 'md5': '06d3f57eec593ad93fe9dcf079fbd940', 'info_dict': { - 'id': '3040283', + 'id': '6693248', 'ext': 'mp4', - 'title': 'Maneras de vivir', - 'thumbnail': r're:https?://.+/1426182947956\.JPG', - 'duration': 357.958, + 'title': 'Un 
pequeño gran ayudante', + 'thumbnail': r're:https?://.+/1663318364013\.jpg', + 'duration': 691.44, }, 'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'], }] @@ -208,15 +208,35 @@ class RTVELiveIE(RTVEPlayIE): _VALID_URL = r'https?://(?:www\.)?rtve\.es/play/videos/directo/(?P.+)' _TESTS = [{ - 'url': 'http://www.rtve.es/directo/la-1/', + 'url': 'https://www.rtve.es/play/videos/directo/la-1/', 'info_dict': { - 'id': 'la-1', + 'id': '1688877', 'ext': 'mp4', 'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', }, 'params': { 'skip_download': 'live stream', } + }, { + 'url': 'https://www.rtve.es/play/videos/directo/canales-lineales/la-1/', + 'info_dict': { + 'id': '1688877', + 'ext': 'mp4', + 'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + }, + 'params': { + 'skip_download': 'live stream', + } + }, { + 'url': 'https://www.rtve.es/play/videos/directo/canales-lineales/capilla-ardiente-isabel-westminster/10886/', + 'info_dict': { + 'id': '1938028', + 'ext': 'mp4', + 'title': 're:^Mas24 - 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + }, + 'params': { + 'skip_download': 'live stream', + } }] def _real_extract(self, url): @@ -232,18 +252,51 @@ class RTVETelevisionIE(InfoExtractor): # https://www.rtve.es/SECTION/YYYYMMDD/CONTENT_SLUG/CONTENT_ID.shtml _VALID_URL = r'https?://(?:www\.)?rtve\.es/[^/]+/\d{8}/[^/]+/(?P\d+)\.shtml' - _TEST = { - 'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml', + _TESTS = [{ + 'url': 'https://www.rtve.es/television/20220916/destacados-festival-san-sebastian-rtve-play/2395620.shtml', + 'info_dict': { + 'id': '6668919', + 'ext': 'mp4', + 'title': 'Las películas del Festival de San Sebastián en RTVE Play', + 'duration': 20.048, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.rtve.es/noticias/20220917/penelope-cruz-san-sebastian-premio-nacional/2402565.shtml', 'info_dict': { - 'id': '3069778', + 'id': 
'6694087', 'ext': 'mp4', - 'title': 'Documentos TV - La revolución del móvil', - 'duration': 3496.948, + 'title': 'Penélope Cruz recoge el Premio Nacional de Cinematografía: "No dejen nunca de proteger nuestro cine"', + 'duration': 388.2, }, 'params': { 'skip_download': True, }, - } + }, { + 'url': 'https://www.rtve.es/deportes/20220917/motogp-bagnaia-pole-marquez-decimotercero-motorland-aragon/2402566.shtml', + 'info_dict': { + 'id': '6694142', + 'ext': 'mp4', + 'title': "Bagnaia logra su quinta 'pole' del año y Márquez partirá decimotercero", + 'duration': 153.44, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.rtve.es/playz/20220807/covaleda-fest-final/2394809.shtml', + 'info_dict': { + 'id': '6665408', + 'ext': 'mp4', + 'title': 'Festivales Playz: Covaleda Fest (Soria) - Día 3 con Marc Seguí, Rizha y Judeline', + 'duration': 12009.92, + }, + 'params': { + 'skip_download': True, + }, + }] def _real_extract(self, url): page_id = self._match_id(url) From d7939e2c079b7e663bcc3485617151528e4c49bd Mon Sep 17 00:00:00 2001 From: Alba Mendez Date: Sat, 17 Sep 2022 20:07:29 +0200 Subject: [PATCH 07/10] improve metadata grab metadata from the new endpoint (videos/%d.json) instead of the legacy endpoint (videos/%d/config/alacarta_videos.json), which is what is actually used for the UI now. 
- gives more up to date titles - adds description / URL - makes supporting audios easy (see next commit) --- youtube_dl/extractor/rtve.py | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index d0f148cd9..ea0826328 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -12,6 +12,7 @@ from ..compat import ( compat_struct_unpack, ) from ..utils import ( + clean_html, determine_ext, ExtractorError, float_or_none, @@ -33,7 +34,8 @@ class RTVEPlayIE(InfoExtractor): 'info_dict': { 'id': '2491869', 'ext': 'mp4', - 'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia', + 'title': 'Final de la Swiss Cup masculina: España-Suecia', + 'description': 'Swiss Cup masculina, Final: España-Suecia.', 'duration': 5024.566, 'series': 'Balonmano', }, @@ -45,6 +47,7 @@ class RTVEPlayIE(InfoExtractor): 'id': '1694255', 'ext': 'mp4', 'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': '24H LIVE', 'is_live': True, }, 'params': { @@ -56,7 +59,8 @@ class RTVEPlayIE(InfoExtractor): 'info_dict': { 'id': '4236788', 'ext': 'mp4', - 'title': 'Servir y proteger - Capítulo 104', + 'title': 'Capítulo 104', + 'description': 'md5:caae29ae04291875e611dd667fe84641', 'duration': 3222.0, }, 'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'], @@ -149,9 +153,9 @@ class RTVEPlayIE(InfoExtractor): def _real_extract_from_id(self, video_id): info = self._download_json( - 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id, + 'http://www.rtve.es/api/videos/%s.json' % video_id, video_id)['page']['items'][0] - if info['state'] == 'DESPU': + if (info.get('pubState') or {}).get('code') == 'DESPU': raise ExtractorError('The video is no longer available', expected=True) title = info['title'].strip() formats = self._extract_png_formats(video_id) @@ -161,17 +165,19 @@ class 
RTVEPlayIE(InfoExtractor): if sbt_file: subtitles = self.extract_subtitles(video_id, sbt_file) - is_live = info.get('live') is True + is_live = info.get('consumption') == 'live' return { 'id': video_id, 'title': self._live_title(title) if is_live else title, 'formats': formats, - 'thumbnail': info.get('image'), + 'url': info.get('htmlUrl'), + 'description': clean_html(info.get('description')), + 'thumbnail': info.get('thumbnail'), 'subtitles': subtitles, 'duration': float_or_none(info.get('duration'), 1000), 'is_live': is_live, - 'series': info.get('programTitle'), + 'series': (info.get('programInfo') or {}).get('title'), } def _get_subtitles(self, video_id, sub_file): @@ -195,7 +201,8 @@ class RTVEInfantilIE(RTVEPlayIE): 'id': '6693248', 'ext': 'mp4', 'title': 'Un pequeño gran ayudante', - 'thumbnail': r're:https?://.+/1663318364013\.jpg', + 'description': 'md5:144ca351e31f9ee99a637ab9fc2787d5', + 'thumbnail': r're:https?://.+/1663318364501\.jpg', 'duration': 691.44, }, 'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'], @@ -213,6 +220,7 @@ class RTVELiveIE(RTVEPlayIE): 'id': '1688877', 'ext': 'mp4', 'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'La 1', }, 'params': { 'skip_download': 'live stream', @@ -223,6 +231,7 @@ class RTVELiveIE(RTVEPlayIE): 'id': '1688877', 'ext': 'mp4', 'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'La 1', }, 'params': { 'skip_download': 'live stream', @@ -233,6 +242,7 @@ class RTVELiveIE(RTVEPlayIE): 'id': '1938028', 'ext': 'mp4', 'title': 're:^Mas24 - 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'Mas24 - 1', }, 'params': { 'skip_download': 'live stream', @@ -258,6 +268,7 @@ class RTVETelevisionIE(InfoExtractor): 'id': '6668919', 'ext': 'mp4', 'title': 'Las películas del Festival de San Sebastián en RTVE Play', + 'description': 'El\xa0Festival de San Sebastián vuelve a llenarse de artistas. 
Y en su honor,\xa0RTVE Play\xa0destacará cada viernes una\xa0película galardonada\xa0con la\xa0Concha de Oro\xa0en su catálogo.', 'duration': 20.048, }, 'params': { @@ -269,6 +280,7 @@ class RTVETelevisionIE(InfoExtractor): 'id': '6694087', 'ext': 'mp4', 'title': 'Penélope Cruz recoge el Premio Nacional de Cinematografía: "No dejen nunca de proteger nuestro cine"', + 'description': 'md5:eda9e6baa78dbbbcc7708c0cc8150a91', 'duration': 388.2, }, 'params': { @@ -280,6 +292,7 @@ class RTVETelevisionIE(InfoExtractor): 'id': '6694142', 'ext': 'mp4', 'title': "Bagnaia logra su quinta 'pole' del año y Márquez partirá decimotercero", + 'description': 'md5:07e2ccb983a046cb42f896cce225f0a7', 'duration': 153.44, }, 'params': { @@ -290,7 +303,8 @@ class RTVETelevisionIE(InfoExtractor): 'info_dict': { 'id': '6665408', 'ext': 'mp4', - 'title': 'Festivales Playz: Covaleda Fest (Soria) - Día 3 con Marc Seguí, Rizha y Judeline', + 'title': 'Covaleda Fest (Soria) - Día 3 con Marc Seguí y Paranoid 1966', + 'description': 'Festivales Playz viaja a Covaleda, Soria, para contarte todo lo que sucede en el Covaleda Fest. Entrevistas, challenges a los artistas, juegos... Khan, Adriana Jiménez y María García no dejarán pasar ni una. 
¡No te lo pierdas!', 'duration': 12009.92, }, 'params': { From e4c57418df6f495e12f6b7aca96d3c0e4e37bfd2 Mon Sep 17 00:00:00 2001 From: Alba Mendez Date: Sat, 17 Sep 2022 20:11:58 +0200 Subject: [PATCH 08/10] generalize to audios too --- youtube_dl/extractor/rtve.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index ea0826328..8d1d2c078 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -26,7 +26,7 @@ _bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(ch class RTVEPlayIE(InfoExtractor): IE_NAME = 'rtve.es:play' IE_DESC = 'RTVE Play' - _VALID_URL = r'https?://(?:www\.)?rtve\.es/(playz?/videos|filmoteca)/[^/]+/[^/]+/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/(?Pplayz?/(?:audios|videos)|filmoteca)/[^/]+/[^/]+/(?P\d+)' _TESTS = [{ 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', @@ -70,6 +70,17 @@ class RTVEPlayIE(InfoExtractor): }, { 'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/', 'only_matching': True, + }, { + 'url': 'http://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/', + 'md5': 'ae06d27bff945c4e87a50f89f6ce48ce', + 'info_dict': { + 'id': '5889192', + 'ext': 'mp3', + 'title': 'Códigos informáticos', + 'description': 'md5:72b0d7c1ca20fd327bdfff7ac0171afb', + 'thumbnail': r're:https?://.+/1598856591583.jpg', + 'duration': 349.440, + }, }] def _real_initialize(self): @@ -149,11 +160,14 @@ class RTVEPlayIE(InfoExtractor): return formats def _real_extract(self, url): - return self._real_extract_from_id(self._match_id(url)) + groups = re.match(self._VALID_URL, url).groupdict() + is_audio = groups.get('kind') == 'play/audios' + return self._real_extract_from_id(groups['id'], is_audio) - def _real_extract_from_id(self, video_id): + def 
_real_extract_from_id(self, video_id, is_audio=False): + kind = 'audios' if is_audio else 'videos' info = self._download_json( - 'http://www.rtve.es/api/videos/%s.json' % video_id, + 'http://www.rtve.es/api/%s/%s.json' % (kind, video_id), video_id)['page']['items'][0] if (info.get('pubState') or {}).get('code') == 'DESPU': raise ExtractorError('The video is no longer available', expected=True) From 585e806d9a1f8b767910e165f1c786bc194a986c Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 10 Oct 2022 10:21:59 +0100 Subject: [PATCH 09/10] Pass TestAllURLsMatching.test_no_duplicates ? --- youtube_dl/extractor/rtve.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 8d1d2c078..6159c2dbc 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -26,7 +26,7 @@ _bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(ch class RTVEPlayIE(InfoExtractor): IE_NAME = 'rtve.es:play' IE_DESC = 'RTVE Play' - _VALID_URL = r'https?://(?:www\.)?rtve\.es/(?Pplayz?/(?:audios|videos)|filmoteca)/[^/]+/[^/]+/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/(?P(?:playz?|alacarta)/(?:audios|videos)|filmoteca)/[^/]+/[^/]+/(?P\d+)' _TESTS = [{ 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', From 20691a11e6ade1bac6f357ccb1c4808854427c16 Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 10 Oct 2022 10:54:19 +0100 Subject: [PATCH 10/10] Really pass TestAllURLsMatching.test_no_duplicates --- youtube_dl/extractor/rtve.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 6159c2dbc..1a2caa085 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -26,7 +26,7 @@ _bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(ch class RTVEPlayIE(InfoExtractor): IE_NAME = 'rtve.es:play' IE_DESC = 
'RTVE Play' - _VALID_URL = r'https?://(?:www\.)?rtve\.es/(?P<kind>(?:playz?|alacarta)/(?:audios|videos)|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/(?P<kind>(?:playz?|(?:m/)?alacarta)/(?:audios|videos)|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',