From 1f50a07771fddb5f64617617d156bfdd593f951e Mon Sep 17 00:00:00 2001 From: nixxo Date: Wed, 27 Jan 2021 12:24:50 +0100 Subject: [PATCH] [RAI] Extend formats with direct http mp4 link (PR #27990) * initial support for creating direct mp4 link * improved regexes and info extraction * added "connection: close" to request headers * updated to https://github.com/yt-dlp/yt-dlp/pull/208 --- youtube_dl/extractor/rai.py | 111 +++++++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 2abe164e0..7b0315a62 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -5,15 +5,16 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urlparse, compat_str, + compat_urlparse, ) from ..utils import ( - ExtractorError, determine_ext, + ExtractorError, find_xpath_attr, fix_xml_ampersands, GeoRestrictedError, + HEADRequest, int_or_none, parse_duration, remove_start, @@ -96,12 +97,100 @@ class RaiBaseIE(InfoExtractor): if not formats and geoprotection is True: self.raise_geo_restricted(countries=self._GEO_COUNTRIES) + formats.extend(self._create_http_urls(relinker_url, formats)) + return dict((k, v) for k, v in { 'is_live': is_live, 'duration': duration, 'formats': formats, }.items() if v is not None) + def _create_http_urls(self, relinker_url, fmts): + _RELINKER_REG = r'https?://(?P[^/]+?)/(?:i/)?(?P[^/]+?)/(?P.+?)/(?P\d+)(?:_(?P[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?' + _MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s' + _QUALITY = { + # tbr: w, h + '250': [352, 198], + '400': [512, 288], + '700': [512, 288], + '800': [700, 394], + '1200': [736, 414], + '1800': [1024, 576], + '2400': [1280, 720], + '3200': [1440, 810], + '3600': [1440, 810], + '5000': [1920, 1080], + '10000': [1920, 1080], + } + + def test_url(url): + resp = self._request_webpage( + HEADRequest(url), None, headers={'User-Agent': 'Rai'}, + fatal=False, errnote=False, note=False) + + if resp is False: + return False + + if resp.code == 200: + return False if resp.url == url else resp.url + return None + + def get_format_info(tbr): + import math + br = int_or_none(tbr) + if len(fmts) == 1 and not br: + br = fmts[0].get('tbr') + if br > 300: + tbr = compat_str(math.floor(br / 100) * 100) + else: + tbr = '250' + + # try extracting info from available m3u8 formats + format_copy = None + for f in fmts: + if f.get('tbr'): + br_limit = math.floor(br / 100) + if br_limit - 1 <= math.floor(f['tbr'] / 100) <= br_limit + 1: + format_copy = f.copy() + return { + 'width': format_copy.get('width'), + 'height': format_copy.get('height'), + 'tbr': format_copy.get('tbr'), + 'vcodec': format_copy.get('vcodec'), + 'acodec': format_copy.get('acodec'), + 'fps': format_copy.get('fps'), + 'format_id': 'https-%s' % tbr, + } if format_copy else { + 'width': _QUALITY[tbr][0], + 'height': _QUALITY[tbr][1], + 'format_id': 'https-%s' % tbr, + 'tbr': int(tbr), + } + + loc = test_url(_MP4_TMPL % (relinker_url, '*')) + if not isinstance(loc, compat_str): + return [] + + mobj = re.match( + _RELINKER_REG, + test_url(relinker_url) or '') + if not mobj: + return [] + + available_qualities = mobj.group('quality').split(',') if mobj.group('quality') else ['*'] + available_qualities = [i for i in available_qualities if i] + + formats = [] + for q in available_qualities: + fmt = { + 'url': _MP4_TMPL % (relinker_url, q), + 'protocol': 'https', + 'ext': 'mp4', + } + fmt.update(get_format_info(q)) + formats.append(fmt) + return formats + @staticmethod def _extract_subtitles(url, video_data): STL_EXT = 'stl' @@ -151,6 +240,22 @@ class RaiPlayIE(RaiBaseIE): 'params': { 'skip_download': True, }, + }, { + # 1080p direct mp4 url + 'url': 'https://www.raiplay.it/video/2021/03/Leonardo-S1E1-b5703b02-82ee-475a-85b6-c9e4a8adf642.html', + 'md5': '2e501e8651d72f05ffe8f5d286ad560b', + 'info_dict': { + 'id': 'b5703b02-82ee-475a-85b6-c9e4a8adf642', + 'ext': 'mp4', + 'title': 'Leonardo - S1E1', + 'alt_title': 'St 1 Ep 1 - Episodio 1', + 'description': 'md5:f5360cd267d2de146e4e3879a5a47d31', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Rai 1', + 'duration': 3229, + 'series': 'Leonardo', + 'season': 'Season 1', + }, }, { 'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?', 'only_matching': True, @@ -318,7 +423,7 @@ class RaiIE(RaiBaseIE): }, { # with ContentItem in og:url 'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html', - 'md5': '6865dd00cf0bbf5772fdd89d59bd768a', + 'md5': '06345bd97c932f19ffb129973d07a020', 'info_dict': { 'id': 'efb17665-691c-45d5-a60c-5301333cbb0c', 'ext': 'mp4',