From 1115271ac61b89cc4ac1ca922eff8a4bed0fbf57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?=
Date: Sat, 9 Dec 2017 00:46:28 +0700
Subject: [PATCH] [raiplay:playlist] Fix issues and improve (closes #14563)

---
Notes (ignored by git-am / git-apply):
 * This copy was rebuilt from a whitespace-collapsed dump in which every
   "<...>" span had been stripped. The named regex groups (?P<id>...) and
   (?P<path>...) are restored because the code reads them via _match_id()
   and mobj.group('path'). The "<a\b[^>]+" prefix of the anchor regex is an
   inferred reconstruction. The author's e-mail address in the From: header
   was stripped as well and has not been re-invented.
 * The stray debug `print(description)` in RaiPlayPlaylistIE._real_extract
   has been dropped: extractors must not write to stdout. The second and
   third rai.py hunk headers and the diffstat are therefore one line
   smaller than in the original commit, and the post-image index hash below
   no longer describes the resulting blob.
 * Indentation of context and removed lines was rebuilt by convention; if a
   hunk is rejected on whitespace, retry with `git apply --ignore-whitespace`.

 youtube_dl/extractor/extractors.py |  2 +-
 youtube_dl/extractor/rai.py        | 61 +++++++++++++++++------------
 2 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index d8f9f94cc..b9c97fac4 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -856,8 +856,8 @@ from .radiofrance import RadioFranceIE
 from .rai import (
     RaiPlayIE,
     RaiPlayLiveIE,
+    RaiPlayPlaylistIE,
     RaiIE,
-    RaiPlaylistIE,
 )
 from .rbmaradio import RBMARadioIE
 from .rds import RDSIE
diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py
index 625458380..d22311031 100644
--- a/youtube_dl/extractor/rai.py
+++ b/youtube_dl/extractor/rai.py
@@ -17,6 +17,7 @@ from ..utils import (
     parse_duration,
     strip_or_none,
     try_get,
+    unescapeHTML,
     unified_strdate,
     unified_timestamp,
     update_url_query,
@@ -249,6 +250,40 @@ class RaiPlayLiveIE(RaiBaseIE):
         }
 
 
+class RaiPlayPlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
+        'info_dict': {
+            'id': 'nondirloalmiocapo',
+            'title': 'Non dirlo al mio capo',
+            'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86',
+        },
+        'playlist_mincount': 12,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        title = self._html_search_meta(
+            ('programma', 'nomeProgramma'), webpage, 'title')
+        description = unescapeHTML(self._html_search_meta(
+            ('description', 'og:description'), webpage, 'description'))
+
+        entries = []
+        for mobj in re.finditer(
+                r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1',
+                webpage):
+            video_url = urljoin(url, mobj.group('path'))
+            entries.append(self.url_result(
+                video_url, ie=RaiPlayIE.ie_key(),
+                video_id=RaiPlayIE._match_id(video_url)))
+
+        return self.playlist_result(entries, playlist_id, title, description)
+
+
 class RaiIE(RaiBaseIE):
     _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
     _TESTS = [{
@@ -455,29 +490,3 @@ class RaiIE(RaiBaseIE):
         info.update(relinker_info)
 
         return info
-
-
-class RaiPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/]+)'
-    _TESTS = [{
-        'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
-        'info_dict': {
-            'id': 'nondirloalmiocapo',
-            'title': 'Non dirlo al mio capo',
-        },
-        'playlist_mincount': 12,
-    }]
-
-    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
-        webpage = self._download_webpage(url, playlist_id)
-        title = self._html_search_meta('programma', webpage, default=None)
-        video_urls = re.findall(' href="(/raiplay/video.+)"', webpage)
-        video_urls = [urljoin(url, video_url) for video_url in video_urls]
-        entries = [
-            self.url_result(
-                video_url,
-                RaiPlayIE.ie_key())
-            for video_url in video_urls if RaiPlayIE.suitable(video_url)
-        ]
-        return self.playlist_result(entries, playlist_id, title)