diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6e8fc3961..48c480362 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1024,6 +1024,7 @@ from .rtl2 import ( RTL2YouSeriesIE, ) from .rtp import RTPIE +from .rtrfm import RTRFMIE from .rts import RTSIE from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE from .rtvnh import RTVNHIE diff --git a/youtube_dl/extractor/rtrfm.py b/youtube_dl/extractor/rtrfm.py new file mode 100644 index 000000000..766d6f258 --- /dev/null +++ b/youtube_dl/extractor/rtrfm.py @@ -0,0 +1,48 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..compat import compat_urllib_parse_urlencode +from ..compat import compat_urlparse + +from ..utils import ExtractorError + +import re + + +class RTRFMIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rtrfm.com.au/(?:shows/(?P[^/]+)|show-episode/(?P[^-]+)-(?P[0-9]{4}-[0-9]{2}-[0-9]{2})/?)' + _SHOW_DATE_TITLE = r"window\.vm\.playShow\('([^']+)', *'([0-9]{4}-[0-9]{2}-[0-9]{2})', *'([^']+)'\)" + _RESTREAMS_URL = 'https://restreams.rtrfm.com.au/rzz' + _TESTS = [ + { + 'url': 'https://rtrfm.com.au/show-episode/breakfast-2020-06-01/', + 'md5': '594027f513ec36a24b15d65007a24dff', + 'info_dict': { + 'id': 'breakfast-2020-06-01', + 'ext': 'mp3', + 'title': 'Breakfast with Taylah 2020-06-01', + }, + } + ] + + def _real_extract(self, url): + show1, show2, date = self._VALID_URL_RE.match(url).groups() + webpage = self._download_webpage(url, show1 or show2) + match = re.compile(self._SHOW_DATE_TITLE).search(webpage) + if not match: + raise ExtractorError('Error getting the show, date & title', expected=True) + show, date, title = match.groups() + query = {'n': show, 'd': date} + query = compat_urllib_parse_urlencode(query) + url = compat_urlparse.urlparse(self._RESTREAMS_URL) + url = compat_urlparse.urlunparse(url._replace(query=query)) + url = self._download_json(url, show, 'Downloading MP3 URL')['u'] + return { + 'id': '%s-%s' % (show, date), + 'title': '%s %s' % (title, date), + 'url': url, + 'ext': 'mp3', + 'release_date': 'date', + 'description': self._og_search_description(webpage), + }