From 90bddb6cdd59107d137c13970dc50a6193d204a7 Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 15 Oct 2015 14:28:56 +0100 Subject: [PATCH] [ooyala] extract more formats and metadata --- youtube_dl/extractor/ooyala.py | 153 ++++++++++++--------------------- 1 file changed, 54 insertions(+), 99 deletions(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index a262a9f6d..592cdc564 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -1,108 +1,64 @@ from __future__ import unicode_literals import re -import json import base64 from .common import InfoExtractor from ..utils import ( - unescapeHTML, - ExtractorError, - determine_ext, int_or_none, + float_or_none, ) class OoyalaBaseIE(InfoExtractor): - def _extract_result(self, info, more_info): - embedCode = info['embedCode'] - video_url = info.get('ipad_url') or info['url'] - - if determine_ext(video_url) == 'm3u8': - formats = self._extract_m3u8_formats(video_url, embedCode, ext='mp4') - else: - formats = [{ - 'url': video_url, - 'ext': 'mp4', - }] - - return { - 'id': embedCode, - 'title': unescapeHTML(info['title']), - 'formats': formats, - 'description': unescapeHTML(more_info['description']), - 'thumbnail': more_info['promo'], + def _extract(self, player_url, video_id): + print(player_url) + content_tree = self._download_json(player_url, video_id)['content_tree'] + metadata = content_tree[list(content_tree)[0]] + embed_code = metadata['embed_code'] + pcode = metadata.get('asset_pcode') or embed_code + video_info = { + 'id': embed_code, + 'title': metadata['title'], + 'description': metadata.get('description'), + 'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'), + 'duration': int_or_none(metadata.get('duration')), } - def _extract(self, player_url, video_id): - player = self._download_webpage(player_url, video_id) - mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', - player, 'mobile player url') - # Looks like some videos are only available for particular devices - # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0 - # is only available for ipad) - # Working around with fetching URLs for all the devices found starting with 'unknown' - # until we succeed or eventually fail for each device. - devices = re.findall(r'device\s*=\s*"([^"]+)";', player) - devices.remove('unknown') - devices.insert(0, 'unknown') - for device in devices: - mobile_player = self._download_webpage( - '%s&device=%s' % (mobile_url, device), video_id, - 'Downloading mobile player JS for %s device' % device) - videos_info = self._search_regex( - r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', - mobile_player, 'info', fatal=False, default=None) - if videos_info: - break - - if not videos_info: - formats = [] + formats = [] + for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'): auth_data = self._download_json( - 'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (video_id, video_id), - video_id) + 'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=%s' % (pcode, embed_code, supported_format), + video_id, 'Downloading %s JSON' % supported_format) - cur_auth_data = auth_data['authorization_data'][video_id] + cur_auth_data = auth_data['authorization_data'][embed_code] for stream in cur_auth_data['streams']: - formats.append({ - 'url': base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'), - 'ext': stream.get('delivery_type'), - 'format': stream.get('video_codec'), - 'format_id': stream.get('profile'), - 'width': int_or_none(stream.get('width')), - 'height': int_or_none(stream.get('height')), - 'abr': int_or_none(stream.get('audio_bitrate')), - 'vbr': int_or_none(stream.get('video_bitrate')), - }) - if formats: - return { - 'id': video_id, - 'formats': formats, - 'title': 'Ooyala video', - } - - if not cur_auth_data['authorized']: - raise ExtractorError(cur_auth_data['message'], expected=True) - - if not videos_info: - raise ExtractorError('Unable to extract info') - videos_info = videos_info.replace('\\"', '"') - videos_more_info = self._search_regex( - r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"') - videos_info = json.loads(videos_info) - videos_more_info = json.loads(videos_more_info) - - if videos_more_info.get('lineup'): - videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])] - return { - '_type': 'playlist', - 'id': video_id, - 'title': unescapeHTML(videos_more_info['title']), - 'entries': videos, - } - else: - return self._extract_result(videos_info[0], videos_more_info) + url = base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8') + delivery_type = stream['delivery_type'] + if delivery_type == 'remote_asset': + video_info['url'] = url + return video_info + if delivery_type == 'hls': + formats.extend(self._extract_m3u8_formats(url, embed_code, 'mp4', 'm3u8_native', 0, m3u8_id='hls', fatal=False)) + elif delivery_type == 'hds': + formats.extend(self._extract_f4m_formats(url, embed_code, f4m_id='hds', fatal=False)) + else: + formats.append({ + 'url': url, + 'ext': stream.get('delivery_type'), + 'vcodec': stream.get('video_codec'), + 'format_id': stream.get('profile'), + 'width': int_or_none(stream.get('width')), + 'height': int_or_none(stream.get('height')), + 'abr': int_or_none(stream.get('audio_bitrate')), + 'vbr': int_or_none(stream.get('video_bitrate')), + 'fps': float_or_none(stream.get('framerate')), + }) + self._sort_formats(formats) + + video_info['formats'] = formats + return video_info class OoyalaIE(OoyalaBaseIE): @@ -117,6 +73,7 @@ class OoyalaIE(OoyalaBaseIE): 'ext': 'mp4', 'title': 'Explaining Data Recovery from Hard Drives and SSDs', 'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', + 'duration': 853386, }, }, { # Only available for ipad @@ -125,7 +82,7 @@ class OoyalaIE(OoyalaBaseIE): 'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0', 'ext': 'mp4', 'title': 'Simulation Overview - Levels of Simulation', - 'description': '', + 'duration': 194948, }, }, { @@ -136,7 +93,8 @@ class OoyalaIE(OoyalaBaseIE): 'info_dict': { 'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx', 'ext': 'mp4', - 'title': 'Ooyala video', + 'title': 'Divide Tool Path.mp4', + 'duration': 204405, } } ] @@ -152,8 +110,8 @@ class OoyalaIE(OoyalaBaseIE): def _real_extract(self, url): embed_code = self._match_id(url) - player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code - return self._extract(player_url, embed_code) + content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/embed_code/%s/%s' % (embed_code, embed_code) + return self._extract(content_tree_url, embed_code) class OoyalaExternalIE(OoyalaBaseIE): @@ -170,7 +128,7 @@ class OoyalaExternalIE(OoyalaBaseIE): .*?&pcode= ) (?P.+?) - (&|$) + (?:&|$) ''' _TEST = { @@ -179,7 +137,7 @@ class OoyalaExternalIE(OoyalaBaseIE): 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG', 'ext': 'mp4', 'title': 'dm_140128_30for30Shorts___JudgingJewellv2', - 'description': '', + 'duration': 1302000, }, 'params': { # m3u8 download @@ -188,9 +146,6 @@ class OoyalaExternalIE(OoyalaBaseIE): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - partner_id = mobj.group('partner_id') - video_id = mobj.group('id') - pcode = mobj.group('pcode') - player_url = 'http://player.ooyala.com/player.js?externalId=%s:%s&pcode=%s' % (partner_id, video_id, pcode) - return self._extract(player_url, video_id) + partner_id, video_id, pcode = re.match(self._VALID_URL, url).groups() + content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/external_id/%s/%s:%s' % (pcode, partner_id, video_id) + return self._extract(content_tree_url, video_id)