From cf862771d72b028f8d5335256610c560245dc8c0 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 12 Jan 2021 21:15:13 +0100 Subject: [PATCH] [adn] improve info extraction --- youtube_dl/extractor/adn.py | 79 +++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py index 901832ac4..d611ee237 100644 --- a/youtube_dl/extractor/adn.py +++ b/youtube_dl/extractor/adn.py @@ -24,8 +24,8 @@ from ..utils import ( long_to_bytes, pkcs1pad, strip_or_none, + try_get, unified_strdate, - urljoin, ) @@ -40,11 +40,20 @@ class ADNIE(InfoExtractor): 'ext': 'mp4', 'title': 'Blue Exorcist - Kyôto Saga - Episode 1', 'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5', + 'series': 'Blue Exorcist - Kyôto Saga', + 'duration': 1467, + 'release_date': '20170106', + 'comment_count': int, + 'average_rating': float, + 'season_number': 2, + 'episode': 'Début des hostilités', + 'episode_number': 1, } } _BASE_URL = 'http://animedigitalnetwork.fr' - _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr' + _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/' + _PLAYER_BASE_URL = _API_BASE_URL + 'player/' _RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537) _POS_ALIGN_MAP = { 'start': 1, @@ -59,26 +68,24 @@ class ADNIE(InfoExtractor): def _ass_subtitles_timecode(seconds): return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100) - def _get_subtitles(self, sub_path, video_id): - if not sub_path: + def _get_subtitles(self, sub_url, video_id): + if not sub_url: return None enc_subtitles = self._download_webpage( - urljoin(self._BASE_URL, sub_path), - video_id, 'Downloading subtitles location', fatal=False) or '{}' + sub_url, video_id, 'Downloading subtitles location', fatal=False) or '{}' subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location') if subtitle_location: enc_subtitles = self._download_webpage( - urljoin(self._BASE_URL, subtitle_location), - video_id, 'Downloading subtitles data', fatal=False, - headers={'Origin': 'https://animedigitalnetwork.fr'}) + subtitle_location, video_id, 'Downloading subtitles data', + fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'}) if not enc_subtitles: return None # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js dec_subtitles = intlist_to_bytes(aes_cbc_decrypt( bytes_to_intlist(compat_b64decode(enc_subtitles[24:])), - bytes_to_intlist(binascii.unhexlify(self._K + '4b8ef13ec1872730')), + bytes_to_intlist(binascii.unhexlify(self._K + 'ab9f52f5baae7c72')), bytes_to_intlist(compat_b64decode(enc_subtitles[:24])) )) subtitles_json = self._parse_json( @@ -124,22 +131,25 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' def _real_extract(self, url): video_id = self._match_id(url) - config_url = self._API_BASE_URL + '/player/video/%s/configuration' % video_id - player_config = self._download_json( - config_url, video_id, - 'Downloading player config JSON metadata')['player']['options'] + video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id + player = self._download_json( + video_base_url + 'configuration', video_id, + 'Downloading player config JSON metadata')['player'] + options = player['options'] - user = player_config['user'] + user = options['user'] if not user.get('hasAccess'): - raise ExtractorError('This video is only available for paying users') + raise ExtractorError( + 'This video is only available for paying users', expected=True) # self.raise_login_required() # FIXME: Login is not implemented token = self._download_json( - user.get('refreshTokenUrl') or (self._API_BASE_URL + '/player/refresh/token'), - video_id, 'Downloading access token', headers={'x-player-refresh-token': user['refreshToken']}, - data=b'')['token'] + user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'), + video_id, 'Downloading access token', headers={ + 'x-player-refresh-token': user['refreshToken'] + }, data=b'')['token'] - links_url = player_config.get('videoUrl') or (self._API_BASE_URL + '/player/video/%s/link' % video_id) + links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link') self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)]) message = bytes_to_intlist(json.dumps({ 'k': self._K, @@ -157,17 +167,14 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' try: links_data = self._download_json( - urljoin(self._BASE_URL, links_url), video_id, - 'Downloading links JSON metadata', headers={ + links_url, video_id, 'Downloading links JSON metadata', headers={ 'X-Player-Token': authorization - }, - query={ + }, query={ 'freeWithAds': 'true', 'adaptive': 'false', 'withMetadata': 'true', 'source': 'Web' - } - ) + }) break except ExtractorError as e: if not isinstance(e.cause, compat_HTTPError): @@ -188,8 +195,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' links = links_data.get('links') or {} metas = links_data.get('metadata') or {} - sub_path = (links.get('subtitles') or {}).get('all') + sub_url = (links.get('subtitles') or {}).get('all') video_info = links_data.get('video') or {} + title = metas['title'] formats = [] for format_id, qualities in (links.get('streaming') or {}).items(): @@ -212,23 +220,24 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' formats.extend(m3u8_formats) self._sort_formats(formats) - video = (self._download_json(self._API_BASE_URL + '/video/%s' % video_id, video_id, - 'Downloading additional video metadata', fatal=False) or {}).get('video') + video = (self._download_json( + self._API_BASE_URL + 'video/%s' % video_id, video_id, + 'Downloading additional video metadata', fatal=False) or {}).get('video') or {} show = video.get('show') or {} return { 'id': video_id, - 'title': metas.get('title') or video_id, + 'title': title, 'description': strip_or_none(metas.get('summary') or video.get('summary')), - 'thumbnail': video_info.get('image'), + 'thumbnail': video_info.get('image') or player.get('image'), 'formats': formats, - 'subtitles': sub_path and self.extract_subtitles(sub_path, video_id), + 'subtitles': self.extract_subtitles(sub_url, video_id), 'episode': metas.get('subtitle') or video.get('name'), 'episode_number': int_or_none(video.get('shortNumber')), - 'series': video_info.get('playlistTitle') or show.get('title'), + 'series': show.get('title'), 'season_number': int_or_none(video.get('season')), 'duration': int_or_none(video_info.get('duration') or video.get('duration')), - 'release_date': unified_strdate(video.get('release_date')), - 'average_rating': video.get('rating') or metas.get('rating'), + 'release_date': unified_strdate(video.get('releaseDate')), + 'average_rating': float_or_none(video.get('rating') or metas.get('rating')), 'comment_count': int_or_none(video.get('commentsCount')), }