From 2fbd86352eaa9df6afeed6698114132aea3cbe81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 May 2018 22:57:01 +0700 Subject: [PATCH] [udemy] Extract asset captions --- youtube_dl/extractor/udemy.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 4664e6222..0a74a9768 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -18,6 +18,7 @@ from ..utils import ( int_or_none, js_to_json, sanitized_Request, + try_get, unescapeHTML, urlencode_postdata, ) @@ -105,7 +106,7 @@ class UdemyIE(InfoExtractor): % (course_id, lecture_id), lecture_id, 'Downloading lecture JSON', query={ 'fields[lecture]': 'title,description,view_html,asset', - 'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,data', + 'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data', }) def _handle_error(self, response): @@ -308,6 +309,21 @@ class UdemyIE(InfoExtractor): if isinstance(urls, dict): extract_formats(urls.get('Video')) + captions = asset.get('captions') + if isinstance(captions, list): + for cc in captions: + if not isinstance(cc, dict): + continue + cc_url = cc.get('url') + if not cc_url or not isinstance(cc_url, compat_str): + continue + lang = try_get(cc, lambda x: x['locale']['locale'], compat_str) + sub_dict = (automatic_captions if cc.get('source') == 'auto' + else subtitles) + sub_dict.setdefault(lang or 'en', []).append({ + 'url': cc_url, + }) + view_html = lecture.get('view_html') if view_html: view_html_urls = set()