From 6800d3372f35e08dcc4d34d06601815bf0cb0a3d Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan
Date: Wed, 3 Jun 2015 23:10:18 +0800
Subject: [PATCH] [YoutubeDL] Support DASH manifest downloading

---
Review notes (free text in this position is not part of the commit):

* The patch was restored to one diff line per physical line. Leading
  whitespace of context lines was reconstructed, so applying it may need
  "git apply --ignore-whitespace" if the target indentation differs.
* dash.py: falsy segment URLs are skipped, the HTTP response is always
  closed, and combine_url no longer emits a double slash when the base
  URL already ends with one.
* http.py: a 'segment_urls' value of None no longer breaks the dispatch.
* youtube.py: SegmentList/Initialization are compared with None instead
  of relying on Element truthiness, and a manifest without a usable
  Initialization sourceURL falls back to the plain 'url' download.
* The "index" hashes below belong to the original revision of the patch
  and no longer describe the modified content.

 youtube_dl/downloader/dash.py   | 62 +++++++++++++++++++++++++++++++++
 youtube_dl/downloader/http.py   |  4 +++
 youtube_dl/extractor/youtube.py | 10 ++++++
 3 files changed, 76 insertions(+)
 create mode 100644 youtube_dl/downloader/dash.py

diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
new file mode 100644
index 000000000..18eca2c04
--- /dev/null
+++ b/youtube_dl/downloader/dash.py
@@ -0,0 +1,62 @@
+from __future__ import unicode_literals
+from .common import FileDownloader
+from ..compat import compat_urllib_request
+
+import re
+
+
+class DashSegmentsFD(FileDownloader):
+    """
+    Download segments in a DASH manifest
+
+    Reads 'url' (base URL), 'initialization_url', 'segment_urls' and 'id'
+    from info_dict, then writes the initialization segment followed by
+    every media segment, in order, into a single output file.
+    """
+    def real_download(self, filename, info_dict):
+        self.report_destination(filename)
+        tmpfilename = self.temp_name(filename)
+        base_url = info_dict['url']
+        # Entries may be None when a SegmentURL has no media attribute;
+        # drop them so that combine_url never receives a non-string
+        segment_urls = [seg for seg in info_dict['segment_urls'] if seg]
+
+        self.byte_counter = 0
+
+        def append_url_to_file(outf, target_url, target_name):
+            self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
+            req = compat_urllib_request.Request(target_url)
+            response = self.ydl.urlopen(req)
+            try:
+                data = response.read()
+            finally:
+                # Release the connection even when reading fails
+                response.close()
+            outf.write(data)
+            self.byte_counter += len(data)
+
+        def combine_url(base_url, target_url):
+            if re.match(r'^https?://', target_url):
+                return target_url
+            # Do not produce a double slash if base_url already ends with one
+            return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
+
+        with open(tmpfilename, 'wb') as outf:
+            append_url_to_file(
+                outf, combine_url(base_url, info_dict['initialization_url']),
+                'initialization segment')
+            for i, segment_url in enumerate(segment_urls):
+                append_url_to_file(
+                    outf, combine_url(base_url, segment_url),
+                    'segment %d / %d' % (i + 1, len(segment_urls)))
+
+        self.try_rename(tmpfilename, filename)
+
+        self._hook_progress({
+            'downloaded_bytes': self.byte_counter,
+            'total_bytes': self.byte_counter,
+            'filename': filename,
+            'status': 'finished',
+        })
+
+        return True
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index b7f144af9..ceacb8522 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -6,6 +6,7 @@ import socket
 import time
 
 from .common import FileDownloader
+from .dash import DashSegmentsFD
 from ..compat import (
     compat_urllib_request,
     compat_urllib_error,
@@ -19,6 +20,9 @@ from ..utils import (
 
 class HttpFD(FileDownloader):
     def real_download(self, filename, info_dict):
+        if info_dict.get('initialization_url') and any(info_dict.get('segment_urls') or []):
+            return DashSegmentsFD(self.ydl, self.params).real_download(filename, info_dict)
+
         url = info_dict['url']
         tmpfilename = self.temp_name(filename)
         stream = None
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index aacb999ce..5d1297e0d 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -802,6 +802,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     # TODO implement WebVTT downloading
                     pass
                 elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
+                    segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
                     format_id = r.attrib['id']
                     video_url = url_el.text
                     filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
@@ -815,6 +816,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         'filesize': filesize,
                         'fps': int_or_none(r.attrib.get('frameRate')),
                     }
+                    if segment_list is not None:
+                        # Compare with None explicitly: the truth value of an
+                        # Element only tells whether it has children
+                        initialization = segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization')
+                        if initialization is not None and initialization.attrib.get('sourceURL'):
+                            f.update({
+                                'initialization_url': initialization.attrib['sourceURL'],
+                                'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
+                            })
                     try:
                         existing_format = next(
                             fo for fo in formats