From 6800d3372f35e08dcc4d34d06601815bf0cb0a3d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 3 Jun 2015 23:10:18 +0800 Subject: [PATCH 1/9] [YoutubeDL] Support DASH manifest downloading --- youtube_dl/downloader/dash.py | 50 +++++++++++++++++++++++++++++++++ youtube_dl/downloader/http.py | 4 +++ youtube_dl/extractor/youtube.py | 6 ++++ 3 files changed, 60 insertions(+) create mode 100644 youtube_dl/downloader/dash.py diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py new file mode 100644 index 000000000..18eca2c04 --- /dev/null +++ b/youtube_dl/downloader/dash.py @@ -0,0 +1,50 @@ +from __future__ import unicode_literals +from .common import FileDownloader +from ..compat import compat_urllib_request + +import re + + +class DashSegmentsFD(FileDownloader): + """ + Download segments in a DASH manifest + """ + def real_download(self, filename, info_dict): + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + base_url = info_dict['url'] + segment_urls = info_dict['segment_urls'] + + self.byte_counter = 0 + + def append_url_to_file(outf, target_url, target_name): + self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name)) + req = compat_urllib_request.Request(target_url) + data = self.ydl.urlopen(req).read() + outf.write(data) + self.byte_counter += len(data) + + def combine_url(base_url, target_url): + if re.match(r'^https?://', target_url): + return target_url + return '%s/%s' % (base_url, target_url) + + with open(tmpfilename, 'wb') as outf: + append_url_to_file( + outf, combine_url(base_url, info_dict['initialization_url']), + 'initialization segment') + for i, segment_url in enumerate(segment_urls): + append_url_to_file( + outf, combine_url(base_url, segment_url), + 'segment %d / %d' % (i + 1, len(segment_urls))) + + self.try_rename(tmpfilename, filename) + + self._hook_progress({ + 'downloaded_bytes': self.byte_counter, + 'total_bytes': self.byte_counter, + 'filename': filename, + 'status': 'finished', + }) + + return True diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index b7f144af9..ceacb8522 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -6,6 +6,7 @@ import socket import time from .common import FileDownloader +from .dash import DashSegmentsFD from ..compat import ( compat_urllib_request, compat_urllib_error, @@ -19,6 +20,9 @@ from ..utils import ( class HttpFD(FileDownloader): def real_download(self, filename, info_dict): + if info_dict.get('initialization_url') and list(filter(None, info_dict.get('segment_urls', []))): + return DashSegmentsFD(self.ydl, self.params).real_download(filename, info_dict) + url = info_dict['url'] tmpfilename = self.temp_name(filename) stream = None diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index aacb999ce..5d1297e0d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -802,6 +802,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # TODO implement WebVTT downloading pass elif mime_type.startswith('audio/') or mime_type.startswith('video/'): + segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList') format_id = r.attrib['id'] video_url = url_el.text filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) @@ -815,6 +816,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'filesize': filesize, 'fps': int_or_none(r.attrib.get('frameRate')), } + if segment_list: + f.update({ + 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'], + 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')] + }) try: existing_format = next( fo for fo in formats From b9258c61789388b49792ebdceb5d804217a36da5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 4 Jun 2015 22:05:33 +0800 Subject: [PATCH 2/9] [YoutubeDL] Change how DashSegmentsFD is selected --- youtube_dl/downloader/__init__.py | 2 ++ youtube_dl/downloader/http.py | 4 ---- youtube_dl/extractor/youtube.py | 3 ++- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index f110830c4..1b618ab54 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -8,6 +8,7 @@ from .hls import NativeHlsFD from .http import HttpFD from .rtsp import RtspFD from .rtmp import RtmpFD +from .dash import DashSegmentsFD from ..utils import ( determine_protocol, @@ -20,6 +21,7 @@ PROTOCOL_MAP = { 'mms': RtspFD, 'rtsp': RtspFD, 'f4m': F4mFD, + 'dash_segments': DashSegmentsFD, } diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index ceacb8522..b7f144af9 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -6,7 +6,6 @@ import socket import time from .common import FileDownloader -from .dash import DashSegmentsFD from ..compat import ( compat_urllib_request, compat_urllib_error, @@ -20,9 +19,6 @@ from ..utils import ( class HttpFD(FileDownloader): def real_download(self, filename, info_dict): - if info_dict.get('initialization_url') and list(filter(None, info_dict.get('segment_urls', []))): - return DashSegmentsFD(self.ydl, self.params).real_download(filename, info_dict) - url = info_dict['url'] tmpfilename = self.temp_name(filename) stream = None diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5d1297e0d..692d4d8db 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -819,7 +819,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if segment_list: f.update({ 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'], - 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')] + 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')], + 'protocol': 'dash_segments', }) try: existing_format = next( From 453a1617aac6e8000ed947cad7d88817c5740ede Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 4 Jun 2015 22:12:05 +0800 Subject: [PATCH 3/9] [downloader/dash] Reorder imports --- youtube_dl/downloader/dash.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index 18eca2c04..5f14658ba 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -1,9 +1,10 @@ from __future__ import unicode_literals -from .common import FileDownloader -from ..compat import compat_urllib_request import re +from .common import FileDownloader +from ..compat import compat_urllib_request + class DashSegmentsFD(FileDownloader): """ From 423d2be5f8c5e70d202ddfa63f3e5365e6afe823 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 4 Jun 2015 22:27:29 +0800 Subject: [PATCH 4/9] [downloader/dash] Rename the protocol 'http_dash_segments' looks more like a protocol name than 'dash_segments' --- youtube_dl/downloader/__init__.py | 2 +- youtube_dl/extractor/youtube.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index 1b618ab54..dccc59212 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -21,7 +21,7 @@ PROTOCOL_MAP = { 'mms': RtspFD, 'rtsp': RtspFD, 'f4m': F4mFD, - 'dash_segments': DashSegmentsFD, + 'http_dash_segments': DashSegmentsFD, } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 692d4d8db..6d288e848 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -820,7 +820,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): f.update({ 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'], 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')], - 'protocol': 'dash_segments', + 'protocol': 'http_dash_segments', }) try: existing_format = next( From 4da31bd56629054497634d041035e4bd6fcfacbb Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 6 Jun 2015 22:22:26 +0800 Subject: [PATCH 5/9] [youtube] Fix a FutureWarning from xml.etree.ElementTree --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6d288e848..2424ac2c0 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -816,7 +816,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'filesize': filesize, 'fps': int_or_none(r.attrib.get('frameRate')), } - if segment_list: + if len(segment_list): f.update({ 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'], 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')], From 0c8662d2b6f033ad42f1cc97989d4975629b524b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 10 Jun 2015 13:40:41 +0800 Subject: [PATCH 6/9] [youtube] Fix a TypeError caused by 4da31bd56629054497634d041035e4bd6fcfacbb --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2424ac2c0..a1906eef6 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -816,7 +816,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'filesize': filesize, 'fps': int_or_none(r.attrib.get('frameRate')), } - if len(segment_list): + if segment_list is not None: f.update({ 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'], 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')], From 93dfcb9357b400b4d7e353d0a9db0e0194135b19 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 10 Jun 2015 13:44:54 +0800 Subject: [PATCH 7/9] [downloader/dash] Do not pollute ```self``` --- youtube_dl/downloader/dash.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index 5f14658ba..cd84e0b07 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -16,14 +16,14 @@ class DashSegmentsFD(FileDownloader): base_url = info_dict['url'] segment_urls = info_dict['segment_urls'] - self.byte_counter = 0 + byte_counter = 0 def append_url_to_file(outf, target_url, target_name): self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name)) req = compat_urllib_request.Request(target_url) data = self.ydl.urlopen(req).read() outf.write(data) - self.byte_counter += len(data) + return len(data) def combine_url(base_url, target_url): if re.match(r'^https?://', target_url): @@ -35,15 +35,16 @@ class DashSegmentsFD(FileDownloader): outf, combine_url(base_url, info_dict['initialization_url']), 'initialization segment') for i, segment_url in enumerate(segment_urls): - append_url_to_file( + segment_len = append_url_to_file( outf, combine_url(base_url, segment_url), 'segment %d / %d' % (i + 1, len(segment_urls))) + byte_counter += segment_len self.try_rename(tmpfilename, filename) self._hook_progress({ - 'downloaded_bytes': self.byte_counter, - 'total_bytes': self.byte_counter, + 'downloaded_bytes': byte_counter, + 'total_bytes': byte_counter, 'filename': filename, 'status': 'finished', }) From 5bf3276e8d6ee7d017c8be04414398752cd9cdf3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 10 Jun 2015 14:45:54 +0800 Subject: [PATCH 8/9] [downloader/dash] Add testing facility --- youtube_dl/downloader/dash.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index cd84e0b07..a4685d307 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -16,12 +16,21 @@ class DashSegmentsFD(FileDownloader): base_url = info_dict['url'] segment_urls = info_dict['segment_urls'] + is_test = self.params.get('test', False) + remaining_bytes = self._TEST_FILE_SIZE if is_test else None byte_counter = 0 - def append_url_to_file(outf, target_url, target_name): + def append_url_to_file(outf, target_url, target_name, remaining_bytes=None): self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name)) req = compat_urllib_request.Request(target_url) + if remaining_bytes is not None: + req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1)) + data = self.ydl.urlopen(req).read() + + if remaining_bytes is not None: + data = data[:remaining_bytes] + outf.write(data) return len(data) @@ -37,8 +46,13 @@ class DashSegmentsFD(FileDownloader): for i, segment_url in enumerate(segment_urls): segment_len = append_url_to_file( outf, combine_url(base_url, segment_url), - 'segment %d / %d' % (i + 1, len(segment_urls))) + 'segment %d / %d' % (i + 1, len(segment_urls)), + remaining_bytes) byte_counter += segment_len + if remaining_bytes is not None: + remaining_bytes -= segment_len + if remaining_bytes <= 0: + break self.try_rename(tmpfilename, filename) From 8a1a26ce4c64d7a2c142718fc56f46d9a1c2c4f2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 10 Jun 2015 14:47:02 +0800 Subject: [PATCH 9/9] [youtube] Add a test for the DASH segment downloader --- youtube_dl/extractor/youtube.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index a1906eef6..939f5e61f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -516,6 +516,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip_download': 'requires avconv', } }, + # DASH manifest with segment_list + { + 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8', + 'md5': '8ce563a1d667b599d21064e982ab9e31', + 'info_dict': { + 'id': 'CsmdDsKjzN8', + 'ext': 'mp4', + 'upload_date': '20150510', + 'uploader': 'Airtek', + 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.', + 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ', + 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015', + }, + 'params': { + 'youtube_include_dash_manifest': True, + 'format': '135', # bestvideo + } + } ] def __init__(self, *args, **kwargs):