From ec8cc20805d856dd4b6e8e098bf007c12e4ec904 Mon Sep 17 00:00:00 2001
From: Marc Abonce Seguin
Date: Sat, 9 Dec 2017 00:33:28 -0600
Subject: [PATCH] [youtube] add storyboards meta field with list and write options

Storyboards are grids of small images that appear when the user hovers
their cursor over a video's timeline.

See related issue #9868.

Options added:
* --list-storyboards
* --write-storyboards

Co-authored by @benob
(See: https://github.com/MarcAbonce/youtube-dl/pull/1)
---
Notes (ignored by git-am, not part of the commit message):

* YoutubeDL.list_thumbnails() gains an optional item_name argument
  (default 'thumbnails') and reads every column with dict.get(), so a
  missing "id" or "url" is printed as "unknown".
* YoutubeDL._write_thumbnails(): item_name is the singular 'thumbnail' or
  'storyboard'. The suffix check therefore compares against 'thumbnail';
  regular thumbnails keep their "<name>[_<id>].<ext>" file names and only
  storyboards get the "_storyboard_<id>" suffix.
* YoutubeDL._write_thumbnails(): the download warning passes
  (item_name, url, error) in the order its format string expects.
* YoutubeDL._write_thumbnails(): 'writestoryboards' sits in the same
  if/elif chain as 'writethumbnail' and 'write_all_thumbnails', so it is
  only consulted when neither of those two params is set.
* youtube.py: for the last, partially filled storyboard image the row
  count is ceil(remaining_frames / cols).

 youtube_dl/YoutubeDL.py         | 58 ++++++++++++++++++++++++---------
 youtube_dl/__init__.py          |  2 ++
 youtube_dl/extractor/common.py  |  6 ++++
 youtube_dl/extractor/youtube.py | 54 ++++++++++++++++++++++++++++++
 youtube_dl/options.py           |  8 +++++
 5 files changed, 112 insertions(+), 16 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index ecac31f7a..5e5a230de 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -182,6 +182,7 @@ class YoutubeDL(object):
     writeannotations:  Write the video annotations to a .annotations.xml file
     writethumbnail:    Write the thumbnail image to a file
     write_all_thumbnails:  Write all thumbnail formats to files
+    writestoryboards:  Write all storyboards (grid of video frames) to a file
     writesubtitles:    Write the video subtitles to a file
     writeautomaticsub: Write the automatically generated subtitles to a file
     allsubtitles:      Downloads all the subtitles of the video
@@ -278,6 +279,7 @@ class YoutubeDL(object):
                        [sleep_interval; max_sleep_interval].
     listformats:       Print an overview of available video formats and exit.
     list_thumbnails:   Print a table of all thumbnails and exit.
+    list_storyboards:  Print a table of all storyboards and exit.
     match_filter:      A function that gets called with the info_dict of
                        every video.
                        If it returns a message, the video is ignored.
@@ -1502,6 +1504,10 @@ class YoutubeDL(object):
             self.list_thumbnails(info_dict)
             return
 
+        if self.params.get('list_storyboards'):
+            self.list_thumbnails(info_dict, item_name='storyboards')
+            return
+
         thumbnail = info_dict.get('thumbnail')
         if thumbnail:
             info_dict['thumbnail'] = sanitize_url(thumbnail)
@@ -2245,17 +2251,27 @@ class YoutubeDL(object):
             '[info] Available formats for %s:\n%s' %
             (info_dict['id'], render_table(header_line, table)))
 
-    def list_thumbnails(self, info_dict):
-        thumbnails = info_dict.get('thumbnails')
+    def list_thumbnails(self, info_dict, item_name='thumbnails'):
+        thumbnails = info_dict.get(item_name)
         if not thumbnails:
-            self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
+            self.to_screen('[info] No %s present for %s' % (item_name, info_dict['id']))
             return
 
         self.to_screen(
-            '[info] Thumbnails for %s:' % info_dict['id'])
-        self.to_screen(render_table(
-            ['ID', 'width', 'height', 'URL'],
-            [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
+            '[info] %s for %s:' % (item_name.title(), info_dict['id']))
+
+        columns = ['ID', 'width', 'height']
+        if item_name == 'storyboards':
+            columns += ['cols', 'rows', 'frames']
+        columns += ['URL']
+
+        table = []
+        for t in thumbnails:
+            table.append([])
+            for column in columns:
+                table[-1].append(t.get(column.lower(), 'unknown'))
+
+        self.to_screen(render_table(columns, table))
 
     def list_subtitles(self, video_id, subtitles, name='subtitles'):
         if not subtitles:
@@ -2420,12 +2436,16 @@ class YoutubeDL(object):
         return encoding
 
     def _write_thumbnails(self, info_dict, filename):
+        item_name = 'thumbnail'
         if self.params.get('writethumbnail', False):
             thumbnails = info_dict.get('thumbnails')
             if thumbnails:
                 thumbnails = [thumbnails[-1]]
         elif self.params.get('write_all_thumbnails', False):
             thumbnails = info_dict.get('thumbnails')
+        elif self.params.get('writestoryboards', False):
+            thumbnails = info_dict.get('storyboards')
+            item_name = 'storyboard'
         else:
             return
 
@@ -2435,22 +2455,28 @@ class YoutubeDL(object):
 
         for t in thumbnails:
             thumb_ext = determine_ext(t['url'], 'jpg')
-            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
+            if item_name == 'thumbnail':
+                suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
+            else:
+                suffix = '_%s_%s' % (item_name, t['id'])
             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
             t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
 
             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
-                self.to_screen('[%s] %s: Thumbnail %sis already present' %
-                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
+                self.to_screen('[%s] %s: %s %sis already present' %
+                               (info_dict['extractor'], info_dict['id'],
+                                item_name.title(), thumb_display_id))
             else:
-                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
-                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
+                self.to_screen('[%s] %s: Downloading %s %s...' %
+                               (info_dict['extractor'], info_dict['id'],
+                                item_name, thumb_display_id))
                 try:
                     uf = self.urlopen(t['url'])
                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                         shutil.copyfileobj(uf, thumbf)
-                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
-                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
+                    self.to_screen('[%s] %s: Writing %s %sto: %s' %
+                                   (info_dict['extractor'], info_dict['id'],
+                                    item_name, thumb_display_id, thumb_filename))
                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                    self.report_warning('Unable to download thumbnail "%s": %s' %
-                                        (t['url'], error_to_compat_str(err)))
+                    self.report_warning('Unable to download %s "%s": %s' %
+                                        (item_name, t['url'], error_to_compat_str(err)))
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index e1bd67919..9235d0655 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -372,6 +372,7 @@ def _real_main(argv=None):
         'writeinfojson': opts.writeinfojson,
         'writethumbnail': opts.writethumbnail,
         'write_all_thumbnails': opts.write_all_thumbnails,
+        'writestoryboards': opts.writestoryboards,
         'writesubtitles': opts.writesubtitles,
         'writeautomaticsub': opts.writeautomaticsub,
         'allsubtitles': opts.allsubtitles,
@@ -419,6 +420,7 @@ def _real_main(argv=None):
         'max_sleep_interval': opts.max_sleep_interval,
         'external_downloader': opts.external_downloader,
         'list_thumbnails': opts.list_thumbnails,
+        'list_storyboards': opts.list_storyboards,
         'playlist_items': opts.playlist_items,
         'xattr_set_filesize': opts.xattr_set_filesize,
         'match_filter': match_filter,
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 8eb110f4e..4c0d3986b 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -226,6 +226,12 @@ class InfoExtractor(object):
                                         deprecated)
                         * "filesize" (optional, int)
     thumbnail:      Full URL to a video thumbnail image.
+    storyboards:    A list of dictionaries representing storyboards.
+                    A storyboard is an image grid made of frames from the video.
+                    This has the same structure as the thumbnails list, plus:
+                        * "cols" (optional, int)
+                        * "rows" (optional, int)
+                        * "frames" (optional, int)
     description:    Full video description.
     uploader:       Full name of the video uploader.
     license:        License name the video is licensed under.
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 72d9fbbc6..31ff91b14 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -8,6 +8,7 @@ import os.path
 import random
 import re
 import traceback
+import math
 
 from .common import InfoExtractor, SearchInfoExtractor
 from ..compat import (
@@ -1694,6 +1695,58 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             if thumbnail:
                 thumbnails = [{'url': thumbnail}]
 
+        storyboards = []
+        sb_spec = try_get(player_response,
+                          lambda x: x['storyboards']['playerStoryboardSpecRenderer']['spec'],
+                          compat_str)
+        if sb_spec:
+            s_parts = sb_spec.split('|')
+            base_url = s_parts[0]
+            for i, params in enumerate(s_parts[1:]):
+                storyboard_attrib = params.split('#')
+                if len(storyboard_attrib) != 8:
+                    self._downloader.report_warning('Unable to extract storyboard')
+                    continue
+
+                frame_width = int_or_none(storyboard_attrib[0])
+                frame_height = int_or_none(storyboard_attrib[1])
+                total_frames = int_or_none(storyboard_attrib[2])
+                cols = int_or_none(storyboard_attrib[3])
+                rows = int_or_none(storyboard_attrib[4])
+                filename = storyboard_attrib[6]
+                sigh = storyboard_attrib[7]
+
+                if frame_width and frame_height and cols and rows and total_frames:
+                    frames = cols * rows
+                    width, height = frame_width * cols, frame_height * rows
+                    n_images = int(math.ceil(total_frames / float(cols * rows)))
+                else:
+                    self._downloader.report_warning('Unable to extract storyboard')
+                    continue
+
+                storyboards_url = base_url.replace('$L', compat_str(i)) + '&'
+                for j in range(n_images):
+                    url = storyboards_url.replace('$N', filename).replace('$M', compat_str(j)) + 'sigh=' + sigh
+                    if j == n_images - 1:
+                        remaining_frames = total_frames % (cols * rows)
+                        if remaining_frames != 0:
+                            frames = remaining_frames
+                            rows = int(math.ceil(float(remaining_frames) / cols))
+                            height = rows * frame_height
+                            if rows == 1:
+                                cols = remaining_frames
+                                width = cols * frame_width
+
+                    storyboards.append({
+                        'id': 'L{0}-M{1}'.format(i, j),
+                        'width': width,
+                        'height': height,
+                        'cols': cols,
+                        'rows': rows,
+                        'frames': frames,
+                        'url': url
+                    })
+
         category = microformat.get('category') or search_meta('genre')
         channel_id = video_details.get('channelId') \
             or microformat.get('externalChannelId') \
@@ -1733,6 +1786,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'categories': [category] if category else None,
             'tags': keywords,
             'is_live': is_live,
+            'storyboards': storyboards,
         }
 
         pctr = try_get(
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 241cf110f..c860d40e6 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -781,6 +781,14 @@ def parseOpts(overrideArguments=None):
         '--list-thumbnails',
         action='store_true', dest='list_thumbnails', default=False,
         help='Simulate and list all available thumbnail formats')
+    thumbnail.add_option(
+        '--write-storyboards',
+        action='store_true', dest='writestoryboards', default=False,
+        help='Write all storyboards (grid of video frames) to disk')
+    thumbnail.add_option(
+        '--list-storyboards',
+        action='store_true', dest='list_storyboards', default=False,
+        help='Simulate and list all available storyboards')
 
     postproc = optparse.OptionGroup(parser, 'Post-processing Options')
     postproc.add_option(