From cc6dec72b142ee5210ad7363e799740ffe71a44d Mon Sep 17 00:00:00 2001 From: "Du, Song" Date: Fri, 4 Jun 2021 18:16:27 -0400 Subject: [PATCH 1/3] add bilibili subtitle download --- youtube_dl/extractor/bilibili.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index bff6ea194..6f88ce678 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -112,6 +112,14 @@ class BiliBiliIE(InfoExtractor): _APP_KEY = 'iVGUTjsxvpLeuDCf' _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt' + def convert_timestamp_format(self, cc_timestamp): + s, mm = int(cc_timestamp), 0 + if "." in str(cc_timestamp): + s, mm = map(int, str(cc_timestamp).split(".")) + m, s = divmod(s, 60) + h, m = divmod(m, 60) + return "%02d:%02d:%02d,%03d" % (h, m, s, mm) + def _report_error(self, result): if 'message' in result: raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True) @@ -155,6 +163,21 @@ class BiliBiliIE(InfoExtractor): self._report_error(js) cid = js['result']['cid'] + subtitles = {} + for sub in re.findall(r'"lan":\s*"([^"]+)"[^}]+"subtitle_url":\s*"([^"]+)"', webpage, re.DOTALL): + subtitles[sub[0]] = [{'url': sub[1]}] + j = self._download_json(sub[1].replace("\\u002F", "/"), video_id, note='Downloading raw subtitle: %s' % (sub[0])) + srt = [] + for idx, line in enumerate(j["body"]): + from_timestamp = self.convert_timestamp_format(line["from"]) + to_timestamp = self.convert_timestamp_format(line["to"]) + srt_tmp = ( + """%d\n%s --> %s\n%s\n""" % (idx, from_timestamp, to_timestamp, line["content"]) + ) + srt.append(srt_tmp) + subtitles[sub[0]][0]["data"] = "\n".join(srt) + subtitles[sub[0]][0]["ext"] = "srt" + headers = { 'Accept': 'application/json', 'Referer': url @@ -204,6 +227,7 @@ class BiliBiliIE(InfoExtractor): 'id': '%s_part%s' % (video_id, idx), 'duration': float_or_none(durl.get('length'), 1000), 'formats': formats, + 'subtitles': subtitles }) break From 3dc46ff6977ef72a01496720209702cfed5b3191 Mon Sep 17 00:00:00 2001 From: "Du, Song" Date: Sat, 5 Jun 2021 00:28:17 -0400 Subject: [PATCH 2/3] [bilibili] fix srt conversion when timestamp is long float --- youtube_dl/extractor/bilibili.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 6f88ce678..a4c59f985 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -113,12 +113,10 @@ class BiliBiliIE(InfoExtractor): _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt' def convert_timestamp_format(self, cc_timestamp): - s, mm = int(cc_timestamp), 0 - if "." in str(cc_timestamp): - s, mm = map(int, str(cc_timestamp).split(".")) - m, s = divmod(s, 60) + ss, mm = "{:.3f}".format(float(cc_timestamp)).split(".") + m, s = divmod(int(ss), 60) h, m = divmod(m, 60) - return "%02d:%02d:%02d,%03d" % (h, m, s, mm) + return "%02d:%02d:%02d,%s" % (h, m, s, mm) def _report_error(self, result): if 'message' in result: From 7a3c8386578511a4d20ad5822cd86178f0819872 Mon Sep 17 00:00:00 2001 From: "Du, Song" Date: Mon, 30 Aug 2021 22:17:42 -0400 Subject: [PATCH 3/3] BiliBili: add test for sub, fix previous test url --- youtube_dl/extractor/bilibili.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index a4c59f985..f44d19a80 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -38,7 +38,21 @@ class BiliBiliIE(InfoExtractor): ''' _TESTS = [{ - 'url': 'http://www.bilibili.tv/video/av1074402/', + 'url': 'https://www.bilibili.com/video/BV1N7411A7WU/', + 'info_dict': { + 'id': '1N7411A7WU', + 'ext': 'flv', + 'title': '[经验分享]观看B站视频时如何开启CC字幕?(B站小白必看)', + 'description': 'md5:b5b03414a659c29c7f5bce58c725b8ee', + 'duration': 119.267, + 'timestamp': 1582321654, + 'upload_date': '20200221', + 'uploader': 'Lynda教程和字幕', + 'uploader_id': '389168097', + 'subtitles': {'zh-CN': [{'url': 'http:\\u002F\\u002Fi0.hdslb.com\\u002Fbfs\\u002Fsubtitle\\u002F3c648ce057eea5b4e619c5dd9f0f23fbcad728f5.json', 'data': 'md5:ece63ef7a3a45fe2f7183f95f96d3181', 'ext': 'srt'}]} + }, + }, { + 'url': 'http://www.bilibili.com/video/av1074402/', 'md5': '5f7d29e1a2872f3df0cf76b1f87d3788', 'info_dict': { 'id': '1074402',