From a363fb5d28da7c1b651e6de98b9e799544a4df73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Feb 2021 04:03:54 +0700 Subject: [PATCH] [yandexmusic:playlist] Request missing tracks in chunks (closes #27355, closes #28184) --- youtube_dl/extractor/yandexmusic.py | 35 +++++++++++++++++++---------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 7893f363e..84969f8e1 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -1,8 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals -import re import hashlib +import itertools +import re from .common import InfoExtractor from ..compat import compat_str @@ -209,17 +210,27 @@ class YandexMusicPlaylistBaseIE(YandexMusicBaseIE): missing_track_ids = [ track_id for track_id in track_ids if track_id not in present_track_ids] - missing_tracks = self._call_api( - 'track-entries', tld, url, item_id, - 'Downloading missing tracks JSON', { - 'entries': ','.join(missing_track_ids), - 'lang': tld, - 'external-domain': 'music.yandex.%s' % tld, - 'overembed': 'false', - 'strict': 'true', - }) - if missing_tracks: - tracks.extend(missing_tracks) + # Request missing tracks in chunks to avoid exceeding max HTTP header size, + # see https://github.com/ytdl-org/youtube-dl/issues/27355 + _TRACKS_PER_CHUNK = 250 + for chunk_num in itertools.count(0): + start = chunk_num * _TRACKS_PER_CHUNK + end = start + _TRACKS_PER_CHUNK + missing_track_ids_req = missing_track_ids[start:end] + assert missing_track_ids_req + missing_tracks = self._call_api( + 'track-entries', tld, url, item_id, + 'Downloading missing tracks JSON chunk %d' % (chunk_num + 1), { + 'entries': ','.join(missing_track_ids_req), + 'lang': tld, + 'external-domain': 'music.yandex.%s' % tld, + 'overembed': 'false', + 'strict': 'true', + }) + if missing_tracks: + tracks.extend(missing_tracks) + if end >= len(missing_track_ids): + break return tracks