From a66e25859a0f163446c009f9a71abab4083745f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 8 Apr 2017 20:51:38 +0700 Subject: [PATCH] [mixcloud:playlist] Relax title extraction and fix description extraction (closes #12582) --- youtube_dl/extractor/mixcloud.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index ba0bb190f..bc38a8663 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -138,7 +138,7 @@ class MixcloudPlaylistBaseIE(InfoExtractor): def _get_user_description(self, page_content): return self._html_search_regex( - r']+class="description-text"[^>]*>(.+?)', + r']+class="profile-bio"[^>]*>(.+?)', page_content, 'user description', fatal=False) @@ -151,7 +151,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE): 'info_dict': { 'id': 'dholbach_uploads', 'title': 'Daniel Holbach (uploads)', - 'description': 'md5:327af72d1efeb404a8216c27240d1370', + 'description': 'md5:def36060ac8747b3aabca54924897e47', }, 'playlist_mincount': 11, }, { @@ -159,7 +159,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE): 'info_dict': { 'id': 'dholbach_uploads', 'title': 'Daniel Holbach (uploads)', - 'description': 'md5:327af72d1efeb404a8216c27240d1370', + 'description': 'md5:def36060ac8747b3aabca54924897e47', }, 'playlist_mincount': 11, }, { @@ -167,7 +167,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE): 'info_dict': { 'id': 'dholbach_favorites', 'title': 'Daniel Holbach (favorites)', - 'description': 'md5:327af72d1efeb404a8216c27240d1370', + 'description': 'md5:def36060ac8747b3aabca54924897e47', }, 'params': { 'playlist_items': '1-100', @@ -178,7 +178,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE): 'info_dict': { 'id': 'dholbach_listens', 'title': 'Daniel Holbach (listens)', - 'description': 'md5:327af72d1efeb404a8216c27240d1370', + 'description': 'md5:def36060ac8747b3aabca54924897e47', }, 'params': { 'playlist_items': '1-100', @@ -229,12 +229,7 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE): 'playlist_mincount': 16, }, { 'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/', - 'info_dict': { - 'id': 'maxvibes_jazzcat-on-ness-radio', - 'title': 'Jazzcat on Ness Radio', - 'description': 'md5:7bbbf0d6359a0b8cda85224be0f8f263', - }, - 'playlist_mincount': 23 + 'only_matching': True, }] def _real_extract(self, url): @@ -243,15 +238,16 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE): playlist_id = mobj.group('playlist') video_id = '%s_%s' % (user_id, playlist_id) - profile = self._download_webpage( + webpage = self._download_webpage( url, user_id, note='Downloading playlist page', errnote='Unable to download playlist page') - description = self._get_user_description(profile) - playlist_title = self._html_search_regex( - r'\d+([^', - profile, 'playlist title') + title = self._html_search_regex( + r']+class="parent active"[^>]*>\d+]*>([^<]+)', + webpage, 'playlist title', + default=None) or self._og_search_title(webpage, fatal=False) + description = self._get_user_description(webpage) entries = OnDemandPagedList( functools.partial( @@ -259,7 +255,7 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE): '%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist'), self._PAGE_SIZE) - return self.playlist_result(entries, video_id, playlist_title, description) + return self.playlist_result(entries, video_id, title, description) class MixcloudStreamIE(MixcloudPlaylistBaseIE):