From 7907e765bf39216ebd60831116f1c6db535a53de Mon Sep 17 00:00:00 2001 From: schn0sch Date: Sun, 27 Dec 2020 21:48:24 +0100 Subject: [PATCH 1/2] [yourporn] added support for posts with multiple videos closes #27554 --- youtube_dl/extractor/yourporn.py | 87 ++++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/yourporn.py b/youtube_dl/extractor/yourporn.py index 98347491e..b552efe2a 100644 --- a/youtube_dl/extractor/yourporn.py +++ b/youtube_dl/extractor/yourporn.py @@ -30,38 +30,71 @@ class YourPornIE(InfoExtractor): }] def _real_extract(self, url): - video_id = self._match_id(url) + get_duration = lambda webpage: parse_duration(self._search_regex( + r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration', + default=None)) - webpage = self._download_webpage(url, video_id) + # Only for posts containing a single video is the post_id equal to + # video_id. If there are multiple videos there also exists a post with the + # video_id and we use this page to fetch the video title. 
+ post_id = self._match_id(url) - parts = self._parse_json( + webpage = self._download_webpage(url, post_id) + + videos = self._parse_json( self._search_regex( r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info', group='data'), - video_id)[video_id].split('/') + post_id) - num = 0 - for c in parts[6] + parts[7]: - if c.isnumeric(): - num += int(c) - parts[5] = compat_str(int(parts[5]) - num) - parts[1] += '8' - video_url = urljoin(url, '/'.join(parts)) + for video_id in videos: + parts = videos[video_id].split('/') + num = 0 + for c in parts[6] + parts[7]: + if c.isnumeric(): + num += int(c) + parts[5] = compat_str(int(parts[5]) - num) + parts[1] += '8' + videos[video_id] = urljoin(url, '/'.join(parts)) - title = (self._search_regex( - r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title', - default=None) or self._og_search_description(webpage)).strip() - thumbnail = self._og_search_thumbnail(webpage) - duration = parse_duration(self._search_regex( - r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration', - default=None)) - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'age_limit': 18, - 'ext': 'mp4', + # If there is only one video in the post (as is most likely) the + # video_id is equal to post_id and we avoid re-fetching the page + # to obtain the meta data. + # Extraction here may fail; anything missing is obtained in the next step. 
+ titles = { + post_id: self._og_search_description(webpage, default=None) + } + thumbnails = { + post_id: self._og_search_thumbnail(webpage, default=None) + } + durations = { + post_id: get_duration(webpage) } + + # obtain missing metadata for all videos in the post + for video_id in videos: + if not titles.get(video_id) or not thumbnails.get(video_id) or video_id not in durations: + webpage = self._download_webpage('https://sxyprn.com/post/%s.html' % video_id, video_id) + if not titles.get(video_id): + titles[video_id] = self._og_search_description(webpage) + if not thumbnails.get(video_id): + thumbnails[video_id] = self._og_search_thumbnail(webpage) + if video_id not in durations: + durations[video_id] = get_duration(webpage) + + entries = [] + for video_id in videos: + entries.append({ + 'id': video_id, + 'url': videos[video_id], + 'title': titles[video_id], + 'thumbnail': thumbnails[video_id], + 'duration': durations[video_id], + 'age_limit': 18, + 'ext': 'mp4', + }) + + if len(entries) == 1: + return entries[0] + else: + return self.playlist_result(entries, post_id, titles[post_id]) From cbaa856993a59cd5038ce218385ced543e711dba Mon Sep 17 00:00:00 2001 From: schn0sch Date: Sun, 27 Dec 2020 21:27:21 +0100 Subject: [PATCH 2/2] [yourporn] added tests --- youtube_dl/extractor/yourporn.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/extractor/yourporn.py b/youtube_dl/extractor/yourporn.py index b552efe2a..5b36020c0 100644 --- a/youtube_dl/extractor/yourporn.py +++ b/youtube_dl/extractor/yourporn.py @@ -24,9 +24,23 @@ class YourPornIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://sxyprn.com/post/5acfc82b12d00.html', + 'md5': '1df93ede16d87685aa069f56ac69b0e7', + 'info_dict': { + 'id': '5acfc82b12d00', + 'ext': 'mp4', + 'title': 'Girls Do Porn E157 The Mormon Girl this should be in HD... 
#GirlsDoPorn #GDP #BigTits #casting', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 2466, + 'age_limit': 18, + }, }, { 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html', 'only_matching': True, + }, { + 'url': 'https://sxyprn.com/post/5af15f6799de9.html', + 'only_matching': True, }] def _real_extract(self, url):