From 6e109f8ad49c6fb8aebfa969e3821f01b05e6f22 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 16:01:18 +0100 Subject: [PATCH 01/11] Scrape uploader --- youtube_dl/extractor/xnxx.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index ac1ccc404..d227104b2 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -25,6 +25,9 @@ class XNXXIE(InfoExtractor): 'duration': 469, 'view_count': int, 'age_limit': 18, + 'uploader': 'Glurp', + 'uploader_id': 'Glurp', + 'uploader_url': '/porn-maker/glurp', }, }, { 'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_', @@ -73,6 +76,13 @@ class XNXXIE(InfoExtractor): r'id=["\']nb-views-number[^>]+>([\d,.]+)', webpage, 'view count', default=None)) + uploader_data = re.findall(r'(?P.+?)', webpage) + uploader_id = '' + uploader_url = '' + if uploader_data is not None: + uploader_id = uploader_data[0][1] + uploader_url = uploader_data[0][0] + return { 'id': video_id, 'title': title, @@ -81,4 +91,7 @@ class XNXXIE(InfoExtractor): 'view_count': view_count, 'age_limit': 18, 'formats': formats, + 'uploader': uploader_id, + 'uploader_id': uploader_id, + 'uploader_url': uploader_url, } From b6f851ba4d3f244f7657741de42a13dd074352f4 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 16:13:05 +0100 Subject: [PATCH 02/11] Fix view_count regex --- youtube_dl/extractor/xnxx.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index d227104b2..d44b5ab7d 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -16,7 +16,7 @@ class XNXXIE(InfoExtractor): _VALID_URL = r'https?://(?:video|www)\.xnxx\.com/video-?(?P[0-9a-z]+)/' _TESTS = [{ 'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video', - 'md5': '7583e96c15c0f21e9da3453d9920fbba', + 'md5': '73c071a361a09aae7e7d60008221fd13', 'info_dict': { 'id': '55awb78', 'ext': 'mp4', @@ -73,8 +73,8 @@ class XNXXIE(InfoExtractor): 'ThumbUrl', fatal=False) or get('ThumbUrl169', fatal=False) duration = int_or_none(self._og_search_property('duration', webpage)) view_count = str_to_int(self._search_regex( - r'id=["\']nb-views-number[^>]+>([\d,.]+)', webpage, 'view count', - default=None)) + r'-.+?\t+- (?P.+?) ', webpage, 'view count', group='views', + default=0)) uploader_data = re.findall(r'(?P.+?)', webpage) uploader_id = '' From 441bafa2d1c8b6484631efc2214a6522185f9c92 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 16:19:18 +0100 Subject: [PATCH 03/11] Scrape tags modified: youtube_dl/extractor/xnxx.py --- youtube_dl/extractor/xnxx.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index d44b5ab7d..8e119411d 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -25,6 +25,7 @@ class XNXXIE(InfoExtractor): 'duration': 469, 'view_count': int, 'age_limit': 18, + 'tags': ['video game', 'skyrim', '3d', 'game', '3d game', 'video games', 'rule34', 'test', 'rough', 'sfm', 'fallout', 'porno game', 'skyrim hentai', 'h game', '3d horse', '3d porno anime', 'xx video wwxxx cartoon cartoons', 'gaming', 'games', '3d porno desenho'], 'uploader': 'Glurp', 'uploader_id': 'Glurp', 'uploader_url': '/porn-maker/glurp', @@ -76,6 +77,8 @@ class XNXXIE(InfoExtractor): r'-.+?\t+- (?P.+?) ', webpage, 'view count', group='views', default=0)) + tags = self._search_regex(r'(?P.+?)', webpage) uploader_id = '' uploader_url = '' @@ -91,6 +94,7 @@ class XNXXIE(InfoExtractor): 'view_count': view_count, 'age_limit': 18, 'formats': formats, + 'tags': tags, 'uploader': uploader_id, 'uploader_id': uploader_id, 'uploader_url': uploader_url, From 072873437c0792aa4d88e6b6b026ca237fdf9338 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 16:22:33 +0100 Subject: [PATCH 04/11] Fix uploader regex --- youtube_dl/extractor/xnxx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index 8e119411d..cd7d2cf49 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -79,7 +79,7 @@ class XNXXIE(InfoExtractor): tags = self._search_regex(r'(?P.+?)', webpage) + uploader_data = re.findall(r'(?P.+?)', webpage) uploader_id = '' uploader_url = '' if uploader_data is not None: From 9823824d7cec9e11ac51ba1794d36a1c6d1ebdd1 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 16:39:06 +0100 Subject: [PATCH 05/11] Scrape actors --- youtube_dl/extractor/xnxx.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index cd7d2cf49..ab9492809 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -9,6 +9,7 @@ from ..utils import ( int_or_none, NO_DEFAULT, str_to_int, + urljoin, ) @@ -30,6 +31,23 @@ class XNXXIE(InfoExtractor): 'uploader_id': 'Glurp', 'uploader_url': '/porn-maker/glurp', }, + }, { + 'url': 'https://www.xnxx.com/video-h46klf8/babes_-_come_back_to_bed_starring_abella_danger_and_darcie_dolce_clip', + 'md5': 'b8b4a594b4091de46ce05d0a9d45317c', + 'info_dict': { + 'id': 'h46klf8', + 'ext': 'mp4', + 'title': 'Babes - Come Back to Bed starring Abella Danger and Darcie Dolce clip', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 480, + 'view_count': int, + 'age_limit': 18, + 'tags': ['lesbicas', 'darcie dolce', 'darcie dolce lesbian', 'abella danger lesbian', 'babes', 'lesb', 'abella danger', 'mmf', 'come', 'darcie', 'lesbianas', 'mulher chupando peito da outra', 'abella', 'girl on girl', 'darcie dolce abella danger', 'chupando peitos', 'para', 'lesbian', 'chupando peitos lesbicas', 'black lesbians'], + 'uploader': 'Babes Network', + 'uploader_id': 'Babes Network', + 'uploader_url': '/porn-maker/babes3', + 'actors': [{'given_name': 'darcie dolce', 'url': 'https://www.xnxx.com/search/darcie%20dolce'}, {'given_name': 'darcie dolce lesbian', 'url': 'https://www.xnxx.com/search/darcie%20dolce%20lesbian'}, {'given_name': 'abella danger lesbian', 'url': 'https://www.xnxx.com/search/abella%20danger%20lesbian'}, {'given_name': 'abella danger', 'url': 'https://www.xnxx.com/search/abella%20danger'}, {'given_name': 'darcie dolce abella danger', 'url': 'https://www.xnxx.com/search/darcie%20dolce%20abella%20danger'}], + } }, { 'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_', 'only_matching': True, @@ -86,6 +104,15 @@ class XNXXIE(InfoExtractor): uploader_id = uploader_data[0][1] uploader_url = uploader_data[0][0] + actors_data = re.findall(r'(?P.+?)', webpage) + actors = [] + if actors_data is not None: + for actor_tuple in actors_data: + actors.append({ + 'given_name': actor_tuple[1], + 'url': urljoin(url, actor_tuple[0]), + }) + return { 'id': video_id, 'title': title, @@ -98,4 +125,5 @@ class XNXXIE(InfoExtractor): 'uploader': uploader_id, 'uploader_id': uploader_id, 'uploader_url': uploader_url, + 'actors': actors, } From 4d93d7072ddb91b1b1c2e1bd0386bfc2a3d182f0 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 16:48:10 +0100 Subject: [PATCH 06/11] Fix checks on empty data --- youtube_dl/extractor/xnxx.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index ab9492809..b28006b45 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -6,6 +6,7 @@ import re from .common import InfoExtractor from ..utils import ( determine_ext, + float_or_none, int_or_none, NO_DEFAULT, str_to_int, @@ -100,19 +101,19 @@ class XNXXIE(InfoExtractor): uploader_data = re.findall(r'(?P.+?)', webpage) uploader_id = '' uploader_url = '' - if uploader_data is not None: + if uploader_data!=[]: uploader_id = uploader_data[0][1] uploader_url = uploader_data[0][0] actors_data = re.findall(r'(?P.+?)', webpage) actors = [] - if actors_data is not None: + if actors_data != []: for actor_tuple in actors_data: actors.append({ 'given_name': actor_tuple[1], 'url': urljoin(url, actor_tuple[0]), }) - + return { 'id': video_id, 'title': title, From 0e05a7d5d8a89b38cfa52c9100ba9a671ee7b7a1 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 16:49:42 +0100 Subject: [PATCH 07/11] Scrape average_rating --- youtube_dl/extractor/xnxx.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index b28006b45..ec22908d5 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -48,6 +48,7 @@ class XNXXIE(InfoExtractor): 'uploader_id': 'Babes Network', 'uploader_url': '/porn-maker/babes3', 'actors': [{'given_name': 'darcie dolce', 'url': 'https://www.xnxx.com/search/darcie%20dolce'}, {'given_name': 'darcie dolce lesbian', 'url': 'https://www.xnxx.com/search/darcie%20dolce%20lesbian'}, {'given_name': 'abella danger lesbian', 'url': 'https://www.xnxx.com/search/abella%20danger%20lesbian'}, {'given_name': 'abella danger', 'url': 'https://www.xnxx.com/search/abella%20danger'}, {'given_name': 'darcie dolce abella danger', 'url': 'https://www.xnxx.com/search/darcie%20dolce%20abella%20danger'}], + 'average_rating': float, } }, { 'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_', @@ -114,6 +115,8 @@ class XNXXIE(InfoExtractor): 'url': urljoin(url, actor_tuple[0]), }) + rating = float_or_none(self._search_regex(r'(?P.+?)%', webpage, 'rating', group='rating')) + return { 'id': video_id, 'title': title, @@ -127,4 +130,5 @@ class XNXXIE(InfoExtractor): 'uploader_id': uploader_id, 'uploader_url': uploader_url, 'actors': actors, + 'average_rating': rating, } From 068cd1b81c98554c21c74d00c9d1e7f5ad04e58b Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 16:53:11 +0100 Subject: [PATCH 08/11] Scrape creator --- youtube_dl/extractor/xnxx.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index ec22908d5..670b2dfbe 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -126,6 +126,7 @@ class XNXXIE(InfoExtractor): 'age_limit': 18, 'formats': formats, 'tags': tags, + 'creator': uploader_id, 'uploader': uploader_id, 'uploader_id': uploader_id, 'uploader_url': uploader_url, From d9c98f01d56e5d63deef77ed0e5c02b2584e28c6 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 16:57:39 +0100 Subject: [PATCH 09/11] Fix flake8 issues --- youtube_dl/extractor/xnxx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index 670b2dfbe..41fcdc974 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -102,7 +102,7 @@ class XNXXIE(InfoExtractor): uploader_data = re.findall(r'(?P.+?)', webpage) uploader_id = '' uploader_url = '' - if uploader_data!=[]: + if uploader_data != []: uploader_id = uploader_data[0][1] uploader_url = uploader_data[0][0] @@ -114,7 +114,7 @@ class XNXXIE(InfoExtractor): 'given_name': actor_tuple[1], 'url': urljoin(url, actor_tuple[0]), }) - + rating = float_or_none(self._search_regex(r'(?P.+?)%', webpage, 'rating', group='rating')) return { From fa32787f2bb14f75b1213b6a87f95c4b7091b5b7 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sun, 27 Feb 2022 01:32:20 +0100 Subject: [PATCH 10/11] Apply refactoring suggested by @rautamiekka --- youtube_dl/extractor/xnxx.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index 41fcdc974..5468c954f 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -103,8 +103,7 @@ class XNXXIE(InfoExtractor): uploader_id = '' uploader_url = '' if uploader_data != []: - uploader_id = uploader_data[0][1] - uploader_url = uploader_data[0][0] + uploader_url, uploader_id = uploader_data[0][0:2] actors_data = re.findall(r'(?P.+?)', webpage) actors = [] From b804854e3bd4c663dfa6c264968a663d739a64d7 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sun, 27 Feb 2022 01:40:37 +0100 Subject: [PATCH 11/11] Set uploader default values to None --- youtube_dl/extractor/xnxx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index 5468c954f..808733ed1 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -100,8 +100,8 @@ class XNXXIE(InfoExtractor): tags = self._search_regex(r'(?P.+?)', webpage) - uploader_id = '' - uploader_url = '' + uploader_id = None + uploader_url = None if uploader_data != []: uploader_url, uploader_id = uploader_data[0][0:2]