From 80abd212b418313abc5c5151eabd8387966f4dc8 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 14:25:01 +0100 Subject: [PATCH 01/13] Scrape tags --- youtube_dl/extractor/xvideos.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index e63d4690d..5036b3ba9 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -25,14 +25,15 @@ class XVideosIE(InfoExtractor): (?P[0-9]+) ''' _TESTS = [{ - 'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl', - 'md5': '14cea69fcb84db54293b1e971466c2e1', + 'url': 'https://www.xvideos.com/video23686408/le_ragazze_universitarie_provano_cose_nuove', + 'md5': '69638dcd63e14f587fb5b6efc932ccc3', 'info_dict': { - 'id': '4588838', + 'id': '23686408', 'ext': 'mp4', - 'title': 'Biker Takes his Girl', - 'duration': 108, + 'title': 'College Girls try new things', + 'duration': 277, 'age_limit': 18, + 'tags': ['teen', 'amateur', 'college', 'dorm', '18', 'sorority'], } }, { 'url': 'https://flashservice.xvideos.com/embedframe/4588838', @@ -137,6 +138,8 @@ class XVideosIE(InfoExtractor): self._sort_formats(formats) + tags = self._search_regex(r' Date: Sat, 26 Feb 2022 14:54:42 +0100 Subject: [PATCH 02/13] Scrape creator --- youtube_dl/extractor/xvideos.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 5036b3ba9..214cbb97c 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -10,6 +10,7 @@ from ..utils import ( ExtractorError, int_or_none, parse_duration, + urljoin, ) @@ -25,15 +26,16 @@ class XVideosIE(InfoExtractor): (?P[0-9]+) ''' _TESTS = [{ - 'url': 'https://www.xvideos.com/video23686408/le_ragazze_universitarie_provano_cose_nuove', - 'md5': '69638dcd63e14f587fb5b6efc932ccc3', + 'url': 'https://www.xvideos.com/video50011247/when_girls_play_-_adriana_chechik_abella_danger_-_tradimento_-_twistys', + 'md5': 'aa54f96311768b3a8bfe54b8c8fda070', 'info_dict': { - 'id': '23686408', + 'id': '50011247', 'ext': 'mp4', - 'title': 'College Girls try new things', - 'duration': 277, + 'title': 'When Girls Play - (Adriana Chechik, Abella Danger) - Betrayal - Twistys', + 'duration': 720, 'age_limit': 18, - 'tags': ['teen', 'amateur', 'college', 'dorm', '18', 'sorority'], + 'tags': ['lesbian', 'teen', 'hardcore', 'latina', 'rough', 'squirt', 'big-ass', 'cheater', 'twistys', 'cheat', 'ass-play', 'when-girls-play'], + 'creator': 'Twistys', } }, { 'url': 'https://flashservice.xvideos.com/embedframe/4588838', @@ -140,6 +142,8 @@ class XVideosIE(InfoExtractor): tags = self._search_regex(r'(?P.+?)<', webpage, 'creator', group='creator') + return { 'id': video_id, 'formats': formats, @@ -148,4 +152,5 @@ class XVideosIE(InfoExtractor): 'thumbnails': thumbnails, 'age_limit': 18, 'tags': tags, + 'creator': creator, } From 16a2888b1a561c3d0eb17b49054df2c3e4c0e8ba Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 14:55:53 +0100 Subject: [PATCH 03/13] Scrape actors modified: youtube_dl/extractor/xvideos.py --- youtube_dl/extractor/xvideos.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 214cbb97c..ed7c216ac 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -36,6 +36,7 @@ class XVideosIE(InfoExtractor): 'age_limit': 18, 'tags': ['lesbian', 'teen', 'hardcore', 'latina', 'rough', 'squirt', 'big-ass', 'cheater', 'twistys', 'cheat', 'ass-play', 'when-girls-play'], 'creator': 'Twistys', + 'actors': [{'given_name': 'Adriana Chechik', 'url': 'https://www.xvideos.com/pornstars/adriana-chechik'}, {'given_name': 'Abella Danger', 'url': 'https://www.xvideos.com/pornstars/abella-danger'}], } }, { 'url': 'https://flashservice.xvideos.com/embedframe/4588838', @@ -144,6 +145,14 @@ class XVideosIE(InfoExtractor): creator = self._search_regex(r'(?P.+?)<', webpage, 'creator', group='creator') + actors_data = re.findall(r'href="(?P/pornstars/.+?)" class="btn btn-default label profile hover-name">(?P.+?)', webpage) + actors = [] + for actor_tuple in actors_data: + actors.append({ + 'given_name': actor_tuple[1], + 'url': urljoin(url, actor_tuple[0]), + }) + return { 'id': video_id, 'formats': formats, @@ -153,4 +162,5 @@ class XVideosIE(InfoExtractor): 'age_limit': 18, 'tags': tags, 'creator': creator, + 'actors': actors, } From a4ca7681698954688cfc44a7f2c991bb39523cec Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 15:06:16 +0100 Subject: [PATCH 04/13] Scrape views --- youtube_dl/extractor/xvideos.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index ed7c216ac..8f6af255a 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -10,6 +10,7 @@ from ..utils import ( ExtractorError, int_or_none, parse_duration, + str_to_int, urljoin, ) @@ -37,6 +38,7 @@ class XVideosIE(InfoExtractor): 'tags': ['lesbian', 'teen', 'hardcore', 'latina', 'rough', 'squirt', 'big-ass', 'cheater', 'twistys', 'cheat', 'ass-play', 'when-girls-play'], 'creator': 'Twistys', 'actors': [{'given_name': 'Adriana Chechik', 'url': 'https://www.xvideos.com/pornstars/adriana-chechik'}, {'given_name': 'Abella Danger', 'url': 'https://www.xvideos.com/pornstars/abella-danger'}], + 'views': int, } }, { 'url': 'https://flashservice.xvideos.com/embedframe/4588838', @@ -153,6 +155,8 @@ class XVideosIE(InfoExtractor): 'url': urljoin(url, actor_tuple[0]), }) + views = self._search_regex(r'(?P.+?)<', webpage, 'views', group='views') + return { 'id': video_id, 'formats': formats, @@ -163,4 +167,5 @@ class XVideosIE(InfoExtractor): 'tags': tags, 'creator': creator, 'actors': actors, + 'views': str_to_int(views), } From 81787fd733b194415421eb2a7519bb0b3316d5dd Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 15:16:41 +0100 Subject: [PATCH 05/13] Scrape uploader url --- youtube_dl/extractor/xvideos.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 8f6af255a..36eed1f42 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -37,6 +37,9 @@ class XVideosIE(InfoExtractor): 'age_limit': 18, 'tags': ['lesbian', 'teen', 'hardcore', 'latina', 'rough', 'squirt', 'big-ass', 'cheater', 'twistys', 'cheat', 'ass-play', 'when-girls-play'], 'creator': 'Twistys', + 'uploader': 'Twistys', + 'uploader_id': 'Twistys', + 'uploader_url': '/channels/twistys1', 'actors': [{'given_name': 'Adriana Chechik', 'url': 'https://www.xvideos.com/pornstars/adriana-chechik'}, {'given_name': 'Abella Danger', 'url': 'https://www.xvideos.com/pornstars/abella-danger'}], 'views': int, } @@ -145,7 +148,9 @@ class XVideosIE(InfoExtractor): tags = self._search_regex(r'(?P.+?)<', webpage, 'creator', group='creator') + creator_data = re.findall(r'(?P.+?)<', webpage) + creator = creator_data[0][1] + uploader_url = creator_data[0][0] actors_data = re.findall(r'href="(?P/pornstars/.+?)" class="btn btn-default label profile hover-name">(?P.+?)', webpage) actors = [] @@ -166,6 +171,9 @@ class XVideosIE(InfoExtractor): 'age_limit': 18, 'tags': tags, 'creator': creator, + 'uploader': creator, + 'uploader_id': creator, + 'uploader_url': uploader_url, 'actors': actors, 'views': str_to_int(views), } From fd7aaa6ae990e8cb06b6e77dab00e440609860df Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 15:28:09 +0100 Subject: [PATCH 06/13] Rename views to view_count --- youtube_dl/extractor/xvideos.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 36eed1f42..bdd799619 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -41,7 +41,7 @@ class XVideosIE(InfoExtractor): 'uploader_id': 'Twistys', 'uploader_url': '/channels/twistys1', 'actors': [{'given_name': 'Adriana Chechik', 'url': 'https://www.xvideos.com/pornstars/adriana-chechik'}, {'given_name': 'Abella Danger', 'url': 'https://www.xvideos.com/pornstars/abella-danger'}], - 'views': int, + 'view_count': int, } }, { 'url': 'https://flashservice.xvideos.com/embedframe/4588838', @@ -175,5 +175,5 @@ class XVideosIE(InfoExtractor): 'uploader_id': creator, 'uploader_url': uploader_url, 'actors': actors, - 'views': str_to_int(views), + 'view_count': str_to_int(views), } From 48e5ff4bb1122fc9ce2edf8d47f408efe33a6ae4 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 15:45:23 +0100 Subject: [PATCH 07/13] Add default fallback values --- youtube_dl/extractor/xvideos.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index bdd799619..f7820e196 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -146,7 +146,7 @@ class XVideosIE(InfoExtractor): self._sort_formats(formats) - tags = self._search_regex(r'(?P.+?)<', webpage) creator = creator_data[0][1] @@ -160,7 +160,7 @@ class XVideosIE(InfoExtractor): 'url': urljoin(url, actor_tuple[0]), }) - views = self._search_regex(r'(?P.+?)<', webpage, 'views', group='views') + views = self._search_regex(r'(?P.+?)<', webpage, 'views', group='views', default=0) return { 'id': video_id, From 2606c3108e241e3b3a6019a93372e545b9dd140d Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 15:50:14 +0100 Subject: [PATCH 08/13] Check whether actors_data has been found --- youtube_dl/extractor/xvideos.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index f7820e196..e15b50870 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -154,11 +154,12 @@ class XVideosIE(InfoExtractor): actors_data = re.findall(r'href="(?P/pornstars/.+?)" class="btn btn-default label profile hover-name">(?P.+?)', webpage) actors = [] - for actor_tuple in actors_data: - actors.append({ - 'given_name': actor_tuple[1], - 'url': urljoin(url, actor_tuple[0]), - }) + if actors_data is not None: + for actor_tuple in actors_data: + actors.append({ + 'given_name': actor_tuple[1], + 'url': urljoin(url, actor_tuple[0]), + }) views = self._search_regex(r'(?P.+?)<', webpage, 'views', group='views', default=0) From 3aba2a79124b0db9fef959ad2d0fba6de6fc3b92 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 15:51:58 +0100 Subject: [PATCH 09/13] Add default fallback values for creator_data --- youtube_dl/extractor/xvideos.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index e15b50870..5102fdffb 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -149,8 +149,11 @@ class XVideosIE(InfoExtractor): tags = self._search_regex(r'(?P.+?)<', webpage) - creator = creator_data[0][1] - uploader_url = creator_data[0][0] + creator = '' + uploader_url = '' + if creator_data is not None: + creator = creator_data[0][1] + uploader_url = creator_data[0][0] actors_data = re.findall(r'href="(?P/pornstars/.+?)" class="btn btn-default label profile hover-name">(?P.+?)', webpage) actors = [] From e048a6184bacd9a87d56b974cf961c093fec24c1 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 16:50:40 +0100 Subject: [PATCH 10/13] Fix checks on empty data --- youtube_dl/extractor/xvideos.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 5102fdffb..42c161bcc 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -151,13 +151,13 @@ class XVideosIE(InfoExtractor): creator_data = re.findall(r'(?P.+?)<', webpage) creator = '' uploader_url = '' - if creator_data is not None: + if creator_data != []: creator = creator_data[0][1] uploader_url = creator_data[0][0] actors_data = re.findall(r'href="(?P/pornstars/.+?)" class="btn btn-default label profile hover-name">(?P.+?)', webpage) actors = [] - if actors_data is not None: + if actors_data != []: for actor_tuple in actors_data: actors.append({ 'given_name': actor_tuple[1], From 109b85c534a70e13f185779534cd1fccc4bc36e9 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 16:50:40 +0100 Subject: [PATCH 11/13] Fix checks on empty data modified: youtube_dl/extractor/xvideos.py --- youtube_dl/extractor/xvideos.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 5102fdffb..410496743 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -151,20 +151,20 @@ class XVideosIE(InfoExtractor): creator_data = re.findall(r'(?P.+?)<', webpage) creator = '' uploader_url = '' - if creator_data is not None: + if creator_data != []: creator = creator_data[0][1] uploader_url = creator_data[0][0] actors_data = re.findall(r'href="(?P/pornstars/.+?)" class="btn btn-default label profile hover-name">(?P.+?)', webpage) actors = [] - if actors_data is not None: + if actors_data != []: for actor_tuple in actors_data: actors.append({ 'given_name': actor_tuple[1], 'url': urljoin(url, actor_tuple[0]), }) - views = self._search_regex(r'(?P.+?)<', webpage, 'views', group='views', default=0) + views = self._search_regex(r'(?P.+?)<', webpage, 'views', group='views', default=None) return { 'id': video_id, From f124fd7f689ca58a234188ed3c0efe08fbb0d853 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sun, 27 Feb 2022 01:30:18 +0100 Subject: [PATCH 12/13] Apply refactoring suggested by @rautamiekka --- youtube_dl/extractor/xvideos.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 410496743..e392698ac 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -152,8 +152,7 @@ class XVideosIE(InfoExtractor): creator = '' uploader_url = '' if creator_data != []: - creator = creator_data[0][1] - uploader_url = creator_data[0][0] + uploader_url, creator = creator_data[0][0:2] actors_data = re.findall(r'href="(?P/pornstars/.+?)" class="btn btn-default label profile hover-name">(?P.+?)', webpage) actors = [] From f2a095884c293ebffd2363dc5b24c644bf6f4837 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sun, 27 Feb 2022 01:41:22 +0100 Subject: [PATCH 13/13] Set uploader default values to None --- youtube_dl/extractor/xvideos.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index e392698ac..09d5bf877 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -149,8 +149,8 @@ class XVideosIE(InfoExtractor): tags = self._search_regex(r'(?P.+?)<', webpage) - creator = '' - uploader_url = '' + creator = None + uploader_url = None if creator_data != []: uploader_url, creator = creator_data[0][0:2]