From 3687a4f9bdb025855acd2a4e782748c8e3810765 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sat, 4 Jun 2022 08:07:54 +0100 Subject: [PATCH 1/2] [DoodStream] Add extractor from yt-dlp back-port and improve --- youtube_dl/extractor/doodstream.py | 119 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 120 insertions(+) create mode 100644 youtube_dl/extractor/doodstream.py diff --git a/youtube_dl/extractor/doodstream.py b/youtube_dl/extractor/doodstream.py new file mode 100644 index 000000000..70bf4e512 --- /dev/null +++ b/youtube_dl/extractor/doodstream.py @@ -0,0 +1,119 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import random +import string +import time + +from ..compat import compat_filter as filter +from ..utils import ( + clean_html, + ExtractorError, + get_element_by_class, + parse_duration, + parse_filesize, + update_url_query, + unified_strdate, + url_or_none, +) + +from .common import InfoExtractor + + +class DoodStreamIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch)/[ed]/(?P[a-z0-9]+)' + _TESTS = [{ + 'url': 'http://dood.to/e/5s1wmbdacezb', + 'md5': '4568b83b31e13242b3f1ff96c55f0595', + 'info_dict': { + 'id': '5s1wmbdacezb', + 'ext': 'mp4', + 'title': 'Kat Wonders - Monthly May 2020', + 'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com', + 'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg', + }, + 'skip': 'Video not found', + }, { + 'url': 'http://dood.watch/d/5s1wmbdacezb', + 'md5': '4568b83b31e13242b3f1ff96c55f0595', + 'info_dict': { + 'id': '5s1wmbdacezb', + 'ext': 'mp4', + 'title': 'Kat Wonders - Monthly May 2020', + 'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com', + 'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg', + }, + 'skip': 'Video not found', + }, { + 'url': 'https://dood.to/d/jzrxn12t2s7n', + 'md5': '3207e199426eca7c2aa23c2872e6728a', + 'info_dict': { + 'id': 'jzrxn12t2s7n', + 'ext': 'mp4', + 'title': 'Stacy Cruz Cute ALLWAYSWELL', + 'description': 'Stacy Cruz Cute ALLWAYSWELL | DoodStream.com', + 'thumbnail': 'https://img.doodcdn.com/snaps/8edqd5nppkac3x8u.jpg', + }, + 'skip': 'Video not found', + }, { + 'url': 'https://dood.to/d/is34uy8wvaet', + 'md5': '04740d3ba93bcd638aa7a097d9226710', + 'info_dict': { + 'id': 'is34uy8wvaet', + 'ext': 'mp4', + 'title': 'Akhanda (2021) Telugu DVDScr MP3 700MB - DoodStream', + 'upload_date': '20211202', + 'filesize_approx': int, + 'duration': 9886, + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + url = 'https://dood.to/e/' + video_id + headers = { + 'User-Agent': 'Mozilla/5.0', # (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0', + } + webpage = self._download_webpage(url, video_id, headers=headers) + + title = self._html_search_meta(('og:title', 'twitter:title'), webpage, default=None) + if not title: + title = self._html_search_regex(r']*>([^<]+?)(?:\|\s+DoodStream\s*)? Date: Sat, 4 Jun 2022 09:26:41 +0100 Subject: [PATCH 2/2] [DoodStream] Support more TLDs * also fix title extraction --- youtube_dl/extractor/doodstream.py | 35 +++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/doodstream.py b/youtube_dl/extractor/doodstream.py index 70bf4e512..94411ebb0 100644 --- a/youtube_dl/extractor/doodstream.py +++ b/youtube_dl/extractor/doodstream.py @@ -21,7 +21,7 @@ from .common import InfoExtractor class DoodStreamIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch)/[ed]/(?P[a-z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch|so|la|pm|sh|ws|one)/[ed]/(?P[a-z0-9]+)' _TESTS = [{ 'url': 'http://dood.to/e/5s1wmbdacezb', 'md5': '4568b83b31e13242b3f1ff96c55f0595', @@ -61,11 +61,24 @@ class DoodStreamIE(InfoExtractor): 'info_dict': { 'id': 'is34uy8wvaet', 'ext': 'mp4', - 'title': 'Akhanda (2021) Telugu DVDScr MP3 700MB - DoodStream', + 'title': 'Akhanda (2021) Telugu DVDScr MP3 700MB', 'upload_date': '20211202', + 'thumbnail': r're:https?://img\.doodcdn\.com?/[\w/]+\.jpg', 'filesize_approx': int, 'duration': 9886, } + }, { + 'url': 'https://dood.so/d/wlihoael8uog', + 'md5': '2c14444c89788cc309738c1560abe278', + 'info_dict': { + 'id': 'wlihoael8uog', + 'ext': 'mp4', + 'title': 'VID 20220319 161659', + 'thumbnail': r're:https?://img\.doodcdn\.com?/splash/rmpnhb8ckkk79cge\.jpg', + 'upload_date': '20220319', + 'filesize_approx': int, + 'duration': 12.0, + } }] def _real_extract(self, url): @@ -76,11 +89,12 @@ class DoodStreamIE(InfoExtractor): } webpage = self._download_webpage(url, video_id, headers=headers) - title = self._html_search_meta(('og:title', 'twitter:title'), webpage, default=None) - if not title: - title = self._html_search_regex(r']*>([^<]+?)(?:\|\s+DoodStream\s*)?]*>([^<]+?)(?:[|-]\s+DoodStream\s*)?