From 6b54fcef206d754154f62e5fdacfcea056d1d9dc Mon Sep 17 00:00:00 2001 From: mbunse <5903796+mbunse@users.noreply.github.com> Date: Sat, 21 Jan 2023 01:10:22 +0100 Subject: [PATCH 1/8] [VideoCdn] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/videocdn.py | 72 ++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 youtube_dl/extractor/videocdn.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 947cbe8fd..e0ddf9d99 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -626,6 +626,7 @@ from .livestream import ( ) from .lnkgo import LnkGoIE from .localnews8 import LocalNews8IE +from .videocdn import VideoCdnIE from .lovehomeporn import LoveHomePornIE from .lrt import LRTIE from .lynda import ( diff --git a/youtube_dl/extractor/videocdn.py b/youtube_dl/extractor/videocdn.py new file mode 100644 index 000000000..7dd84d063 --- /dev/null +++ b/youtube_dl/extractor/videocdn.py @@ -0,0 +1,72 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urlparse, +) + +from ..utils import determine_ext + + +class VideoCdnIE(InfoExtractor): + _VALID_URL = r'https?://e\.video-cdn\.net/video?.*video-id=(?P<id>[a-zA-Z0-9-_]+).*' + _TESTS = [ + { + 'url': 'https://e.video-cdn.net/video?video-id=8eBUrWaMJFS38A5X-j2CgY&player-id=53Tun3ZZpZpVuvaTvsm3jU', + 'info_dict': { + 'id': '8eBUrWaMJFS38A5X-j2CgY', + 'ext': 'mp4', + 'title': 'RiskBuster FireFighter VI - Adventskranz', + 'thumbnail': r're:(?i)https://.*\.jpeg', + }, + }, + { + 'url': 'https://e.video-cdn.net/video?video-id=91imQ_wKjkTFghe-3mmBAA&player-id=7nCLZ_ESM8rT9YUw6qUGA9', + 'info_dict': { + 'id': '91imQ_wKjkTFghe-3mmBAA', + 'ext': 'mp4', + 'title': 'SCC2019_Talk_Tychsen_TXL.mp4', + 'thumbnail': r're:(?i)https://.*\.jpeg', + }, + }, + ] + + def _real_extract(self, url): + video_id = 
self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + formats = [] + + video_id = self._search_regex( + r'(?ims)<div[^>]+mi24-video-player+.*video-id=[\"\'](?P<videoid>[A-Za-z0-9_-]+)', + webpage, 'video id', group='videoid') + + thumbnail = self._search_regex( + r'\"thumbnailUrl\":\"(?P<thumbnail>[^\"]+)', + webpage, 'thumbnail', group='thumbnail') + + title = self._search_regex( + r'\"name\":\"(?P<title>[^\"]+)', + webpage, 'title', group='title') + + manifest_url = self._search_regex( + r'\"contentUrl\":\"(?P<manifesturl>[^\"]+)', + webpage, + 'manifest_url', group='manifesturl' + ) + + if isinstance(manifest_url, compat_str) and determine_ext(manifest_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + compat_urlparse.urljoin(url, manifest_url), + video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='m3u8')) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'formats': formats, + } From aec8c6b570c66fb24b79b6635c1bca76e3261c2a Mon Sep 17 00:00:00 2001 From: Moritz Bunse <5903796+mbunse@users.noreply.github.com> Date: Sat, 21 Jan 2023 20:11:25 +0100 Subject: [PATCH 2/8] Update youtube_dl/extractor/videocdn.py Co-authored-by: dirkf <fieldhouse@gmx.net> --- youtube_dl/extractor/videocdn.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/videocdn.py b/youtube_dl/extractor/videocdn.py index 7dd84d063..6e530eda2 100644 --- a/youtube_dl/extractor/videocdn.py +++ b/youtube_dl/extractor/videocdn.py @@ -7,7 +7,9 @@ from ..compat import ( compat_urlparse, ) -from ..utils import determine_ext +from ..utils import ( + determine_ext, +) class VideoCdnIE(InfoExtractor): From bfca7e849cc95d3dd54b31723a623636a5a37603 Mon Sep 17 00:00:00 2001 From: mbunse <5903796+mbunse@users.noreply.github.com> Date: Sat, 21 Jan 2023 20:16:21 +0100 Subject: [PATCH 3/8] remove duplicate video id extraction --- youtube_dl/extractor/videocdn.py | 4 ---- 1 file changed, 4 deletions(-) diff --git 
a/youtube_dl/extractor/videocdn.py b/youtube_dl/extractor/videocdn.py index 7dd84d063..5dea84359 100644 --- a/youtube_dl/extractor/videocdn.py +++ b/youtube_dl/extractor/videocdn.py @@ -40,10 +40,6 @@ class VideoCdnIE(InfoExtractor): formats = [] - video_id = self._search_regex( - r'(?ims)<div[^>]+mi24-video-player+.*video-id=[\"\'](?P<videoid>[A-Za-z0-9_-]+)', - webpage, 'video id', group='videoid') - thumbnail = self._search_regex( r'\"thumbnailUrl\":\"(?P<thumbnail>[^\"]+)', webpage, 'thumbnail', group='thumbnail') From 6ff4cde8e81f6b44d085860e082ddea1e5d24903 Mon Sep 17 00:00:00 2001 From: Moritz Bunse <5903796+mbunse@users.noreply.github.com> Date: Sat, 21 Jan 2023 20:17:48 +0100 Subject: [PATCH 4/8] Update youtube_dl/extractor/videocdn.py Co-authored-by: dirkf <fieldhouse@gmx.net> --- youtube_dl/extractor/videocdn.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/videocdn.py b/youtube_dl/extractor/videocdn.py index 8f3bfa5ba..eae730dcd 100644 --- a/youtube_dl/extractor/videocdn.py +++ b/youtube_dl/extractor/videocdn.py @@ -44,7 +44,8 @@ class VideoCdnIE(InfoExtractor): thumbnail = self._search_regex( r'\"thumbnailUrl\":\"(?P<thumbnail>[^\"]+)', - webpage, 'thumbnail', group='thumbnail') + webpage, 'thumbnail', group='thumbnail', + default=None) title = self._search_regex( r'\"name\":\"(?P<title>[^\"]+)', From bdd0cf461141b1df7bafdef17e05d92c5148fbc3 Mon Sep 17 00:00:00 2001 From: mbunse <5903796+mbunse@users.noreply.github.com> Date: Sat, 21 Jan 2023 20:27:41 +0100 Subject: [PATCH 5/8] apply suggested improvements --- youtube_dl/extractor/videocdn.py | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/videocdn.py b/youtube_dl/extractor/videocdn.py index eae730dcd..45b3e3869 100644 --- a/youtube_dl/extractor/videocdn.py +++ b/youtube_dl/extractor/videocdn.py @@ -2,13 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor -from 
..compat import ( - compat_str, - compat_urlparse, -) +from ..compat import compat_urlparse from ..utils import ( determine_ext, + urljoin, ) @@ -40,24 +38,18 @@ class VideoCdnIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - formats = [] - thumbnail = self._search_regex( r'\"thumbnailUrl\":\"(?P<thumbnail>[^\"]+)', webpage, 'thumbnail', group='thumbnail', default=None) - title = self._search_regex( - r'\"name\":\"(?P<title>[^\"]+)', - webpage, 'title', group='title') + title = self._search_regex(r'"name"\s*:\s*"((?:\\"|[^"])+)', webpage, 'title') - manifest_url = self._search_regex( - r'\"contentUrl\":\"(?P<manifesturl>[^\"]+)', - webpage, - 'manifest_url', group='manifesturl' - ) + manifest_url = self._search_regex(r'"contentUrl"\s*:\s*"((?:\\"|[^"])+)', webpage, 'manifest_url') + manifest_url = urljoin(url, manifest_url) - if isinstance(manifest_url, compat_str) and determine_ext(manifest_url) == 'm3u8': + formats = [] + if manifest_url and determine_ext(manifest_url) == 'm3u8': formats.extend(self._extract_m3u8_formats( compat_urlparse.urljoin(url, manifest_url), video_id, 'mp4', From 5d37e5e19d7962665e1ab689d7559ec92a3d46ea Mon Sep 17 00:00:00 2001 From: Moritz Bunse <5903796+mbunse@users.noreply.github.com> Date: Sat, 21 Jan 2023 20:28:23 +0100 Subject: [PATCH 6/8] Update youtube_dl/extractor/videocdn.py Co-authored-by: dirkf <fieldhouse@gmx.net> --- youtube_dl/extractor/videocdn.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/videocdn.py b/youtube_dl/extractor/videocdn.py index 45b3e3869..ba0f197fb 100644 --- a/youtube_dl/extractor/videocdn.py +++ b/youtube_dl/extractor/videocdn.py @@ -54,6 +54,7 @@ class VideoCdnIE(InfoExtractor): compat_urlparse.urljoin(url, manifest_url), video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='m3u8')) + self._sort_formats(formats) return { 'id': video_id, From 47390b13559d800eed12572c805a475bef37373b Mon Sep 17 00:00:00 2001 From: Moritz Bunse 
<5903796+mbunse@users.noreply.github.com> Date: Sat, 21 Jan 2023 20:28:53 +0100 Subject: [PATCH 7/8] Update youtube_dl/extractor/videocdn.py Co-authored-by: dirkf <fieldhouse@gmx.net> --- youtube_dl/extractor/videocdn.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/videocdn.py b/youtube_dl/extractor/videocdn.py index ba0f197fb..3825156b6 100644 --- a/youtube_dl/extractor/videocdn.py +++ b/youtube_dl/extractor/videocdn.py @@ -51,8 +51,7 @@ class VideoCdnIE(InfoExtractor): formats = [] if manifest_url and determine_ext(manifest_url) == 'm3u8': formats.extend(self._extract_m3u8_formats( - compat_urlparse.urljoin(url, manifest_url), - video_id, 'mp4', + manifest_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='m3u8')) self._sort_formats(formats) From 638ec38eb53c1e0c90d5e2feb82a32bc789835a4 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Mon, 23 Jan 2023 13:27:52 +0000 Subject: [PATCH 8/8] Linted --- youtube_dl/extractor/videocdn.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/videocdn.py b/youtube_dl/extractor/videocdn.py index 3825156b6..995d52ff4 100644 --- a/youtube_dl/extractor/videocdn.py +++ b/youtube_dl/extractor/videocdn.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( determine_ext,