From 09476ecddeec3b5432d0dc702bb083f194f04e81 Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 7 Feb 2022 20:13:59 +0000 Subject: [PATCH 1/2] [UNO] Add extractor for United Nations --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/uno.py | 50 ++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 youtube_dl/extractor/uno.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 50b7cb4a0..773d47b5f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1388,6 +1388,7 @@ from .dlive import ( from .umg import UMGDeIE from .unistra import UnistraIE from .unity import UnityIE +from .uno import UNOIE from .uol import UOLIE from .uplynk import ( UplynkIE, diff --git a/youtube_dl/extractor/uno.py b/youtube_dl/extractor/uno.py new file mode 100644 index 000000000..b636e2d75 --- /dev/null +++ b/youtube_dl/extractor/uno.py @@ -0,0 +1,50 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + unified_timestamp, + url_or_none, +) + + +class UNOIE(InfoExtractor): + _VALID_URL = r'https?://media\.un\.org/(?:\w+/)+(?Pk\d[\w]+)' + _TESTS = [{ + 'url': 'https://media.un.org/en/asset/k1r/k1r3vy9ikk', + # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb', + 'info_dict': { + 'id': '1_r3vy9ikk', + 'ext': 'mp4', + 'title': 'md5:abde2a46d396051535e5e6fd6f627a19', + 'description': 'md5:2cba11ee153ae3e6ae2c629e7c4e39b0', + 'thumbnail': 're:https?://.+/thumbnail/.+', + 'duration': 5768, + 'timestamp': 1625216872, + 'upload_date': '20210702', + 'uploader_id': 'UNWebTV_New_York', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + video_id = video_id[1:2] + '_' + video_id[2:] + webpage = self._download_webpage(url, video_id) + title = ( + self._html_search_meta(('title', 'og:title'), webpage) + or self._html_search_regex(r']*>([^<]+) Date: Tue, 8 Feb 2022 01:48:32 +0000 Subject: [PATCH 2/2] New extraction tactic for Kaltura ID using image URL --- youtube_dl/extractor/uno.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/uno.py b/youtube_dl/extractor/uno.py index b636e2d75..e9f064bd8 100644 --- a/youtube_dl/extractor/uno.py +++ b/youtube_dl/extractor/uno.py @@ -12,7 +12,7 @@ class UNOIE(InfoExtractor): _VALID_URL = r'https?://media\.un\.org/(?:\w+/)+(?Pk\d[\w]+)' _TESTS = [{ 'url': 'https://media.un.org/en/asset/k1r/k1r3vy9ikk', - # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb', + 'md5': '981c41cb283227f079d1e5059fd0d30c', 'info_dict': { 'id': '1_r3vy9ikk', 'ext': 'mp4', @@ -23,17 +23,31 @@ class UNOIE(InfoExtractor): 'timestamp': 1625216872, 'upload_date': '20210702', 'uploader_id': 'UNWebTV_New_York', + } + }, { + 'url': 'https://media.un.org/en/asset/k12/k12gpkg3qx', + 'md5': '5978503ca886a922a0f00cf5a7e82395', + 'info_dict': { + 'id': '1_vohfjqkj', + 'ext': 'mp4', + 'title': '1851st Meeting, 81st session Committee on the Elimination of Discrimination Against Women (CEDAW)', + 'description': 'Informal meeting with NGOs and human rights institutions - 1851st Meeting, 81st session CEDAW', + 'thumbnail': 're:https?://.+/thumbnail/.+', + 'duration': 3502, + 'timestamp': 1644235332, + 'upload_date': '20220207', + 'uploader_id': 'nathalie.minard@un.org', }, }] def _real_extract(self, url): video_id = self._match_id(url) - video_id = video_id[1:2] + '_' + video_id[2:] webpage = self._download_webpage(url, video_id) + partner_id = self._search_regex(r'partnerId\s*:\s*(\d+)\b', webpage, 'Partner ID') + video_id = self._search_regex(r'/p/%s(?:/\w+)+?/entry_id/(\w+)/' % (partner_id, ), webpage, 'Kaltura ID') title = ( self._html_search_meta(('title', 'og:title'), webpage) or self._html_search_regex(r']*>([^<]+)