[UNO] Add extractor for United Nations

2 years ago · 09476ecdde
parent 47b0c8697a
commit 09476ecdde
2 changed files with 51 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -1388,6 +1388,7 @@ from .dlive import (
 from .umg import UMGDeIE
 from .unistra import UnistraIE
 from .unity import UnityIE
+from .uno import UNOIE
 from .uol import UOLIE
 from .uplynk import (
    UplynkIE,
--- a/youtube_dl/extractor/uno.py
+++ b/youtube_dl/extractor/uno.py
@ -0,0 +1,50 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    unified_timestamp,
+    url_or_none,
+)
+
+
+class UNOIE(InfoExtractor):
+    """Extractor for media.un.org asset pages; the media itself is resolved by the Kaltura extractor."""
+    _VALID_URL = r'https?://media\.un\.org/(?:\w+/)+(?P<id>k\d[\w]+)'
+    _TESTS = [{
+        'url': 'https://media.un.org/en/asset/k1r/k1r3vy9ikk',
+        # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
+        'info_dict': {
+            'id': '1_r3vy9ikk',
+            'ext': 'mp4',
+            'title': 'md5:abde2a46d396051535e5e6fd6f627a19',
+            'description': 'md5:2cba11ee153ae3e6ae2c629e7c4e39b0',
+            'thumbnail': 're:https?://.+/thumbnail/.+',
+            'duration': 5768,
+            'timestamp': 1625216872,
+            'upload_date': '20210702',
+            'uploader_id': 'UNWebTV_New_York',
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Return a url_transparent result pointing at Kaltura, with metadata scraped from the page."""
+        page_id = self._match_id(url)
+        video_id = page_id[1:2] + '_' + page_id[2:]  # drop the 'k' prefix: 'k1r3vy9ikk' -> '1_r3vy9ikk'
+        webpage = self._download_webpage(url, video_id)
+        title = (  # prefer the meta tags; otherwise use <title> minus its trailing '| ...' part
+            self._html_search_meta(('title', 'og:title'), webpage)
+            or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title\b', webpage, 'title').rsplit('|', 1)[0]).strip()
+        partner_id = self._search_regex(r'partnerId\s*:\s*(\d+)\b', webpage, 'Partner ID')  # required, so fatal
+        result = self.url_result(
+            'kaltura:%s:%s' % (partner_id, video_id), 'Kaltura', video_title=title, video_id=video_id)
+        # Everything added below is optional metadata, so each lookup must be non-fatal.
+        result.update({
+            '_type': 'url_transparent',
+            'description': self._html_search_meta(('description', 'og:description'), webpage, 'description'),
+            'creator': self._html_search_meta('author', webpage),
+            'uploader_id': self._html_search_meta('publisher', webpage),  # key was misspelt 'upoader_id'
+            'thumbnail': url_or_none(self._og_search_thumbnail(webpage)),
+            'timestamp': unified_timestamp(self._og_search_property('updated_time', webpage, fatal=False)),
+        })
+        return result