[MTVA Archivum] Add new extractor

Add new extractor for MTVA Archivum site. Closes #21430
7 months ago · de71230165
parent 00ef748cc0
commit de71230165
2 changed files with 65 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -726,6 +726,7 @@ from .movieclips import MovieClipsIE
 from .moviezine import MoviezineIE
 from .movingimage import MovingImageIE
 from .msn import MSNIE
+from .mtvaarchivum import MtvaArchivumIE
 from .mtv import (
    MTVIE,
    MTVVideoIE,
--- a/youtube_dl/extractor/mtvaarchivum.py
+++ b/youtube_dl/extractor/mtvaarchivum.py
@@ -0,0 +1,64 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class MtvaArchivumIE(InfoExtractor):
+    """Extractor for M3 programme pages on archivum.mtva.hu."""
+    _VALID_URL = r'https?://archivum\.mtva\.hu/m3/(?P<id>M3-[a-zA-Z0-9]+)'
+    _TESTS = [{
+        'url': 'https://archivum.mtva.hu/m3/M3-87720998249999359',
+        'info_dict': {
+            'id': 'M3-87720998249999359',
+            'ext': 'mp4',
+            'title': 'Kék egér',
+            'description': 'Kék egér nem sokáig örülhet a napsütésnek, mert egy kölyökkutya azt hiszi, kutyáknak való játék ez a kék valami. A Kék egér tiltakozása ellenére csak akkor engedi el az egeret, amikor az elásott csontja helyét megtalálja a kutya. A menekülő egérke elbotlik egy fél perecben aminek nagyon megörül, de egy erőszakos galamb meghívatja magát a perecre. Némi ellenszolgáltatás, és egy jó tanács fejében az egészet felfalja.',
+        },
+    }, {
+        'url': 'https://archivum.mtva.hu/m3/M3-59898941410999595',
+        'info_dict': {
+            'id': 'M3-59898941410999595',
+            'ext': 'mp4',
+            'title': 'Magyar retro',
+            'description': 'MTVA Archívum',
+        }
+    }, {
+        'url': 'https://archivum.mtva.hu/m3/M3-59968988460999294',
+        'info_dict': {
+            'id': 'M3-59968988460999294',
+            'ext': 'mp4',
+            'title': 'FŐTÉR',
+            'description': 'MTVA Archívum',
+        }
+    }, {
+        'url': 'https://archivum.mtva.hu/m3/M3-599A8939770999694',
+        'info_dict': {
+            'id': 'M3-599A8939770999694',
+            'ext': 'mp4',
+            'title': 'Csináljuk a fesztivált!',
+            'description': 'MTVA Archívum',
+        }
+    }]
+
+    def _real_extract(self, url):
+        """Build the info dict (id, title, description, thumbnail, formats) for url."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        # The stream endpoint answers with JSON; its 'url' field is handed to the m3u8 parser.
+        stream = self._download_json('https://archivum.mtva.hu/m3/stream?no_lb=1&target=' + video_id, video_id)
+        formats = self._extract_m3u8_formats(stream['url'], video_id, 'mp4')
+        self._sort_formats(formats)
+        # default=None keeps the og:title lookup from raising, so the <h1> fallback is reachable;
+        # each fallback pattern carries the capture group that _html_search_regex returns.
+        title = self._og_search_title(webpage, default=None) or self._html_search_regex(
+            r'<h1 class="active-title">(.+?)</h1>', webpage, 'title')
+        description = self._og_search_description(webpage) or self._html_search_regex(
+            r'(?s)<p class="active-full-description">\s*(.+?)\s*</p>', webpage, 'description', fatal=False)
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'formats': formats,
+        }