Merge a60972e253 into 71211e7db7

2 months ago · c48b8c440b
parent 71211e7db7 a60972e253
commit c48b8c440b
2 changed files with 240 additions and 677 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -848,14 +848,13 @@ from .nowness import (
 from .noz import NozIE
 from .npo import (
    AndereTijdenIE,
    BNNVaraIE,
    NPOIE,
-    NPOLiveIE,
+    ONIE,
    NPORadioIE,
    NPORadioFragmentIE,
    SchoolTVIE,
    HetKlokhuisIE,
    VPROIE,
-    WNLIE,
+    ZAPPIE,
 )
 from .npr import NprIE
 from .nrk import (
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -1,767 +1,331 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
-from ..compat import (
+from ..utils import ExtractorError
    compat_HTTPError,
    compat_str,
 )
 from ..utils import (
    determine_ext,
    ExtractorError,
    fix_xml_ampersands,
    int_or_none,
    merge_dicts,
    orderedSet,
    parse_duration,
    qualities,
    str_or_none,
    strip_jsonp,
    unified_strdate,
    unified_timestamp,
    url_or_none,
    urlencode_postdata,
 )
 class NPOBaseIE(InfoExtractor):
    def _get_token(self, video_id):
        return self._download_json(
            'http://ida.omroep.nl/app.php/auth', video_id,
            note='Downloading token')['token']
-class NPOIE(NPOBaseIE):
+class NPOIE(InfoExtractor):
    IE_NAME = 'npo'
-    IE_DESC = 'npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl'
+    IE_DESC = 'npo.nl'
-    _VALID_URL = r'''(?x)
+    _VALID_URL = r'https?://(?:www\.)?npo\.nl/start/serie/'
                    (?:
                        npo:|
                        https?://
                            (?:www\.)?
                            (?:
                                npo\.nl/(?:[^/]+/)*|
                                (?:ntr|npostart)\.nl/(?:[^/]+/){2,}|
                                omroepwnl\.nl/video/fragment/[^/]+__|
                                (?:zapp|npo3)\.nl/(?:[^/]+/){2,}
                            )
                        )
                        (?P<id>[^/?#]+)
                '''
    _TESTS = [{
-        'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
+        'url': 'https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/',
-        'md5': '4b3f9c429157ec4775f2c9cb7b911016',
+        'md5': 'f9ce9c43cc8bc3b8138df1562b99c379',
        'info_dict': {
            'id': 'VPWON_1220719',
            'ext': 'm4v',
            'title': 'Nieuwsuur',
            'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
            'upload_date': '20140622',
        },
    }, {
        'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800',
        'md5': 'da50a5787dbfc1603c4ad80f31c5120b',
        'info_dict': {
            'id': 'VARA_101191800',
            'ext': 'm4v',
            'title': 'De Mega Mike & Mega Thomas show: The best of.',
            'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4',
            'upload_date': '20090227',
            'duration': 2400,
        },
    }, {
        'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289',
        'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
        'info_dict': {
            'id': 'VPWON_1169289',
            'ext': 'm4v',
            'title': 'Tegenlicht: Zwart geld. De toekomst komt uit Afrika',
            'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
            'upload_date': '20130225',
            'duration': 3000,
        },
    }, {
        'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706',
        'info_dict': {
-            'id': 'WO_VPRO_043706',
+            'description': 'Wie is de mol? (2)',
            'duration': 2439,
            'ext': 'm4v',
-            'title': 'De nieuwe mens - Deel 1',
+            'id': 'wie-is-de-mol-2',
-            'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b',
+            'thumbnail': 'https://assets-start.npo.nl/resources/2023/07/01/e723c3cf-3e42-418a-9ba5-f6dbb64b516a.jpg',
-            'duration': 4680,
+            'title': 'Wie is de mol? (2)'
        },
        'params': {
            'skip_download': True,
        }
    }, {
-        # non asf in streams
+        'url': 'https://npo.nl/start/serie/vpro-tegenlicht/seizoen-11/zwart-geld-de-toekomst-komt-uit-afrika',
-        'url': 'http://www.npo.nl/hoe-gaat-europa-verder-na-parijs/10-01-2015/WO_NOS_762771',
+        'md5': 'c84d054219c4888ed53b4ee3d01b2d93',
        'info_dict': {
-            'id': 'WO_NOS_762771',
+            'id': 'zwart-geld-de-toekomst-komt-uit-afrika',
            'title': 'Zwart geld: de toekomst komt uit Afrika',
            'ext': 'mp4',
-            'title': 'Hoe gaat Europa verder na Parijs?',
+            'description': 'Zwart geld: de toekomst komt uit Afrika',
            'thumbnail': 'https://assets-start.npo.nl/resources/2023/06/30/d9879593-1944-4249-990c-1561dac14d8e.jpg',
            'duration': 3000
        },
        'params': {
            'skip_download': True,
        }
    }, {
        'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content',
        'info_dict': {
            'id': 'VPWON_1233944',
            'ext': 'm4v',
            'title': 'Aap, poot, pies',
            'description': 'md5:c9c8005d1869ae65b858e82c01a91fde',
            'upload_date': '20150508',
            'duration': 599,
        },
        'params': {
            'skip_download': True,
        }
    }, {
        'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698',
        'info_dict': {
            'id': 'POW_00996502',
            'ext': 'm4v',
            'title': '''"Dit is wel een 'landslide'..."''',
            'description': 'md5:f8d66d537dfb641380226e31ca57b8e8',
            'upload_date': '20150508',
            'duration': 462,
        },
        'params': {
            'skip_download': True,
        }
    }, {
        # audio
        'url': 'http://www.npo.nl/jouw-stad-rotterdam/29-01-2017/RBX_FUNX_6683215/RBX_FUNX_7601437',
        'info_dict': {
            'id': 'RBX_FUNX_6683215',
            'ext': 'mp3',
            'title': 'Jouw Stad Rotterdam',
            'description': 'md5:db251505244f097717ec59fabc372d9f',
        },
        'params': {
            'skip_download': True,
        }
    }, {
        'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547',
        'only_matching': True,
    }, {
        'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118',
        'only_matching': True,
    }, {
        'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990',
        'only_matching': True,
    }, {
        'url': 'https://www.npo3.nl/3onderzoekt/16-09-2015/VPWON_1239870',
        'only_matching': True,
    }, {
        # live stream
        'url': 'npo:LI_NL1_4188102',
        'only_matching': True,
    }, {
        'url': 'http://www.npo.nl/radio-gaga/13-06-2017/BNN_101383373',
        'only_matching': True,
    }, {
        'url': 'https://www.zapp.nl/1803-skelterlab/instructie-video-s/740-instructievideo-s/POMS_AT_11736927',
        'only_matching': True,
    }, {
        'url': 'https://www.npostart.nl/broodje-gezond-ei/28-05-2018/KN_1698996',
        'only_matching': True,
    }, {
        'url': 'https://npo.nl/KN_1698996',
        'only_matching': True,
    }]
-    @classmethod
+    def _get_token(self, video_id):
-    def suitable(cls, url):
+        return self._download_json(
-        return (False if any(ie.suitable(url)
+            'https://npo.nl/start/api/domain/player-token?productId=%s' % video_id,
-                for ie in (NPOLiveIE, NPORadioIE, NPORadioFragmentIE))
+            video_id,
-                else super(NPOIE, cls).suitable(url))
+            note='Downloading token')['token']
    def _real_extract(self, url):
-        video_id = self._match_id(url)
+        # Remove /afspelen and/or any trailing `/`s
-        return self._get_info(url, video_id) or self._get_old_info(video_id)
+        url = re.sub(r'/(?:afspelen)?/*$', '', url)
-
+        slug = url.split('/')[-1]
-    def _get_info(self, url, video_id):
+
-        token = self._download_json(
+        program_metadata = self._download_json('https://npo.nl/start/api/domain/program-detail',
-            'https://www.npostart.nl/api/token', video_id,
+                                               slug, query={'slug': slug})
-            'Downloading token', headers={
+        product_id = program_metadata.get('productId')
-                'Referer': url,
+        images = program_metadata.get('images')
-                'X-Requested-With': 'XMLHttpRequest',
+        thumbnail = None
-            })['token']
+        for image in images:
-
+            thumbnail = image.get('url')
-        player = self._download_json(
+            break
-            'https://www.npostart.nl/player/%s' % video_id, video_id,
+        title = program_metadata.get('title')
-            'Downloading player JSON', data=urlencode_postdata({
+        descriptions = program_metadata.get('description', {})
-                'autoplay': 0,
+        description = descriptions.get('long') or descriptions.get('short') or descriptions.get('brief')
-                'share': 1,
+        duration = program_metadata.get('durationInSeconds')
-                'pageUrl': url,
+
-                'hasAdConsent': 0,
+        if not product_id:
-                '_token': token,
+            raise ExtractorError('No productId found for slug: %s' % slug)
-            }))
+
-
+        formats = self._extract_formats_by_product_id(product_id, slug, url)
        player_token = player['token']
        drm = False
        format_urls = set()
        formats = []
        for profile in ('hls', 'dash-widevine', 'dash-playready', 'smooth'):
            streams = self._download_json(
                'https://start-player.npo.nl/video/%s/streams' % video_id,
                video_id, 'Downloading %s profile JSON' % profile, fatal=False,
                query={
                    'profile': profile,
                    'quality': 'npo',
                    'tokenId': player_token,
                    'streamType': 'broadcast',
                })
            if not streams:
                continue
            stream = streams.get('stream')
            if not isinstance(stream, dict):
                continue
            stream_url = url_or_none(stream.get('src'))
            if not stream_url or stream_url in format_urls:
                continue
            format_urls.add(stream_url)
            if stream.get('protection') is not None or stream.get('keySystemOptions') is not None:
                drm = True
                continue
            stream_type = stream.get('type')
            stream_ext = determine_ext(stream_url)
            if stream_type == 'application/dash+xml' or stream_ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    stream_url, video_id, mpd_id='dash', fatal=False))
            elif stream_type == 'application/vnd.apple.mpegurl' or stream_ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    stream_url, video_id, ext='mp4',
                    entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
            elif re.search(r'\.isml?/Manifest', stream_url):
                formats.extend(self._extract_ism_formats(
                    stream_url, video_id, ism_id='mss', fatal=False))
            else:
                formats.append({
                    'url': stream_url,
                })
-        if not formats:
+        return {
-            if drm:
+            'id': slug,
                raise ExtractorError('This video is DRM protected.', expected=True)
            return
        self._sort_formats(formats)
        info = {
            'id': video_id,
            'title': video_id,
            'formats': formats,
            'title': title or slug,
            'description': description or title or slug,
            'thumbnail': thumbnail,
            'duration': duration,
        }
-        embed_url = url_or_none(player.get('embedUrl'))
+    def _extract_formats_by_product_id(self, product_id, slug, url=None):
-        if embed_url:
+        token = self._get_token(product_id)
-            webpage = self._download_webpage(
+        formats = []
-                embed_url, video_id, 'Downloading embed page', fatal=False)
+        for profile in (
-            if webpage:
+                'dash',
-                video = self._parse_json(
+                # 'hls' is available too, but implementing it doesn't add much
-                    self._search_regex(
+                # As far as I know 'dash' is always available
-                        r'\bvideo\s*=\s*({.+?})\s*;', webpage, 'video',
+        ):
-                        default='{}'), video_id)
+            stream_link = self._download_json(
-                if video:
+                'https://prod.npoplayer.nl/stream-link', video_id=slug,
-                    title = video.get('episodeTitle')
+                data=json.dumps({
-                    subtitles = {}
+                    'profileName': profile,
-                    subtitles_list = video.get('subtitles')
+                    'referrerUrl': url or '',
-                    if isinstance(subtitles_list, list):
+                }).encode('utf8'),
-                        for cc in subtitles_list:
+                headers={
-                            cc_url = url_or_none(cc.get('src'))
+                    'Authorization': token,
-                            if not cc_url:
+                    'Content-Type': 'application/json',
-                                continue
+                },
-                            lang = str_or_none(cc.get('language')) or 'nl'
+                fatal=False,
                            subtitles.setdefault(lang, []).append({
                                'url': cc_url,
                            })
                    return merge_dicts({
                        'title': title,
                        'description': video.get('description'),
                        'thumbnail': url_or_none(
                            video.get('still_image_url') or video.get('orig_image_url')),
                        'duration': int_or_none(video.get('duration')),
                        'timestamp': unified_timestamp(video.get('broadcastDate')),
                        'creator': video.get('channel'),
                        'series': video.get('title'),
                        'episode': title,
                        'episode_number': int_or_none(video.get('episodeNumber')),
                        'subtitles': subtitles,
                    }, info)
        return info
    def _get_old_info(self, video_id):
        metadata = self._download_json(
            'http://e.omroep.nl/metadata/%s' % video_id,
            video_id,
            # We have to remove the javascript callback
            transform_source=strip_jsonp,
            )
            stream_url = stream_link.get('stream', {}).get('streamURL')
            formats.extend(self._extract_mpd_formats(stream_url, slug, mpd_id='dash', fatal=False))
        return formats
        error = metadata.get('error')
        if error:
            raise ExtractorError(error, expected=True)
-        # For some videos actual video id (prid) is different (e.g. for
+class BNNVaraIE(NPOIE):
-        # http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698
+    IE_NAME = 'bnnvara'
-        # video id is POMS_WNL_853698 but prid is POW_00996502)
+    IE_DESC = 'bnnvara.nl'
-        video_id = metadata.get('prid') or video_id
+    _VALID_URL = r'https?://(?:www\.)?bnnvara\.nl/videos/[0-9]*'
-
+    _TESTS = [{
-        # titel is too generic in some cases so utilize aflevering_titel as well
+        'url': 'https://www.bnnvara.nl/videos/27455',
-        # when available (e.g. http://tegenlicht.vpro.nl/afleveringen/2014-2015/access-to-africa.html)
+        'md5': '392dd367877739e49b9e0a9a550b178a',
-        title = metadata['titel']
+        'info_dict': {
-        sub_title = metadata.get('aflevering_titel')
+            'id': 'VARA_101369808',
-        if sub_title and sub_title != title:
+            'thumbnail': 'https://media.vara.nl/files/thumbnails/321291_custom_zembla__wie_is_de_mol_680x383.jpg',
-            title += ': %s' % sub_title
+            'title': 'Zembla - Wie is de mol?',
-
+            'ext': 'mp4',
-        token = self._get_token(video_id)
+        }
    }]
-        formats = []
+    def _real_extract(self, url):
-        urls = set()
+        url = url.rstrip('/')
-
+        video_id = url.split('/')[-1]
        def is_legal_url(format_url):
            return format_url and format_url not in urls and re.match(
                r'^(?:https?:)?//', format_url)
        QUALITY_LABELS = ('Laag', 'Normaal', 'Hoog')
        QUALITY_FORMATS = ('adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std')
        quality_from_label = qualities(QUALITY_LABELS)
        quality_from_format_id = qualities(QUALITY_FORMATS)
        items = self._download_json(
            'http://ida.omroep.nl/app.php/%s' % video_id, video_id,
            'Downloading formats JSON', query={
                'adaptive': 'yes',
                'token': token,
            })['items'][0]
        for num, item in enumerate(items):
            item_url = item.get('url')
            if not is_legal_url(item_url):
                continue
            urls.add(item_url)
            format_id = self._search_regex(
                r'video/ida/([^/]+)', item_url, 'format id',
                default=None)
            item_label = item.get('label')
            def add_format_url(format_url):
                width = int_or_none(self._search_regex(
                    r'(\d+)[xX]\d+', format_url, 'width', default=None))
                height = int_or_none(self._search_regex(
                    r'\d+[xX](\d+)', format_url, 'height', default=None))
                if item_label in QUALITY_LABELS:
                    quality = quality_from_label(item_label)
                    f_id = item_label
                elif item_label in QUALITY_FORMATS:
                    quality = quality_from_format_id(format_id)
                    f_id = format_id
                else:
                    quality, f_id = [None] * 2
                formats.append({
                    'url': format_url,
                    'format_id': f_id,
                    'width': width,
                    'height': height,
                    'quality': quality,
                })
-            # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706
+        media = self._download_json('https://api.bnnvara.nl/bff/graphql',
-            if item.get('contentType') in ('url', 'audio'):
+                                    video_id,
-                add_format_url(item_url)
+                                    data=json.dumps(
-                continue
+                                        {
-
+                                            'operationName': 'getMedia',
-            try:
+                                            'variables': {
-                stream_info = self._download_json(
+                                                'id': video_id,
-                    item_url + '&type=json', video_id,
+                                                'hasAdConsent': False,
-                    'Downloading %s stream JSON'
+                                                'atInternetId': 70
-                    % item_label or item.get('format') or format_id or num)
+                                            },
-            except ExtractorError as ee:
+                                            'query': 'query getMedia($id: ID!, $mediaUrl: String, $hasAdConsent: Boolean!, $atInternetId: Int) {\n  player(\n    id: $id\n    mediaUrl: $mediaUrl\n    hasAdConsent: $hasAdConsent\n    atInternetId: $atInternetId\n  ) {\n    ... on PlayerSucces {\n      brand {\n        name\n        slug\n        broadcastsEnabled\n        __typename\n      }\n      title\n      programTitle\n      pomsProductId\n      broadcasters {\n        name\n        __typename\n      }\n      duration\n      classifications {\n        title\n        imageUrl\n        type\n        __typename\n      }\n      image {\n        title\n        url\n        __typename\n      }\n      cta {\n        title\n        url\n        __typename\n      }\n      genres {\n        name\n        __typename\n      }\n      subtitles {\n        url\n        language\n        __typename\n      }\n      sources {\n        name\n        url\n        ratio\n        __typename\n      }\n      type\n      token\n      __typename\n    }\n    ... on PlayerError {\n      error\n      __typename\n    }\n    __typename\n  }\n}'
-                if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
+                                        }).encode('utf8'),
-                    error = (self._parse_json(
+                                    headers={
-                        ee.cause.read().decode(), video_id,
+                                        'Content-Type': 'application/json',
                        fatal=False) or {}).get('errorstring')
                    if error:
                        raise ExtractorError(error, expected=True)
                raise
            # Stream URL instead of JSON, example: npo:LI_NL1_4188102
            if isinstance(stream_info, compat_str):
                if not stream_info.startswith('http'):
                    continue
                video_url = stream_info
            # JSON
            else:
                video_url = stream_info.get('url')
            if not video_url or 'vodnotavailable.' in video_url or video_url in urls:
                continue
            urls.add(video_url)
            if determine_ext(video_url) == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, ext='mp4',
                    entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
            else:
                add_format_url(video_url)
        is_live = metadata.get('medium') == 'live'
        if not is_live:
            for num, stream in enumerate(metadata.get('streams', [])):
                stream_url = stream.get('url')
                if not is_legal_url(stream_url):
                    continue
                urls.add(stream_url)
                # smooth streaming is not supported
                stream_type = stream.get('type', '').lower()
                if stream_type in ['ss', 'ms']:
                    continue
                if stream_type == 'hds':
                    f4m_formats = self._extract_f4m_formats(
                        stream_url, video_id, fatal=False)
                    # f4m downloader downloads only piece of live stream
                    for f4m_format in f4m_formats:
                        f4m_format['preference'] = -1
                    formats.extend(f4m_formats)
                elif stream_type == 'hls':
                    formats.extend(self._extract_m3u8_formats(
                        stream_url, video_id, ext='mp4', fatal=False))
                # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706
                elif '.asf' in stream_url:
                    asx = self._download_xml(
                        stream_url, video_id,
                        'Downloading stream %d ASX playlist' % num,
                        transform_source=fix_xml_ampersands, fatal=False)
                    if not asx:
                        continue
                    ref = asx.find('./ENTRY/Ref')
                    if ref is None:
                        continue
                    video_url = ref.get('href')
                    if not video_url or video_url in urls:
                        continue
                    urls.add(video_url)
                    formats.append({
                        'url': video_url,
                        'ext': stream.get('formaat', 'asf'),
                        'quality': stream.get('kwaliteit'),
                        'preference': -10,
                    })
                else:
                    formats.append({
                        'url': stream_url,
                        'quality': stream.get('kwaliteit'),
                                    })
        product_id = media.get('data', {}).get('player', {}).get('pomsProductId')
-        self._sort_formats(formats)
+        formats = self._extract_formats_by_product_id(product_id, video_id)
        subtitles = {}
        if metadata.get('tt888') == 'ja':
            subtitles['nl'] = [{
                'ext': 'vtt',
                'url': 'http://tt888.omroep.nl/tt888/%s' % video_id,
            }]
        return {
-            'id': video_id,
+            'id': product_id,
-            'title': self._live_title(title) if is_live else title,
+            'title': media.get('data', {}).get('player', {}).get('title'),
            'description': metadata.get('info'),
            'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'],
            'upload_date': unified_strdate(metadata.get('gidsdatum')),
            'duration': parse_duration(metadata.get('tijdsduur')),
            'formats': formats,
-            'subtitles': subtitles,
+            'thumbnail': media.get('data', {}).get('player', {}).get('image').get('url'),
            'is_live': is_live,
        }
-class NPOLiveIE(NPOBaseIE):
+class ONIE(NPOIE):
-    IE_NAME = 'npo.nl:live'
+    IE_NAME = 'on'
-    _VALID_URL = r'https?://(?:www\.)?npo(?:start)?\.nl/live(?:/(?P<id>[^/?#&]+))?'
+    IE_DESC = 'ongehoordnederland.tv'
-
+    _VALID_URL = r'https?://(?:www\.)?ongehoordnederland.tv/.*'
    _TESTS = [{
-        'url': 'http://www.npo.nl/live/npo-1',
+        'url': 'https://ongehoordnederland.tv/2024/03/01/korte-clips/heeft-preppen-zin-betwijfel-dat-je-daar-echt-iets-aan-zult-hebben-bij-oorlog-lydia-daniel/',
        'md5': 'a85ebd50fa86fe5cbce654655f7dbb12',
        'info_dict': {
-            'id': 'LI_NL1_4188102',
+
            'display_id': 'npo-1',
            'ext': 'mp4',
            'title': 're:^NPO 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        }
    }, {
        'url': 'http://www.npo.nl/live',
        'only_matching': True,
    }, {
        'url': 'https://www.npostart.nl/live/npo-1',
        'only_matching': True,
    }]
    def _real_extract(self, url):
-        display_id = self._match_id(url) or 'npo-1'
+        video_id = url.rstrip('/').split('/')[-1]
-
+        page, _ = self._download_webpage_handle(url, video_id)
-        webpage = self._download_webpage(url, display_id)
+        results = re.findall("page: '(.+)'", page)
        formats = []
        for result in results:
            formats.extend(self._extract_formats_by_product_id(result, video_id))
-        live_id = self._search_regex(
+        if not formats:
-            [r'media-id="([^"]+)"', r'data-prid="([^"]+)"'], webpage, 'live id')
+            raise ExtractorError('Could not find a POMS product id in the provided URL, '
                                 'perhaps because all stream URLs are DRM protected.')
        return {
-            '_type': 'url_transparent',
+            'id': video_id,
-            'url': 'npo:%s' % live_id,
+            'title': video_id,
-            'ie_key': NPOIE.ie_key(),
+            'formats': formats,
            'id': live_id,
            'display_id': display_id,
        }
-class NPORadioIE(InfoExtractor):
+class ZAPPIE(NPOIE):
-    IE_NAME = 'npo.nl:radio'
+    IE_NAME = 'zapp'
-    _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)'
+    IE_DESC = 'zapp.nl'
    _VALID_URL = r'https?://(?:www\.)?zapp.nl/.*'
-    _TEST = {
+    _TESTS = [{
-        'url': 'http://www.npo.nl/radio/radio-1',
+        'url': 'https://www.zapp.nl/programmas/zappsport/gemist/POMS_AT_811523',
        'md5': '9eb2d8b6f88b72b6b986ea2c26a81588',
        'info_dict': {
-            'id': 'radio-1',
+            'id': 'POMS_AT_811523',
-            'ext': 'mp3',
+            'title': 'POMS_AT_811523',
            'title': 're:^NPO Radio 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'is_live': True,
        },
-        'params': {
+    }]
            'skip_download': True,
        }
    }
    @classmethod
    def suitable(cls, url):
        return False if NPORadioFragmentIE.suitable(url) else super(NPORadioIE, cls).suitable(url)
    @staticmethod
    def _html_get_attribute_regex(attribute):
        return r'{0}\s*=\s*\'([^\']+)\''.format(attribute)
    def _real_extract(self, url):
-        video_id = self._match_id(url)
+        video_id = url.rstrip('/').split('/')[-1]
        webpage = self._download_webpage(url, video_id)
-        title = self._html_search_regex(
+        formats = self._extract_formats_by_product_id(video_id, video_id, url=url)
            self._html_get_attribute_regex('data-channel'), webpage, 'title')
        stream = self._parse_json(
            self._html_search_regex(self._html_get_attribute_regex('data-streams'), webpage, 'data-streams'),
            video_id)
        codec = stream.get('codec')
        return {
            'id': video_id,
-            'url': stream['url'],
+            'title': video_id,
-            'title': self._live_title(title),
+            'formats': formats,
            'acodec': codec,
            'ext': codec,
            'is_live': True,
        }
-class NPORadioFragmentIE(InfoExtractor):
+class SchoolTVIE(NPOIE):
-    IE_NAME = 'npo.nl:radio:fragment'
+    IE_NAME = 'schooltv'
-    _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/[^/]+/fragment/(?P<id>\d+)'
+    IE_DESC = 'schooltv.nl'
    _VALID_URL = r'https?://(?:www\.)?schooltv.nl/item/.*'
-    _TEST = {
+    _TESTS = [{
-        'url': 'http://www.npo.nl/radio/radio-5/fragment/174356',
+        'url': 'https://schooltv.nl/item/zapp-music-challenge-2015-zapp-music-challenge-2015',
-        'md5': 'dd8cc470dad764d0fdc70a9a1e2d18c2',
+        'md5': 'e9ef151c4886994e2bea23593348cb14',
        'info_dict': {
-            'id': '174356',
+            'id': 'zapp-music-challenge-2015-zapp-music-challenge-2015',
-            'ext': 'mp3',
+            'title': 'Zapp Music Challenge 2015 - Alain Clark & Yaell',
-            'title': 'Jubileumconcert Willeke Alberti',
+            'description': "Een nummer schrijven met de super bekende soulzanger en producer Alain Clark? Dat is de uitdaging voor de dertienjarige Yaell uit Delft. En als het dan echt goed is, mag hij het ook nog eens live gaan spelen op de speelplaats bij Giel Beelen! Muziek is heel erg belangrijk in het leven van Yaell. 'Als er geen muziek zou zijn, dan zou ik heel veel niet kunnen.' Hij is dan ook altijd aan het schrijven, vaak over zijn eigen leven. Maar soms is het best lastig om die teksten te verzinnen. Vindt hij de inspiratie om een hit te maken met Alain?"
        },
-    }
+    }]
    def _real_extract(self, url):
-        audio_id = self._match_id(url)
+        video_id = url.rstrip('/').split('/')[-1]
        # TODO Find out how we could obtain this automatically
        #      Otherwise this extractor might break each time SchoolTV deploys a new release
        build_id = 'b7eHUzAVO7wHXCopYxQhV'
-        webpage = self._download_webpage(url, audio_id)
+        metadata_url = 'https://schooltv.nl/_next/data/' \
                       + build_id \
                       + '/item/' \
                       + video_id + '.json'
-        title = self._html_search_regex(
+        metadata = self._download_json(metadata_url,
-            r'href="/radio/[^/]+/fragment/%s" title="([^"]+)"' % audio_id,
+                                       video_id).get('pageProps', {}).get('data', {})
            webpage, 'title')
-        audio_url = self._search_regex(
+        formats = self._extract_formats_by_product_id(metadata.get('poms_mid'), video_id)
-            r"data-streams='([^']+)'", webpage, 'audio url')
+
        if not formats:
            raise ExtractorError('Could not find a POMS product id in the provided URL, '
                                 'perhaps because all stream URLs are DRM protected.')
        return {
-            'id': audio_id,
+            'id': video_id,
-            'url': audio_url,
+            'title': metadata.get('title', '') + ' - ' + metadata.get('subtitle', ''),
-            'title': title,
+            'description': metadata.get('description') or metadata.get('short_description'),
            'formats': formats,
        }
-class NPODataMidEmbedIE(InfoExtractor):
+class NTRSubsiteIE(NPOIE):
    def _real_extract(self, url):
-        display_id = self._match_id(url)
+        video_id = url.rstrip('/').split('/')[-1]
        webpage = self._download_webpage(url, display_id)
        video_id = self._search_regex(
            r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video_id', group='id')
        return {
            '_type': 'url_transparent',
            'ie_key': 'NPO',
            'url': 'npo:%s' % video_id,
            'display_id': display_id
        }
        page, _ = self._download_webpage_handle(url, video_id)
        results = re.findall(r'data-mid="(.+_.+)"', page)
        formats = []
        for result in results:
            formats.extend(self._extract_formats_by_product_id(result, video_id))
            break
-class SchoolTVIE(NPODataMidEmbedIE):
+        if not formats:
-    IE_NAME = 'schooltv'
+            raise ExtractorError('Could not find a POMS product id in the provided URL, '
-    _VALID_URL = r'https?://(?:www\.)?schooltv\.nl/video/(?P<id>[^/?#&]+)'
+                                 'perhaps because all stream URLs are DRM protected.')
-    _TEST = {
+        return {
-        'url': 'http://www.schooltv.nl/video/ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam/',
+            'id': video_id,
-        'info_dict': {
+            'title': video_id,
-            'id': 'WO_NTR_429477',
+            'formats': formats,
            'display_id': 'ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam',
            'title': 'Ademhaling: De hele dag haal je adem. Maar wat gebeurt er dan eigenlijk in je lichaam?',
            'ext': 'mp4',
            'description': 'md5:abfa0ff690adb73fd0297fd033aaa631'
        },
        'params': {
            # Skip because of m3u8 download
            'skip_download': True
        }
        }
-class HetKlokhuisIE(NPODataMidEmbedIE):
+class HetKlokhuisIE(NTRSubsiteIE):
    IE_NAME = 'hetklokhuis'
-    _VALID_URL = r'https?://(?:www\.)?hetklokhuis\.nl/[^/]+/\d+/(?P<id>[^/?#&]+)'
+    IE_DESC = 'hetklokhuis.nl'
-
+    _VALID_URL = r'https?://(?:www\.)?hetklokhuis\.nl/.*'
-    _TEST = {
+    _TESTS = [{
-        'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven',
+        'url': 'https://hetklokhuis.nl/dossier/142/zoek-het-uit/tv-uitzending/2987/aliens',
        'md5': '4664b54ed4e05183b1e4f2f4290d551e',
        'info_dict': {
-            'id': 'VPWON_1260528',
+            'id': 'aliens',
-            'display_id': 'Zwaartekrachtsgolven',
+            'title': 'aliens',
            'ext': 'm4v',
            'title': 'Het Klokhuis: Zwaartekrachtsgolven',
            'description': 'md5:c94f31fb930d76c2efa4a4a71651dd48',
            'upload_date': '20170223',
        },
-        'params': {
+    }]
            'skip_download': True
        }
    }
 class NPOPlaylistBaseIE(NPOIE):
    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        entries = [
            self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id)
            for video_id in orderedSet(re.findall(self._PLAYLIST_ENTRY_RE, webpage))
        ]
        playlist_title = self._html_search_regex(
            self._PLAYLIST_TITLE_RE, webpage, 'playlist title',
            default=None) or self._og_search_title(webpage)
        return self.playlist_result(entries, playlist_id, playlist_title)
-class VPROIE(NPOPlaylistBaseIE):
+class VPROIE(NPOIE):
    IE_NAME = 'vpro'
-    _VALID_URL = r'https?://(?:www\.)?(?:(?:tegenlicht\.)?vpro|2doc)\.nl/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+    IE_DESC = 'vpro.nl'
-    _PLAYLIST_TITLE_RE = (r'<h1[^>]+class=["\'].*?\bmedia-platform-title\b.*?["\'][^>]*>([^<]+)',
+    _VALID_URL = r'https?://(?:www\.)?vpro.nl/.*'
-                          r'<h5[^>]+class=["\'].*?\bmedia-platform-subtitle\b.*?["\'][^>]*>([^<]+)')
+    _TESTS = [{
-    _PLAYLIST_ENTRY_RE = r'data-media-id="([^"]+)"'
+        'url': 'https://www.vpro.nl/programmas/tegenlicht/kijk/afleveringen/2015-2016/offline-als-luxe.html',
-
+        'md5': 'cf302e066b5313cfaf8d5adf50d64f13',
    _TESTS = [
        {
            'url': 'http://tegenlicht.vpro.nl/afleveringen/2012-2013/de-toekomst-komt-uit-afrika.html',
            'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
            'info_dict': {
                'id': 'VPWON_1169289',
                'ext': 'm4v',
                'title': 'De toekomst komt uit Afrika',
                'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
                'upload_date': '20130225',
            },
            'skip': 'Video gone',
        },
        {
            'url': 'http://www.vpro.nl/programmas/2doc/2015/sergio-herman.html',
            'info_dict': {
                'id': 'sergio-herman',
                'title': 'sergio herman: fucking perfect',
            },
            'playlist_count': 2,
        },
        {
            # playlist with youtube embed
            'url': 'http://www.vpro.nl/programmas/2doc/2015/education-education.html',
            'info_dict': {
                'id': 'education-education',
                'title': 'education education',
            },
            'playlist_count': 2,
        },
        {
            'url': 'http://www.2doc.nl/documentaires/series/2doc/2015/oktober/de-tegenprestatie.html',
            'info_dict': {
                'id': 'de-tegenprestatie',
                'title': 'De Tegenprestatie',
            },
            'playlist_count': 2,
        }, {
            'url': 'http://www.2doc.nl/speel~VARA_101375237~mh17-het-verdriet-van-nederland~.html',
        'info_dict': {
-                'id': 'VARA_101375237',
+            'id': 'offline-als-luxe.html',
            'title': 'offline-als-luxe.html',
            'ext': 'm4v',
                'title': 'MH17: Het verdriet van Nederland',
                'description': 'md5:09e1a37c1fdb144621e22479691a9f18',
                'upload_date': '20150716',
            },
            'params': {
                # Skip because of m3u8 download
                'skip_download': True
        },
-        }
+    }]
    ]
    def _real_extract(self, url):
        video_id = url.rstrip('/').split('/')[-1]
        page, _ = self._download_webpage_handle(url, video_id)
        results = re.findall(r'data-media-id="([a-zA-Z0-9_]+)"\s', page)
        formats = []
        for result in results:
            formats.extend(self._extract_formats_by_product_id(result, video_id))
            break  # TODO find a better solution, VPRO pages can have multiple videos embedded
-class WNLIE(NPOPlaylistBaseIE):
+        if not formats:
-    IE_NAME = 'wnl'
+            raise ExtractorError('Could not find a POMS product id in the provided URL, '
-    _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P<id>[^/]+)__\d+'
+                                 'perhaps because all stream URLs are DRM protected.')
    _PLAYLIST_TITLE_RE = r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>'
    _PLAYLIST_ENTRY_RE = r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>Deel \d+'
-    _TESTS = [{
+        return {
-        'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515',
+            'id': video_id,
-        'info_dict': {
+            'title': video_id,
-            'id': 'vandaag-de-dag-6-mei',
+            'formats': formats,
-            'title': 'Vandaag de Dag 6 mei',
+        }
        },
        'playlist_count': 4,
    }]
-class AndereTijdenIE(NPOPlaylistBaseIE):
+class AndereTijdenIE(NTRSubsiteIE):
    IE_NAME = 'anderetijden'
-    _VALID_URL = r'https?://(?:www\.)?anderetijden\.nl/programma/(?:[^/]+/)+(?P<id>[^/?#&]+)'
+    IE_DESC = 'anderetijden.nl'
-    _PLAYLIST_TITLE_RE = r'(?s)<h1[^>]+class=["\'].*?\bpage-title\b.*?["\'][^>]*>(.+?)</h1>'
+    _VALID_URL = r'https?://(?:www\.)?anderetijden\.nl/.*'
    _PLAYLIST_ENTRY_RE = r'<figure[^>]+class=["\']episode-container episode-page["\'][^>]+data-prid=["\'](.+?)["\']'
    _TESTS = [{
-        'url': 'http://anderetijden.nl/programma/1/Andere-Tijden/aflevering/676/Duitse-soldaten-over-de-Slag-bij-Arnhem',
+        'url': 'https://anderetijden.nl/programma/1/Andere-Tijden/aflevering/676/Duitse-soldaten-over-de-Slag-bij-Arnhem',
        'md5': '3d607b16e00b459156b4ab6e163dccd7',
        'info_dict': {
            'id': 'Duitse-soldaten-over-de-Slag-bij-Arnhem',
-            'title': 'Duitse soldaten over de Slag bij Arnhem',
+            'title': 'Duitse-soldaten-over-de-Slag-bij-Arnhem',
        },
        'playlist_count': 3,
    }]