# youtube-dl/youtube_dl/extractor/myvideo.py

from __future__ import unicode_literals

import binascii
import base64
import hashlib
import re
import json

from .common import InfoExtractor
from ..compat import (
    compat_ord,
    compat_urllib_parse_unquote,
    compat_urllib_parse_urlencode,
)
from ..utils import (
    ExtractorError,
    sanitized_Request,
)


class MyVideoIE(InfoExtractor):
    """Information extractor for myvideo.de watch pages.

    The extractor is flagged as not working (``_WORKING = False``).
    ``_real_extract`` tries three page layouts in turn: a plain
    ``<source src=...>`` tag, a base64/JSON config service, and finally an
    RC4-encrypted player XML referenced from the page's ``flashvars``.
    """
    _WORKING = False
    _VALID_URL = r'https?://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'
    IE_NAME = 'myvideo'
    _TEST = {
        'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
        'md5': '2d2753e8130479ba2cb7e0a37002053e',
        'info_dict': {
            'id': '8229274',
            'ext': 'flv',
            'title': 'bowling-fail-or-win',
        }
    }

    # Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
    # Released into the Public Domain by Tristan Fischer on 2013-05-19
    # https://github.com/rg3/youtube-dl/pull/842
    def __rc4crypt(self, data, key):
        """Apply the RC4 stream cipher to ``data`` using ``key``.

        ``data`` and ``key`` are iterated element by element and each element
        is converted to an integer with ``compat_ord``.  The result is a text
        string containing one ``chr()`` character per input element.
        """
        # Key-scheduling: permute the 256-entry state box with the key.
        box = list(range(256))
        key_length = len(key)
        x = 0
        for i in range(256):
            x = (x + box[i] + compat_ord(key[i % key_length])) % 256
            box[i], box[x] = box[x], box[i]

        # Keystream generation: XOR every input element with the next
        # keystream byte.  Characters are collected and joined once instead
        # of growing a string with ``+=``.
        x = 0
        y = 0
        out = []
        for char in data:
            x = (x + 1) % 256
            y = (y + box[x]) % 256
            box[x], box[y] = box[y], box[x]
            out.append(chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256]))
        return ''.join(out)

    def __md5(self, s):
        """Return the hexadecimal MD5 digest of the bytes ``s``, as bytes."""
        return hashlib.md5(s).hexdigest().encode()

    def _real_extract(self, url):
        """Extract the info dict for the video referenced by ``url``.

        Returns a dict with at least ``id``, ``url`` and ``title``; the
        RTMP-based layouts additionally provide ``ext``, ``play_path`` and,
        depending on the layout, ``thumbnail``, ``tc_url`` and ``player_url``.

        Raises ExtractorError when none of the known page layouts is found or
        when the encrypted player data cannot be parsed.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Key material, base64-encoded twice; it is decoded below when the
        # RC4 session key is derived.
        GK = (
            b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
            b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
            b'TnpsbA0KTVRkbU1tSTRNdz09'
        )

        # Get video webpage
        webpage_url = 'http://www.myvideo.de/watch/%s' % video_id
        webpage = self._download_webpage(webpage_url, video_id)

        # Layout 1: the page carries a <source src='...'> tag; the file
        # extension found there is replaced by '.flv'.
        mobj = re.search(r"source src='(.+?)[.]([^.]+)'", webpage)
        if mobj is not None:
            self.report_extraction(video_id)
            video_url = mobj.group(1) + '.flv'

            video_title = self._html_search_regex('<title>([^<]+)</title>',
                                                  webpage, 'title')

            return {
                'id': video_id,
                'url': video_url,
                'title': video_title,
            }

        # Layout 2: the page points at a config service that answers with
        # base64-encoded JSON.
        mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
        if mobj is not None:
            # The request body must be bytes: this module uses
            # unicode_literals, so a plain '' would be a text string, which
            # Python 3's urllib refuses as request data.
            request = sanitized_Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, b'')
            response = self._download_webpage(request, video_id,
                                              'Downloading video info')
            info = json.loads(base64.b64decode(response).decode('utf-8'))
            return {
                'id': video_id,
                'title': info['title'],
                'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
                'play_path': info['filename'],
                'ext': 'flv',
                'thumbnail': info['thumbnail'][0]['url'],
            }

        # Layout 3: try encxml
        mobj = re.search('var flashvars={(.+?)}', webpage)
        if mobj is None:
            raise ExtractorError('Unable to extract video')

        # Every flashvar except '_encxml' becomes a query parameter of the
        # XML request; '_encxml' itself holds the (URL-quoted) base URL.
        params = {}
        encxml = ''
        sec = mobj.group(1)
        for name, value in re.findall(r"(.+?):'(.+?)',?", sec):
            if name == '_encxml':
                encxml = compat_urllib_parse_unquote(value)
            else:
                params[name] = value
        if not params.get('domain'):
            params['domain'] = 'www.myvideo.de'
        xmldata_url = '%s?%s' % (encxml, compat_urllib_parse_urlencode(params))
        if 'flash_playertype=MTV' in xmldata_url:
            self.report_warning('avoiding MTV player', video_id)
            xmldata_url = (
                'http://www.myvideo.de/dynamic/get_player_video_xml.php'
                '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
            ) % video_id

        # get enc data: the hex payload is the text following the first '='
        # of the response (up to the next '=', if any).
        xmldata_parts = self._download_webpage(xmldata_url, video_id).split('=')
        if len(xmldata_parts) < 2:
            raise ExtractorError('Unable to extract encrypted video data')
        enc_data = xmldata_parts[1]
        try:
            enc_data_b = binascii.unhexlify(enc_data)
        except (TypeError, ValueError, binascii.Error):
            # Python 2 raises TypeError, Python 3 binascii.Error/ValueError
            raise ExtractorError('Unable to decode encrypted video data')

        # Session key: md5 hex digest of (decoded GK + md5 hex digest of id)
        sk = self.__md5(
            base64.b64decode(base64.b64decode(GK)) +
            self.__md5(
                str(video_id).encode('utf-8')
            )
        )
        dec_data = self.__rc4crypt(enc_data_b, sk)

        # extracting infos
        self.report_extraction(video_id)

        video_url = None
        mobj = re.search(r"connectionurl='(.*?)'", dec_data)
        if mobj:
            video_url = compat_urllib_parse_unquote(mobj.group(1))
            if 'myvideo2flash' in video_url:
                self.report_warning(
                    'Rewriting URL to use unencrypted rtmp:// ...',
                    video_id)
                video_url = video_url.replace('rtmpe://', 'rtmp://')

        if not video_url:
            # extract non rtmp videos
            mobj = re.search(r"path='(http.*?)' source='(.*?)'", dec_data)
            if mobj is None:
                raise ExtractorError('unable to extract url')
            video_url = compat_urllib_parse_unquote(mobj.group(1)) + compat_urllib_parse_unquote(mobj.group(2))

        video_file = self._search_regex(r"source='(.*?)'", dec_data, 'video file')
        video_file = compat_urllib_parse_unquote(video_file)

        if not video_file.endswith('f4m'):
            # Build '<extension>:<path>'.  Splitting on the last dot only
            # keeps file names that contain further dots from blowing up the
            # tuple unpacking.
            ppath, dot, prefix = video_file.rpartition('.')
            if not dot:
                raise ExtractorError('Unable to extract play path')
            video_playpath = '%s:%s' % (prefix, ppath)
        else:
            video_playpath = ''

        video_swfobj = self._search_regex(r'swfobject\.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
        video_swfobj = compat_urllib_parse_unquote(video_swfobj)

        video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
                                              webpage, 'title')

        return {
            'id': video_id,
            'url': video_url,
            'tc_url': video_url,
            'title': video_title,
            'ext': 'flv',
            'play_path': video_playpath,
            'player_url': video_swfobj,
        }
[myvideo] Modernize 10 years ago			`from __future__ import unicode_literals`

Move MyVideo into its own file 11 years ago			`import binascii`
			`import base64`
			`import hashlib`
			`import re`
[myvideo] add support for videos that place the video info inside www.myvideo.de/service/data/video/{id}/config (fixes #616) 11 years ago			`import json`
Move MyVideo into its own file 11 years ago
			`from .common import InfoExtractor`
[util] Move compatibility functions out of util utils is large enough without these compatibility functions. Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py . Everything else (i.e. youtube-dl-specific helpers) goes into utils.py . 10 years ago			`from ..compat import (`
Move MyVideo into its own file 11 years ago			`compat_ord,`
[myvideo] Use compat_urllib_parse_unquote 9 years ago			`compat_urllib_parse_unquote,`
[compat] Add compat_urllib_parse_urlencode and eliminate encode_dict encode_dict functionality has been improved and moved directly into compat_urllib_parse_urlencode All occurrences of compat_urllib_parse.urlencode throughout the codebase have been replaced by compat_urllib_parse_urlencode Closes #8974 8 years ago			`compat_urllib_parse_urlencode,`
[util] Move compatibility functions out of util utils is large enough without these compatibility functions. Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py . Everything else (i.e. youtube-dl-specific helpers) goes into utils.py . 10 years ago			`)`
			`from ..utils import (`
Move MyVideo into its own file 11 years ago			`ExtractorError,`
Switch codebase to use sanitized_Request instead of compat_urllib_request.Request [downloader/dash] Use sanitized_Request [downloader/http] Use sanitized_Request [atresplayer] Use sanitized_Request [bambuser] Use sanitized_Request [bliptv] Use sanitized_Request [brightcove] Use sanitized_Request [cbs] Use sanitized_Request [ceskatelevize] Use sanitized_Request [collegerama] Use sanitized_Request [extractor/common] Use sanitized_Request [crunchyroll] Use sanitized_Request [dailymotion] Use sanitized_Request [dcn] Use sanitized_Request [dramafever] Use sanitized_Request [dumpert] Use sanitized_Request [eitb] Use sanitized_Request [escapist] Use sanitized_Request [everyonesmixtape] Use sanitized_Request [extremetube] Use sanitized_Request [facebook] Use sanitized_Request [fc2] Use sanitized_Request [flickr] Use sanitized_Request [4tube] Use sanitized_Request [gdcvault] Use sanitized_Request [extractor/generic] Use sanitized_Request [hearthisat] Use sanitized_Request [hotnewhiphop] Use sanitized_Request [hypem] Use sanitized_Request [iprima] Use sanitized_Request [ivi] Use sanitized_Request [keezmovies] Use sanitized_Request [letv] Use sanitized_Request [lynda] Use sanitized_Request [metacafe] Use sanitized_Request [minhateca] Use sanitized_Request [miomio] Use sanitized_Request [meovideo] Use sanitized_Request [mofosex] Use sanitized_Request [moniker] Use sanitized_Request [mooshare] Use sanitized_Request [movieclips] Use sanitized_Request [mtv] Use sanitized_Request [myvideo] Use sanitized_Request [neteasemusic] Use sanitized_Request [nfb] Use sanitized_Request [niconico] Use sanitized_Request [noco] Use sanitized_Request [nosvideo] Use sanitized_Request [novamov] Use sanitized_Request [nowness] Use sanitized_Request [nuvid] Use sanitized_Request [played] Use sanitized_Request [pluralsight] Use sanitized_Request [pornhub] Use sanitized_Request [pornotube] Use sanitized_Request [primesharetv] Use sanitized_Request [promptfile] Use sanitized_Request [qqmusic] Use 
sanitized_Request [rtve] Use sanitized_Request [safari] Use sanitized_Request [sandia] Use sanitized_Request [shared] Use sanitized_Request [sharesix] Use sanitized_Request [sina] Use sanitized_Request [smotri] Use sanitized_Request [sohu] Use sanitized_Request [spankwire] Use sanitized_Request [sportdeutschland] Use sanitized_Request [streamcloud] Use sanitized_Request [streamcz] Use sanitized_Request [tapely] Use sanitized_Request [tube8] Use sanitized_Request [tubitv] Use sanitized_Request [twitch] Use sanitized_Request [twitter] Use sanitized_Request [udemy] Use sanitized_Request [vbox7] Use sanitized_Request [veoh] Use sanitized_Request [vessel] Use sanitized_Request [vevo] Use sanitized_Request [viddler] Use sanitized_Request [videomega] Use sanitized_Request [viewvster] Use sanitized_Request [viki] Use sanitized_Request [vk] Use sanitized_Request [vodlocker] Use sanitized_Request [voicerepublic] Use sanitized_Request [wistia] Use sanitized_Request [xfileshare] Use sanitized_Request [xtube] Use sanitized_Request [xvideos] Use sanitized_Request [yandexmusic] Use sanitized_Request [youku] Use sanitized_Request [youporn] Use sanitized_Request [youtube] Use sanitized_Request [patreon] Use sanitized_Request [extractor/common] Remove unused import [nfb] PEP 8 9 years ago			`sanitized_Request,`
Move MyVideo into its own file 11 years ago			`)`


			`class MyVideoIE(InfoExtractor):`
[myvideo] Mark broken 8 years ago			`_WORKING = False`
Add support for https for all extractors as preventive and future-proof measure 8 years ago			`_VALID_URL = r'https?://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'`
[myvideo] Modernize 10 years ago			`IE_NAME = 'myvideo'`
Move tests to the IE definitions 11 years ago			`_TEST = {`
[myvideo] Modernize 10 years ago			`'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',`
			`'md5': '2d2753e8130479ba2cb7e0a37002053e',`
			`'info_dict': {`
			`'id': '8229274',`
			`'ext': 'flv',`
			`'title': 'bowling-fail-or-win',`
Move tests to the IE definitions 11 years ago			`}`
			`}`
Move MyVideo into its own file 11 years ago
			`# Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git`
			`# Released into the Public Domain by Tristan Fischer on 2013-05-19`
			`# https://github.com/rg3/youtube-dl/pull/842`
PEP8 applied 10 years ago			`def __rc4crypt(self, data, key):`
Move MyVideo into its own file 11 years ago			`x = 0`
			`box = list(range(256))`
			`for i in list(range(256)):`
			`x = (x + box[i] + compat_ord(key[i % len(key)])) % 256`
			`box[i], box[x] = box[x], box[i]`
			`x = 0`
			`y = 0`
			`out = ''`
			`for char in data:`
			`x = (x + 1) % 256`
			`y = (y + box[x]) % 256`
			`box[x], box[y] = box[y], box[x]`
			`out += chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256])`
			`return out`

PEP8 applied 10 years ago			`def __md5(self, s):`
Move MyVideo into its own file 11 years ago			`return hashlib.md5(s).hexdigest().encode()`

PEP8 applied 10 years ago			`def _real_extract(self, url):`
Move MyVideo into its own file 11 years ago			`mobj = re.match(self._VALID_URL, url)`
[myvideo] Modernize 10 years ago			`video_id = mobj.group('id')`
Move MyVideo into its own file 11 years ago
			`GK = (`
Fix all PEP8 issues except E501 10 years ago			`b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'`
			`b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'`
			`b'TnpsbA0KTVRkbU1tSTRNdz09'`
Move MyVideo into its own file 11 years ago			`)`

			`# Get video webpage`
			`webpage_url = 'http://www.myvideo.de/watch/%s' % video_id`
			`webpage = self._download_webpage(webpage_url, video_id)`

			`mobj = re.search('source src=\'(.+?)[.]([^.]+)\'', webpage)`
			`if mobj is not None:`
			`self.report_extraction(video_id)`
			`video_url = mobj.group(1) + '.flv'`

			`video_title = self._html_search_regex('<title>([^<]+)</title>',`
PEP8: applied even more rules 10 years ago			`webpage, 'title')`
Move MyVideo into its own file 11 years ago
[myvideo] Modernize 10 years ago			`return {`
			`'id': video_id,`
			`'url': video_url,`
			`'title': video_title,`
			`}`
Move MyVideo into its own file 11 years ago
[myvideo] add support for videos that place the video info inside www.myvideo.de/service/data/video/{id}/config (fixes #616) 11 years ago			`mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)`
			`if mobj is not None:`
Switch codebase to use sanitized_Request instead of compat_urllib_request.Request [downloader/dash] Use sanitized_Request [downloader/http] Use sanitized_Request [atresplayer] Use sanitized_Request [bambuser] Use sanitized_Request [bliptv] Use sanitized_Request [brightcove] Use sanitized_Request [cbs] Use sanitized_Request [ceskatelevize] Use sanitized_Request [collegerama] Use sanitized_Request [extractor/common] Use sanitized_Request [crunchyroll] Use sanitized_Request [dailymotion] Use sanitized_Request [dcn] Use sanitized_Request [dramafever] Use sanitized_Request [dumpert] Use sanitized_Request [eitb] Use sanitized_Request [escapist] Use sanitized_Request [everyonesmixtape] Use sanitized_Request [extremetube] Use sanitized_Request [facebook] Use sanitized_Request [fc2] Use sanitized_Request [flickr] Use sanitized_Request [4tube] Use sanitized_Request [gdcvault] Use sanitized_Request [extractor/generic] Use sanitized_Request [hearthisat] Use sanitized_Request [hotnewhiphop] Use sanitized_Request [hypem] Use sanitized_Request [iprima] Use sanitized_Request [ivi] Use sanitized_Request [keezmovies] Use sanitized_Request [letv] Use sanitized_Request [lynda] Use sanitized_Request [metacafe] Use sanitized_Request [minhateca] Use sanitized_Request [miomio] Use sanitized_Request [meovideo] Use sanitized_Request [mofosex] Use sanitized_Request [moniker] Use sanitized_Request [mooshare] Use sanitized_Request [movieclips] Use sanitized_Request [mtv] Use sanitized_Request [myvideo] Use sanitized_Request [neteasemusic] Use sanitized_Request [nfb] Use sanitized_Request [niconico] Use sanitized_Request [noco] Use sanitized_Request [nosvideo] Use sanitized_Request [novamov] Use sanitized_Request [nowness] Use sanitized_Request [nuvid] Use sanitized_Request [played] Use sanitized_Request [pluralsight] Use sanitized_Request [pornhub] Use sanitized_Request [pornotube] Use sanitized_Request [primesharetv] Use sanitized_Request [promptfile] Use sanitized_Request [qqmusic] Use 
sanitized_Request [rtve] Use sanitized_Request [safari] Use sanitized_Request [sandia] Use sanitized_Request [shared] Use sanitized_Request [sharesix] Use sanitized_Request [sina] Use sanitized_Request [smotri] Use sanitized_Request [sohu] Use sanitized_Request [spankwire] Use sanitized_Request [sportdeutschland] Use sanitized_Request [streamcloud] Use sanitized_Request [streamcz] Use sanitized_Request [tapely] Use sanitized_Request [tube8] Use sanitized_Request [tubitv] Use sanitized_Request [twitch] Use sanitized_Request [twitter] Use sanitized_Request [udemy] Use sanitized_Request [vbox7] Use sanitized_Request [veoh] Use sanitized_Request [vessel] Use sanitized_Request [vevo] Use sanitized_Request [viddler] Use sanitized_Request [videomega] Use sanitized_Request [viewvster] Use sanitized_Request [viki] Use sanitized_Request [vk] Use sanitized_Request [vodlocker] Use sanitized_Request [voicerepublic] Use sanitized_Request [wistia] Use sanitized_Request [xfileshare] Use sanitized_Request [xtube] Use sanitized_Request [xvideos] Use sanitized_Request [yandexmusic] Use sanitized_Request [youku] Use sanitized_Request [youporn] Use sanitized_Request [youtube] Use sanitized_Request [patreon] Use sanitized_Request [extractor/common] Remove unused import [nfb] PEP 8 9 years ago			`request = sanitized_Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')`
[myvideo] add support for videos that place the video info inside www.myvideo.de/service/data/video/{id}/config (fixes #616) 11 years ago			`response = self._download_webpage(request, video_id,`
[myvideo] Modernize 10 years ago			`'Downloading video info')`
[myvideo] add support for videos that place the video info inside www.myvideo.de/service/data/video/{id}/config (fixes #616) 11 years ago			`info = json.loads(base64.b64decode(response).decode('utf-8'))`
[myvideo] Modernize 10 years ago			`return {`
			`'id': video_id,`
			`'title': info['title'],`
			`'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),`
			`'play_path': info['filename'],`
			`'ext': 'flv',`
			`'thumbnail': info['thumbnail'][0]['url'],`
			`}`
[myvideo] add support for videos that place the video info inside www.myvideo.de/service/data/video/{id}/config (fixes #616) 11 years ago
Move MyVideo into its own file 11 years ago			`# try encxml`
			`mobj = re.search('var flashvars={(.+?)}', webpage)`
			`if mobj is None:`
[myvideo] Modernize 10 years ago			`raise ExtractorError('Unable to extract video')`
Move MyVideo into its own file 11 years ago
			`params = {}`
			`encxml = ''`
			`sec = mobj.group(1)`
			`for (a, b) in re.findall('(.+?):\'(.+?)\',?', sec):`
			`if not a == '_encxml':`
			`params[a] = b`
			`else:`
[myvideo] Use compat_urllib_parse_unquote 9 years ago			`encxml = compat_urllib_parse_unquote(b)`
Move MyVideo into its own file 11 years ago			`if not params.get('domain'):`
			`params['domain'] = 'www.myvideo.de'`
[compat] Add compat_urllib_parse_urlencode and eliminate encode_dict encode_dict functionality has been improved and moved directly into compat_urllib_parse_urlencode All occurrences of compat_urllib_parse.urlencode throughout the codebase have been replaced by compat_urllib_parse_urlencode Closes #8974 8 years ago			`xmldata_url = '%s?%s' % (encxml, compat_urllib_parse_urlencode(params))`
Move MyVideo into its own file 11 years ago			`if 'flash_playertype=MTV' in xmldata_url:`
[myvideo] Modernize 10 years ago			`self._downloader.report_warning('avoiding MTV player')`
Move MyVideo into its own file 11 years ago			`xmldata_url = (`
			`'http://www.myvideo.de/dynamic/get_player_video_xml.php'`
			`'?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'`
			`) % video_id`

			`# get enc data`
			`enc_data = self._download_webpage(xmldata_url, video_id).split('=')[1]`
			`enc_data_b = binascii.unhexlify(enc_data)`
			`sk = self.__md5(`
			`base64.b64decode(base64.b64decode(GK)) +`
			`self.__md5(`
			`str(video_id).encode('utf-8')`
			`)`
			`)`
			`dec_data = self.__rc4crypt(enc_data_b, sk)`

			`# extracting infos`
			`self.report_extraction(video_id)`

			`video_url = None`
			`mobj = re.search('connectionurl=\'(.*?)\'', dec_data)`
			`if mobj:`
[myvideo] Use compat_urllib_parse_unquote 9 years ago			`video_url = compat_urllib_parse_unquote(mobj.group(1))`
Move MyVideo into its own file 11 years ago			`if 'myvideo2flash' in video_url:`
[myvideo] Use RTMP instead of RTMPT (Fixes #2032) 11 years ago			`self.report_warning(`
[myvideo] Modernize 10 years ago			`'Rewriting URL to use unencrypted rtmp:// ...',`
[myvideo] Use RTMP instead of RTMPT (Fixes #2032) 11 years ago			`video_id)`
			`video_url = video_url.replace('rtmpe://', 'rtmp://')`
Move MyVideo into its own file 11 years ago
			`if not video_url:`
			`# extract non rtmp videos`
			`mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)`
			`if mobj is None:`
[myvideo] Modernize 10 years ago			`raise ExtractorError('unable to extract url')`
[myvideo] Use compat_urllib_parse_unquote 9 years ago			`video_url = compat_urllib_parse_unquote(mobj.group(1)) + compat_urllib_parse_unquote(mobj.group(2))`
Move MyVideo into its own file 11 years ago
[myvideo] Modernize 10 years ago			`video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')`
[myvideo] Use compat_urllib_parse_unquote 9 years ago			`video_file = compat_urllib_parse_unquote(video_file)`
Move MyVideo into its own file 11 years ago
			`if not video_file.endswith('f4m'):`
			`ppath, prefix = video_file.split('.')`
			`video_playpath = '%s:%s' % (prefix, ppath)`
			`else:`
			`video_playpath = ''`

Fix some regexes 7 years ago			`video_swfobj = self._search_regex(r'swfobject\.embedSWF\(\'(.+?)\'', webpage, 'swfobj')`
[myvideo] Use compat_urllib_parse_unquote 9 years ago			`video_swfobj = compat_urllib_parse_unquote(video_swfobj)`
Move MyVideo into its own file 11 years ago
			`video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",`
PEP8: applied even more rules 10 years ago			`webpage, 'title')`
[myvideo] Modernize 10 years ago
			`return {`
			`'id': video_id,`
			`'url': video_url,`
			`'tc_url': video_url,`
			`'title': video_title,`
			`'ext': 'flv',`
			`'play_path': video_playpath,`
			`'player_url': video_swfobj,`
			`}`