Fix imports and general cleanup

· Import the compat_* names from the compat module, where they are defined. Yes, some of these names are also reachable through utils, but that is an implementation detail and should not be relied on.
· Use _match_id consistently whenever possible
· Fix some outdated tests
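· Use consistent _VALID_URL patterns (always match the whole protocol, e.g. https?://; no ^ is required at the start because the pattern is applied with re.match, which already anchors at the beginning of the URL)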
· Use modern test definitions ('id' and 'ext' inside info_dict instead of the old top-level 'file' key)
pull/4459/head
Philipp Hagemeister 9 years ago
parent 20e35880bf
commit 1cc79574fc

@ -5,8 +5,8 @@ import re
import sys
import time
from ..compat import compat_str
from ..utils import (
compat_str,
encodeFilename,
format_bytes,
timeconvert,

@ -9,10 +9,12 @@ import xml.etree.ElementTree as etree
from .common import FileDownloader
from .http import HttpFD
from ..compat import (
compat_urlparse,
)
from ..utils import (
struct_pack,
struct_unpack,
compat_urlparse,
format_bytes,
encodeFilename,
sanitize_open,

@ -6,9 +6,11 @@ import subprocess
from ..postprocessor.ffmpeg import FFmpegPostProcessor
from .common import FileDownloader
from ..utils import (
from ..compat import (
compat_urlparse,
compat_urllib_request,
)
from ..utils import (
check_executable,
encodeFilename,
)

@ -4,11 +4,12 @@ import os
import time
from .common import FileDownloader
from ..utils import (
from ..compat import (
compat_urllib_request,
compat_urllib_error,
)
from ..utils import (
ContentTooShortError,
encodeFilename,
sanitize_open,
format_bytes,

@ -7,9 +7,9 @@ import sys
import time
from .common import FileDownloader
from ..compat import compat_str
from ..utils import (
check_executable,
compat_str,
encodeFilename,
format_bytes,
get_exe_version,

@ -5,10 +5,9 @@ import re
import json
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
compat_str,
qualities,
determine_ext,
)
@ -75,9 +74,7 @@ class AllocineIE(InfoExtractor):
'format_id': format_id,
'quality': quality(format_id),
'url': v,
'ext': determine_ext(v),
})
self._sort_formats(formats)
return {

@ -68,4 +68,3 @@ class AolIE(InfoExtractor):
'title': title,
'entries': entries,
}

@ -4,8 +4,8 @@ import re
import json
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
compat_urlparse,
int_or_none,
)

@ -3,8 +3,8 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse
from ..utils import (
compat_urllib_parse,
determine_ext,
ExtractorError,
)

@ -5,7 +5,7 @@ import json
import itertools
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_request,
)

@ -4,9 +4,11 @@ import json
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_str,
compat_urlparse,
)
from ..utils import (
ExtractorError,
)

@ -1,8 +1,8 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urllib_parse
from ..utils import (
compat_urllib_parse,
xpath_text,
xpath_with_ns,
int_or_none,

@ -4,8 +4,8 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_parse_qs
from ..utils import (
compat_parse_qs,
ExtractorError,
int_or_none,
unified_strdate,
@ -29,10 +29,9 @@ class BiliBiliIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_code = self._search_regex(
r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')

@ -6,20 +6,21 @@ import json
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
find_xpath_attr,
fix_xml_ampersands,
compat_urlparse,
compat_str,
compat_urllib_request,
from ..compat import (
compat_parse_qs,
compat_str,
compat_urllib_parse,
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
determine_ext,
ExtractorError,
unsmuggle_url,
find_xpath_attr,
fix_xml_ampersands,
unescapeHTML,
unsmuggle_url,
)

@ -4,10 +4,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
compat_urllib_parse_urlparse,
)
from ..utils import (
ExtractorError,
)

@ -3,9 +3,11 @@ from __future__ import unicode_literals
import re
from .mtv import MTVServicesInfoExtractor
from ..utils import (
from ..compat import (
compat_str,
compat_urllib_parse,
)
from ..utils import (
ExtractorError,
float_or_none,
unified_strdate,

@ -5,12 +5,14 @@ import re
import json
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
orderedSet,
compat_urllib_parse_urlparse,
compat_urlparse,
)
from ..utils import (
orderedSet,
)
class CondeNastIE(InfoExtractor):

@ -10,10 +10,12 @@ import xml.etree.ElementTree
from hashlib import sha1
from math import pow, sqrt, floor
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
ExtractorError,
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
bytes_to_intlist,
intlist_to_bytes,
unified_strdate,

@ -8,13 +8,15 @@ import itertools
from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
compat_urllib_request,
from ..compat import (
compat_str,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
int_or_none,
orderedSet,
str_to_int,
int_or_none,
ExtractorError,
unescapeHTML,
)

@ -5,7 +5,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
)

@ -1,8 +1,6 @@
from __future__ import unicode_literals
import re
from ..utils import (
from ..compat import (
compat_urllib_parse,
)
from .common import InfoExtractor
@ -24,11 +22,10 @@ class EHowIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
webpage, 'video URL')
video_url = self._search_regex(
r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL')
final_url = compat_urllib_parse.unquote(video_url)
uploader = self._html_search_meta('uploader', webpage)
title = self._og_search_title(webpage).replace(' | eHow', '')

@ -6,7 +6,7 @@ import random
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_str,
)

@ -3,9 +3,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
)
from ..utils import (
ExtractorError,
)

@ -3,8 +3,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_request,
)
from ..utils import (
ExtractorError,
)

@ -3,16 +3,18 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urllib_parse,
)
from ..utils import (
str_to_int,
)
class ExtremeTubeIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
_VALID_URL = r'https?://(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{
'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
@ -31,7 +33,7 @@ class ExtremeTubeIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
video_id = mobj.group('id')
url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url)

@ -1,19 +1,20 @@
#! -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
import hashlib
from .common import InfoExtractor
from ..utils import (
ExtractorError,
from ..compat import (
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
ExtractorError,
)
class FC2IE(InfoExtractor):
_VALID_URL = r'^http://video\.fc2\.com/((?P<lang>[^/]+)/)?content/(?P<id>[^/]+)'
_VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)'
IE_NAME = 'fc2'
_TEST = {
'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
@ -26,9 +27,7 @@ class FC2IE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
self._downloader.cookiejar.clear_session_cookies() # must clear

@ -4,11 +4,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
)
class FiredriveIE(InfoExtractor):
@ -28,11 +30,8 @@ class FiredriveIE(InfoExtractor):
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
url = 'http://firedrive.com/file/%s' % video_id
webpage = self._download_webpage(url, video_id)
if re.search(self._FILE_DELETED_REGEX, webpage) is not None:

@ -3,12 +3,14 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_request,
unified_strdate,
str_to_int,
parse_duration,
)
from ..utils import (
clean_html,
parse_duration,
str_to_int,
unified_strdate,
)
@ -31,9 +33,7 @@ class FourTubeIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage_url = 'http://www.4tube.com/videos/' + video_id
webpage = self._download_webpage(webpage_url, video_id)

@ -5,7 +5,7 @@ import json
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_parse_qs,
compat_urlparse,
)

@ -6,13 +6,15 @@ import re
import json
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse_urlparse,
compat_urlparse,
ExtractorError,
)
from ..utils import (
clean_html,
parse_duration,
compat_urllib_parse_urlparse,
ExtractorError,
int_or_none,
parse_duration,
)

@ -4,9 +4,11 @@ import re
import json
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
compat_urlparse,
)
from ..utils import (
unescapeHTML,
)

@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)

@ -2,8 +2,10 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urlparse,
)
from ..utils import (
determine_ext,
)

@ -4,7 +4,7 @@ import itertools
import re
from .common import SearchInfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
)

@ -4,11 +4,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
determine_ext,
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
int_or_none,
)
@ -106,7 +107,6 @@ class GorillaVidIE(InfoExtractor):
formats = [{
'format_id': 'sd',
'url': video_url,
'ext': determine_ext(video_url),
'quality': 1,
}]

@ -4,9 +4,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
ExtractorError,
compat_urllib_request,
int_or_none,
urlencode_postdata,
)
@ -30,9 +32,7 @@ class HostingBulkIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
url = 'http://hostingbulk.com/{0:}.html'.format(video_id)
# Custom request with cookie to set language to English, so our file

@ -1,20 +1,20 @@
from __future__ import unicode_literals
import json
import re
import time
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
)
class HypemIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
_VALID_URL = r'http://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/'
_TEST = {
'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
'md5': 'b9cc91b5af8995e9f0c1cee04c575828',
@ -27,8 +27,7 @@ class HypemIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
track_id = mobj.group(1)
track_id = self._match_id(url)
data = {'ax': 1, 'ts': time.time()}
data_encoded = compat_urllib_parse.urlencode(data)

@ -4,7 +4,7 @@ import re
import json
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urlparse,
)

@ -1,10 +1,9 @@
from __future__ import unicode_literals
import base64
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
)
@ -24,9 +23,7 @@ class InfoQIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')

@ -3,9 +3,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urlparse,
compat_urllib_parse,
)
from ..utils import (
xpath_with_ns,
)

@ -6,8 +6,10 @@ from random import random
from math import floor
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_request,
)
from ..utils import (
ExtractorError,
)

@ -5,8 +5,10 @@ import re
import json
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_request,
)
from ..utils import (
ExtractorError,
)

@ -4,7 +4,7 @@ import os
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urllib_parse,
@ -15,7 +15,7 @@ from ..aes import (
class KeezMoviesIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?keezmovies\.com/video/.+?(?P<videoid>[0-9]+)(?:[/?&]|$)'
_VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)'
_TEST = {
'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
'file': '1214711.mp4',
@ -27,8 +27,7 @@ class KeezMoviesIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
video_id = self._match_id(url)
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')

@ -4,10 +4,12 @@ import re
import json
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_str,
compat_urllib_parse_urlparse,
compat_urlparse,
)
from ..utils import (
ExtractorError,
find_xpath_attr,
int_or_none,

@ -5,12 +5,14 @@ import json
from .subtitles import SubtitlesInfoExtractor
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
int_or_none,
compat_str,
)

@ -1,43 +1,33 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
)
class MalemotionIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
_VALID_URL = r'https?://malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
_TEST = {
'url': 'http://malemotion.com/video/bien-dur.10ew',
'file': '10ew.mp4',
'md5': 'b3cc49f953b107e4a363cdff07d100ce',
'url': 'http://malemotion.com/video/bete-de-concours.ltc',
'md5': '3013e53a0afbde2878bc39998c33e8a5',
'info_dict': {
"title": "Bien dur",
"age_limit": 18,
'id': 'ltc',
'ext': 'mp4',
'title': 'Bête de Concours',
'age_limit': 18,
},
'skip': 'This video has been deleted.'
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group("id")
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
self.report_extraction(video_id)
# Extract video URL
video_url = compat_urllib_parse.unquote(
self._search_regex(r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
# Extract title
video_url = compat_urllib_parse.unquote(self._search_regex(
r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
video_title = self._html_search_regex(
r'<title>(.*?)</title', webpage, 'title')
# Extract video thumbnail
video_thumbnail = self._search_regex(
r'<video .+?poster="(.+?)"', webpage, 'thumbnail', fatal=False)
@ -47,14 +37,12 @@ class MalemotionIE(InfoExtractor):
'format_id': 'mp4',
'preference': 1,
}]
self._sort_formats(formats)
return {
'id': video_id,
'formats': formats,
'uploader': None,
'upload_date': None,
'title': video_title,
'thumbnail': video_thumbnail,
'description': None,
'age_limit': 18,
}

@ -3,10 +3,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_parse_qs,
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,

@ -5,8 +5,10 @@ import json
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
from ..compat import (
compat_urlparse,
)
from ..utils import (
clean_html,
ExtractorError,
get_element_by_id,

@ -1,12 +1,13 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
compat_urlparse,
)
from ..utils import (
get_element_by_attribute,
parse_duration,
strip_jsonp,
@ -15,7 +16,7 @@ from ..utils import (
class MiTeleIE(InfoExtractor):
IE_NAME = 'mitele.es'
_VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<episode>[^/]+)/'
_VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
_TEST = {
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
@ -31,12 +32,10 @@ class MiTeleIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
episode = mobj.group('episode')
episode = self._match_id(url)
webpage = self._download_webpage(url, episode)
embed_data_json = self._search_regex(
r'MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
flags=re.DOTALL
r'(?s)MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
).replace('\'', '"')
embed_data = json.loads(embed_data_json)

@ -3,8 +3,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
)
from ..utils import (
ExtractorError,
HEADRequest,
int_or_none,

@ -5,10 +5,12 @@ import json
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
int_or_none,
)

@ -4,7 +4,7 @@ import os
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urllib_parse,
@ -12,7 +12,7 @@ from ..utils import (
class MofosexIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
_VALID_URL = r'https?://(?:www\.)?(?P<url>mofosex\.com/videos/(?P<id>[0-9]+)/.*?\.html)'
_TEST = {
'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
'md5': '1b2eb47ac33cc75d4a80e3026b613c5a',
@ -26,7 +26,7 @@ class MofosexIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
video_id = mobj.group('id')
url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url)

@ -5,7 +5,7 @@ import os.path
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
@ -37,10 +37,9 @@ class MonikerIE(InfoExtractor):
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
orig_webpage = self._download_webpage(url, video_id)
fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
data = dict(fields)

@ -4,11 +4,13 @@ import re
import time
from .common import InfoExtractor
from ..utils import (
ExtractorError,
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
)
from ..utils import (
ExtractorError,
)
class MooshareIE(InfoExtractor):
@ -43,9 +45,7 @@ class MooshareIE(InfoExtractor):
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
page = self._download_webpage(url, video_id, 'Downloading page')
if re.search(r'>Video Not Found or Deleted<', page) is not None:

@ -3,13 +3,14 @@ from __future__ import unicode_literals
import hashlib
import json
import re
import time
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_parse_qs,
compat_str,
)
from ..utils import (
int_or_none,
)
@ -32,10 +33,9 @@ class MotorsportIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('id')
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
flashvars_code = self._html_search_regex(
r'<embed id="player".*?flashvars="([^"]+)"', webpage, 'flashvars')
flashvars = compat_parse_qs(flashvars_code)

@ -3,9 +3,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_str,
)
from ..utils import (
ExtractorError,
compat_str,
clean_html,
)

@ -3,9 +3,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
find_xpath_attr,
fix_xml_ampersands,

@ -2,9 +2,10 @@ from __future__ import unicode_literals
import os.path
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse_urlparse,
)
from ..utils import (
ExtractorError,
)

@ -4,8 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
)
from ..utils import (
ExtractorError,
clean_html,
)
@ -26,9 +28,9 @@ class NaverIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
webpage)
if m_id is None:

@ -4,8 +4,10 @@ import re
import json
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_str,
)
from ..utils import (
ExtractorError,
find_xpath_attr,
)

@ -1,9 +1,7 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
)
@ -12,7 +10,7 @@ from ..utils import (
class NFBIE(InfoExtractor):
IE_NAME = 'nfb'
IE_DESC = 'National Film Board of Canada'
_VALID_URL = r'https?://(?:www\.)?(nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'
_VALID_URL = r'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'
_TEST = {
'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
@ -32,10 +30,10 @@ class NFBIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page')
video_id = self._match_id(url)
page = self._download_webpage(
'https://www.nfb.ca/film/%s' % video_id, video_id,
'Downloading film page')
uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
page, 'director id', fatal=False)

@ -4,9 +4,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlparse,
)
from ..utils import (
ExtractorError,
compat_urllib_parse_urlparse,
int_or_none,
remove_end,
)

@ -5,14 +5,16 @@ import re
import json
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
unified_strdate,
parse_duration,
int_or_none,
)
from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
unified_strdate,
)

@ -6,13 +6,15 @@ import time
import hashlib
from .common import InfoExtractor
from ..utils import (
compat_urllib_request,
from ..compat import (
compat_str,
compat_urllib_parse,
ExtractorError,
compat_urllib_request,
)
from ..utils import (
clean_html,
ExtractorError,
unified_strdate,
compat_str,
)

@ -4,9 +4,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
ExtractorError,
compat_urllib_request,
urlencode_postdata,
xpath_text,
xpath_with_ns,
@ -32,8 +34,7 @@ class NosVideoIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
fields = {
'id': video_id,

@ -3,9 +3,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
)
from ..utils import (
ExtractorError,
compat_urlparse
)

@ -3,15 +3,17 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
parse_duration,
unified_strdate,
compat_urllib_request,
)
class NuvidIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://m.nuvid.com/video/1310741/',
'md5': 'eab207b7ac4fccfb4e23c86201f11277',
@ -26,8 +28,7 @@ class NuvidIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
formats = []

@ -4,16 +4,17 @@ import json
import re
from .common import InfoExtractor
from ..utils import compat_urllib_parse
from ..compat import compat_urllib_parse
class PhotobucketIE(InfoExtractor):
_VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
_TEST = {
'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
'file': 'zpsc0c3b9fa.mp4',
'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',
'info_dict': {
'id': 'zpsc0c3b9fa',
'ext': 'mp4',
'timestamp': 1367669341,
'upload_date': '20130504',
'uploader': 'rachaneronas',

@ -5,11 +5,13 @@ import re
import os.path
from .common import InfoExtractor
from ..utils import (
ExtractorError,
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
)
class PlayedIE(InfoExtractor):
@ -28,7 +30,6 @@ class PlayedIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
orig_webpage = self._download_webpage(url, video_id)
m_error = re.search(

@ -4,9 +4,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,

@ -3,31 +3,31 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
)
from ..utils import (
ExtractorError,
clean_html,
compat_urllib_parse,
ExtractorError,
)
class PlayvidIE(InfoExtractor):
_VALID_URL = r'^https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
_VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
_TEST = {
'url': 'http://www.playvid.com/watch/agbDDi7WZTV',
'md5': '44930f8afa616efdf9482daf4fe53e1e',
'url': 'http://www.playvid.com/watch/RnmBNgtrrJu',
'md5': 'ffa2f6b2119af359f544388d8c01eb6c',
'info_dict': {
'id': 'agbDDi7WZTV',
'id': 'RnmBNgtrrJu',
'ext': 'mp4',
'title': 'Michelle Lewin in Miami Beach',
'duration': 240,
'title': 'md5:9256d01c6317e3f703848b5906880dc8',
'duration': 82,
'age_limit': 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
m_error = re.search(

@ -4,10 +4,12 @@ import os
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urllib_parse,
)
from ..utils import (
str_to_int,
)
from ..aes import (
@ -16,7 +18,7 @@ from ..aes import (
class PornHubIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)'
_VALID_URL = r'https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)'
_TEST = {
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
'md5': '882f488fa1f0026f023f33576004a2ed',

@ -4,12 +4,14 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
determine_ext,
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
determine_ext,
ExtractorError,
)
class PromptFileIE(InfoExtractor):

@ -5,8 +5,10 @@ import re
from hashlib import sha1
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
)
from ..utils import (
unified_strdate,
)

@ -3,8 +3,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urlparse,
)
from ..utils import (
determine_ext,
int_or_none,
)

@ -3,10 +3,12 @@ from __future__ import unicode_literals
import re
from .subtitles import SubtitlesInfoExtractor
from ..compat import (
compat_urllib_parse,
)
from ..utils import (
parse_duration,
unified_strdate,
compat_urllib_parse,
)

@ -4,12 +4,14 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_str,
)
from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
unescapeHTML,
compat_str,
)

@ -5,10 +5,12 @@ import re
import itertools
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_str,
unified_strdate,
)
from ..utils import (
ExtractorError,
unified_strdate,
)
@ -36,9 +38,7 @@ class RutubeIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
video = self._download_json(
'http://rutube.ru/api/video/%s/?format=json' % video_id,
video_id, 'Downloading video JSON')
@ -114,8 +114,7 @@ class RutubeMovieIE(RutubeChannelIE):
_PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
movie_id = mobj.group('id')
movie_id = self._match_id(url)
movie = self._download_json(
self._MOVIE_TEMPLATE % movie_id, movie_id,
'Downloading movie JSON')

@ -1,14 +1,14 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
from ..compat import (
compat_parse_qs,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
)
class ScreencastIE(InfoExtractor):
@ -57,8 +57,7 @@ class ScreencastIE(InfoExtractor):
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_url = self._html_search_regex(

@ -4,10 +4,12 @@ import re
import base64
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
compat_urllib_request,
compat_urllib_parse,
int_or_none,
)
@ -26,26 +28,30 @@ class SharedIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
page = self._download_webpage(url, video_id)
if re.search(r'>File does not exist<', page) is not None:
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
download_form = dict(re.findall(r'<input type="hidden" name="([^"]+)" value="([^"]*)"', page))
if '>File does not exist<' in webpage:
raise ExtractorError(
'Video %s does not exist' % video_id, expected=True)
request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(download_form))
download_form = dict(re.findall(
r'<input type="hidden" name="([^"]+)" value="([^"]*)"', webpage))
request = compat_urllib_request.Request(
url, compat_urllib_parse.urlencode(download_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
video_page = self._download_webpage(request, video_id, 'Downloading video page')
video_page = self._download_webpage(
request, video_id, 'Downloading video page')
video_url = self._html_search_regex(r'data-url="([^"]+)"', video_page, 'video URL')
title = base64.b64decode(self._html_search_meta('full:title', page, 'title')).decode('utf-8')
filesize = int_or_none(self._html_search_meta('full:size', page, 'file size', fatal=False))
video_url = self._html_search_regex(
r'data-url="([^"]+)"', video_page, 'video URL')
title = base64.b64decode(self._html_search_meta(
'full:title', webpage, 'title')).decode('utf-8')
filesize = int_or_none(self._html_search_meta(
'full:size', webpage, 'file size', fatal=False))
thumbnail = self._html_search_regex(
r'data-poster="([^"]+)"', video_page, 'thumbnail', fatal=False, default=None)
r'data-poster="([^"]+)"', video_page, 'thumbnail', default=None)
return {
'id': video_id,

@ -4,9 +4,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
parse_duration,
)

@ -4,7 +4,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
)

@ -4,8 +4,10 @@ import re
import json
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urlparse,
)
from ..utils import (
ExtractorError,
)

@ -7,9 +7,11 @@ import hashlib
import uuid
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
int_or_none,
unified_strdate,

@ -1,13 +1,16 @@
# coding: utf-8
from __future__ import unicode_literals
from ..utils import (
ExtractorError,
import re
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
determine_ext,
ExtractorError,
)
import re
from .common import InfoExtractor
@ -27,9 +30,7 @@ class SockshareIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
url = 'http://sockshare.com/file/%s' % video_id
webpage = self._download_webpage(url, video_id)

@ -5,11 +5,12 @@ import re
import itertools
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_str,
compat_urlparse,
compat_urllib_parse,
)
from ..utils import (
ExtractorError,
int_or_none,
unified_strdate,

@ -3,12 +3,14 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urllib_parse,
unified_strdate,
)
from ..utils import (
str_to_int,
unified_strdate,
)
from ..aes import aes_decrypt_text

@ -4,8 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_request,
)
from ..utils import (
parse_iso8601,
)

@ -5,7 +5,7 @@ import re
import time
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)

@ -1,13 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..compat import (
compat_str,
)
from ..utils import (
int_or_none,
)

@ -4,10 +4,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
ExtractorError,
clean_html,
compat_urllib_request,
ExtractorError,
float_or_none,
parse_iso8601,
)

@ -5,7 +5,7 @@ import re
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
from ..compat import (
compat_str,
)

@ -4,8 +4,10 @@ import re
import json
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_str,
)
from ..utils import (
determine_ext,
ExtractorError,
xpath_with_ns,

@ -5,7 +5,7 @@ import re
from .common import InfoExtractor
from .brightcove import BrightcoveIE
from .discovery import DiscoveryIE
from ..utils import compat_urlparse
from ..compat import compat_urlparse
class TlcIE(DiscoveryIE):

@ -4,9 +4,11 @@ import json
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse_urlparse,
compat_urllib_request,
)
from ..utils import (
int_or_none,
str_to_int,
)

@ -1,10 +1,9 @@
from __future__ import unicode_literals
import base64
import re
from .common import InfoExtractor
from ..utils import compat_parse_qs
from ..compat import compat_parse_qs
class TutvIE(InfoExtractor):
@ -20,10 +19,9 @@ class TutvIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID')
data_content = self._download_webpage(

@ -5,9 +5,11 @@ import itertools
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
parse_iso8601,
)

@ -3,9 +3,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
)

@ -1,11 +1,11 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urllib_parse,
)
from ..utils import (
unified_strdate,
)

@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_urlparse,
)

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save