Improve geo bypass mechanism

* Rename options so that their prefix matches --geo-verification-proxy * Introduce _GEO_COUNTRIES for extractors * Implement faking IP right away for sites with known geo restriction
7 years ago · 4248dad92b
parent 0a840f584c
commit 4248dad92b
13 changed files with 71 additions and 30 deletions
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -323,10 +323,15 @@ class InfoExtractor(object):
    _real_extract() methods and define a _VALID_URL regexp.
    Probably, they should also be added to the list of extractors.
-    _BYPASS_GEO attribute may be set to False in order to disable
+    _GEO_BYPASS attribute may be set to False in order to disable
    geo restriction bypass mechanisms for a particular extractor.
    Though it won't disable explicit geo restriction bypass based on
-    country code provided with geo_bypass_country.
+    country code provided with geo_bypass_country. (experimental)
    _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
    countries for this extractor. One of these countries will be picked at
    random and used by the geo restriction bypass mechanism right away in order
    to bypass geo restriction, unless the mechanism is disabled. (experimental)
    Finally, the _WORKING attribute should be set to False for broken IEs
    in order to warn the users and skip the tests.
@ -335,7 +340,8 @@ class InfoExtractor(object):
    _ready = False
    _downloader = None
    _x_forwarded_for_ip = None
-    _BYPASS_GEO = True
+    _GEO_BYPASS = True
    _GEO_COUNTRIES = None
    _WORKING = True
    def __init__(self, downloader=None):
@ -370,13 +376,27 @@ class InfoExtractor(object):
    def initialize(self):
        """Prepare this instance for extraction (geo bypass, authentication, etc).

        The geo bypass setup is invoked on every call, whereas
        _real_initialize() is run only while the _ready flag is still unset;
        the flag is raised right after it completes.
        """
        self.__initialize_geo_bypass()
        # Already initialized: nothing more to do.
        if self._ready:
            return
        self._real_initialize()
        self._ready = True
    def __initialize_geo_bypass(self):
        if not self._x_forwarded_for_ip:
            country_code = self._downloader.params.get('geo_bypass_country', None)
            # If there is no explicit country for geo bypass specified and
            # the extractor is known to be geo restricted let's fake IP
            # as X-Forwarded-For right away.
            if (not country_code and
                    self._GEO_BYPASS and
                    self._downloader.params.get('geo_bypass', True) and
                    self._GEO_COUNTRIES):
                country_code = random.choice(self._GEO_COUNTRIES)
            if country_code:
                self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
-        if not self._ready:
+                if self._downloader.params.get('verbose', False):
-            self._real_initialize()
+                    self._downloader.to_stdout(
-            self._ready = True
+                        '[debug] Using fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
    def extract(self, url):
        """Extracts URL information and returns it in list of dicts."""
@ -389,16 +409,8 @@ class InfoExtractor(object):
                        ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
                    return ie_result
                except GeoRestrictedError as e:
-                    if (not self._downloader.params.get('geo_bypass_country', None) and
+                    if self.__maybe_fake_ip_and_retry(e.countries):
-                            self._BYPASS_GEO and
+                        continue
                            self._downloader.params.get('geo_bypass', True) and
                            not self._x_forwarded_for_ip and
                            e.countries):
                        self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries))
                        if self._x_forwarded_for_ip:
                            self.report_warning(
                                'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
                            continue
                    raise
        except ExtractorError:
            raise
@ -407,6 +419,19 @@ class InfoExtractor(object):
        except (KeyError, StopIteration) as e:
            raise ExtractorError('An extractor error has occurred.', cause=e)
    def __maybe_fake_ip_and_retry(self, countries):
        """Decide whether extraction should be retried with a faked IP.

        A fake X-Forwarded-For IP is generated only when all of the
        following hold: no explicit 'geo_bypass_country' param is set,
        the extractor's _GEO_BYPASS is truthy, the 'geo_bypass' param
        (default True) is truthy, no fake IP is in use yet, and
        countries is non-empty.

        Returns True if a fake IP has been stored in _x_forwarded_for_ip
        and a warning about the retry has been reported, False otherwise.
        """
        params = self._downloader.params
        if params.get('geo_bypass_country', None):
            return False
        if not self._GEO_BYPASS:
            return False
        if not params.get('geo_bypass', True):
            return False
        if self._x_forwarded_for_ip or not countries:
            return False

        # Fake an IP for one of the given countries, chosen at random.
        fake_ip = GeoUtils.random_ipv4(random.choice(countries))
        self._x_forwarded_for_ip = fake_ip
        # A falsy result means there is no IP to retry with.
        if not fake_ip:
            return False
        self.report_warning(
            'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % fake_ip)
        return True
    def set_downloader(self, downloader):
        """Sets the downloader for this IE."""
        self._downloader = downloader
--- a/youtube_dl/extractor/dramafever.py
+++ b/youtube_dl/extractor/dramafever.py
@ -20,6 +20,7 @@ from ..utils import (
 class DramaFeverBaseIE(AMPIE):
    _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
    _NETRC_MACHINE = 'dramafever'
    _GEO_COUNTRIES = ['US', 'CA']
    _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
@ -118,7 +119,7 @@ class DramaFeverIE(DramaFeverBaseIE):
            if isinstance(e.cause, compat_HTTPError):
                self.raise_geo_restricted(
                    msg='Currently unavailable in your country',
-                    countries=['US', 'CA'])
+                    countries=self._GEO_COUNTRIES)
            raise
        series_id, episode_number = video_id.split('.')
--- a/youtube_dl/extractor/go.py
+++ b/youtube_dl/extractor/go.py
@ -37,6 +37,7 @@ class GoIE(AdobePassIE):
        }
    }
    _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
    _GEO_COUNTRIES = ['US']
    _TESTS = [{
        'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
        'info_dict': {
@ -104,7 +105,7 @@ class GoIE(AdobePassIE):
                        for error in errors:
                            if error.get('code') == 1002:
                                self.raise_geo_restricted(
-                                    error['message'], countries=['US'])
+                                    error['message'], countries=self._GEO_COUNTRIES)
                        error_message = ', '.join([error['message'] for error in errors])
                        raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
                    asset_url += '?' + entitlement['uplynkData']['sessionKey']
--- a/youtube_dl/extractor/itv.py
+++ b/youtube_dl/extractor/itv.py
@ -24,6 +24,7 @@ from ..utils import (
 class ITVIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
    _GEO_COUNTRIES = ['GB']
    _TEST = {
        'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
        'info_dict': {
@ -101,7 +102,8 @@ class ITVIE(InfoExtractor):
            fault_code = xpath_text(resp_env, './/faultcode')
            fault_string = xpath_text(resp_env, './/faultstring')
            if fault_code == 'InvalidGeoRegion':
-                self.raise_geo_restricted(msg=fault_string, countries=['GB'])
+                self.raise_geo_restricted(
                    msg=fault_string, countries=self._GEO_COUNTRIES)
            raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string))
        title = xpath_text(playlist, 'EpisodeTitle', fatal=True)
        video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@ -14,6 +14,7 @@ from ..utils import (
 class NRKBaseIE(InfoExtractor):
    _GEO_COUNTRIES = ['NO']
    def _real_extract(self, url):
        video_id = self._match_id(url)
@ -93,7 +94,8 @@ class NRKBaseIE(InfoExtractor):
            # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
            if 'IsGeoBlocked' in message_type:
                self.raise_geo_restricted(
-                    msg=MESSAGES.get('ProgramIsGeoBlocked'), countries=['NO'])
+                    msg=MESSAGES.get('ProgramIsGeoBlocked'),
                    countries=self._GEO_COUNTRIES)
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, MESSAGES.get(
                    message_type, message_type)),
--- a/youtube_dl/extractor/ondemandkorea.py
+++ b/youtube_dl/extractor/ondemandkorea.py
@ -10,6 +10,7 @@ from ..utils import (
 class OnDemandKoreaIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
    _GEO_COUNTRIES = ['US', 'CA']
    _TEST = {
        'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
        'info_dict': {
@ -36,7 +37,7 @@ class OnDemandKoreaIE(InfoExtractor):
        if 'msg_block_01.png' in webpage:
            self.raise_geo_restricted(
                msg='This content is not available in your region',
-                countries=['US', 'CA'])
+                countries=self._GEO_COUNTRIES)
        if 'This video is only available to ODK PLUS members.' in webpage:
            raise ExtractorError(
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@ -193,6 +193,8 @@ class PBSIE(InfoExtractor):
        )
    ''' % '|'.join(list(zip(*_STATIONS))[0])
    _GEO_COUNTRIES = ['US']
    _TESTS = [
        {
            'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
@ -492,7 +494,8 @@ class PBSIE(InfoExtractor):
                message = self._ERRORS.get(
                    redirect_info['http_code'], redirect_info['message'])
                if redirect_info['http_code'] == 403:
-                    self.raise_geo_restricted(msg=message, countries=['US'])
+                    self.raise_geo_restricted(
                        msg=message, countries=self._GEO_COUNTRIES)
                raise ExtractorError(
                    '%s said: %s' % (self.IE_NAME, message), expected=True)
--- a/youtube_dl/extractor/srgssr.py
+++ b/youtube_dl/extractor/srgssr.py
@ -14,7 +14,8 @@ from ..utils import (
 class SRGSSRIE(InfoExtractor):
    _VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)'
-    _BYPASS_GEO = False
+    _GEO_BYPASS = False
    _GEO_COUNTRIES = ['CH']
    _ERRORS = {
        'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.',
@ -43,7 +44,8 @@ class SRGSSRIE(InfoExtractor):
        if media_data.get('block') and media_data['block'] in self._ERRORS:
            message = self._ERRORS[media_data['block']]
            if media_data['block'] == 'GEOBLOCK':
-                self.raise_geo_restricted(msg=message, countries=['CH'])
+                self.raise_geo_restricted(
                    msg=message, countries=self._GEO_COUNTRIES)
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, message), expected=True)
--- a/youtube_dl/extractor/svt.py
+++ b/youtube_dl/extractor/svt.py
@ -13,6 +13,7 @@ from ..utils import (
 class SVTBaseIE(InfoExtractor):
    _GEO_COUNTRIES = ['SE']
    def _extract_video(self, video_info, video_id):
        formats = []
        for vr in video_info['videoReferences']:
@ -39,7 +40,8 @@ class SVTBaseIE(InfoExtractor):
                })
        if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
            self.raise_geo_restricted(
-                'This video is only available in Sweden', countries=['SE'])
+                'This video is only available in Sweden',
                countries=self._GEO_COUNTRIES)
        self._sort_formats(formats)
        subtitles = {}
--- a/youtube_dl/extractor/vbox7.py
+++ b/youtube_dl/extractor/vbox7.py
@ -20,6 +20,7 @@ class Vbox7IE(InfoExtractor):
                        )
                        (?P<id>[\da-fA-F]+)
                    '''
    _GEO_COUNTRIES = ['BG']
    _TESTS = [{
        'url': 'http://vbox7.com/play:0946fff23c',
        'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
@ -78,7 +79,7 @@ class Vbox7IE(InfoExtractor):
        video_url = video['src']
        if '/na.mp4' in video_url:
-            self.raise_geo_restricted(countries=['BG'])
+            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
        uploader = video.get('uploader')
--- a/youtube_dl/extractor/vgtv.py
+++ b/youtube_dl/extractor/vgtv.py
@ -14,7 +14,7 @@ from ..utils import (
 class VGTVIE(XstreamIE):
    IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
-    _BYPASS_GEO = False
+    _GEO_BYPASS = False
    _HOST_TO_APPNAME = {
        'vgtv.no': 'vgtv',
@ -218,7 +218,8 @@ class VGTVIE(XstreamIE):
            properties = try_get(
                data, lambda x: x['streamConfiguration']['properties'], list)
            if properties and 'geoblocked' in properties:
-                raise self.raise_geo_restricted(countries=['NO'])
+                raise self.raise_geo_restricted(
                    countries=[host.rpartition('.')[-1].partition('/')[0].upper()])
        self._sort_formats(info['formats'])
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@ -27,7 +27,7 @@ class VikiBaseIE(InfoExtractor):
    _APP_VERSION = '2.2.5.1428709186'
    _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
-    _BYPASS_GEO = False
+    _GEO_BYPASS = False
    _NETRC_MACHINE = 'viki'
    _token = None
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -3291,7 +3291,7 @@ class GeoUtils(object):
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return compat_str(socket.inet_ntoa(
-            compat_struct_pack('!I', random.randint(addr_min, addr_max))))
+            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):