[commonmistakes] Detect BOMs at the beginning of URLs

Reported at https://bugzilla.redhat.com/show_bug.cgi?id=1093517 .
pull/4938/head
Philipp Hagemeister 9 years ago
parent 834bf069d2
commit c73fae1e2e

@ -74,7 +74,7 @@ from .collegehumor import CollegeHumorIE
from .collegerama import CollegeRamaIE from .collegerama import CollegeRamaIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .comcarcoff import ComCarCoffIE from .comcarcoff import ComCarCoffIE
from .commonmistakes import CommonMistakesIE from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
from .condenast import CondeNastIE from .condenast import CondeNastIE
from .cracked import CrackedIE from .cracked import CrackedIE
from .criterion import CriterionIE from .criterion import CriterionIE

@ -27,3 +27,20 @@ class CommonMistakesIE(InfoExtractor):
if not self._downloader.params.get('verbose'): if not self._downloader.params.get('verbose'):
msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.' msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
raise ExtractorError(msg, expected=True) raise ExtractorError(msg, expected=True)
class UnicodeBOMIE(InfoExtractor):
    """Pseudo-extractor for URLs that begin with a Byte Order Mark (U+FEFF).

    It matches any URL whose first character is the BOM, warns the user
    about it, and passes the remainder of the URL on via ``url_result``.
    """

    # NOTE(review): presumably a false IE_DESC keeps this helper out of the
    # user-visible extractor descriptions -- confirm against InfoExtractor.
    IE_DESC = False

    # Group "bom" captures the leading U+FEFF; group "id" captures everything
    # after it up to the end of the string.
    _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'

    _TESTS = [{
        'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Warn about the leading BOM and return a result for the rest of *url*."""
        # Presumably _match_id returns the "id" group of _VALID_URL, i.e. the
        # URL with the BOM stripped -- verify against InfoExtractor.
        url_without_bom = self._match_id(url)

        warning = (
            'Your URL starts with a Byte Order Mark (BOM). '
            'Removing the BOM and looking for "%s" ...' % url_without_bom)
        self.report_warning(warning)

        return self.url_result(url_without_bom)

Loading…
Cancel
Save