pull/29845/merge
dirkf 2 months ago committed by GitHub
commit b5cb53beac
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -41,8 +41,12 @@ class TestCompat(unittest.TestCase):
self.assertEqual(compat_getenv(test_var), test_str)
def test_compat_expanduser(self):
from youtube_dl.compat import compat_os_name
old_home = os.environ.get('HOME')
test_str = r'C:\Documents and Settings\тест\Application Data'
if compat_os_name in ('nt', 'ce'):
test_str = r'C:\Documents and Settings\тест\Application Data'
else:
test_str = '/home/тест'
compat_setenv('HOME', test_str)
self.assertEqual(compat_expanduser('~'), test_str)
compat_setenv('HOME', old_home or '')

@ -102,6 +102,7 @@ from youtube_dl.utils import (
url_or_none,
urljoin,
urlencode_postdata,
urlhandle_detect_ext,
urshift,
update_url_query,
variadic,
@ -336,6 +337,9 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unescapeHTML('&a"'), '&a"')
# HTML5 entities
self.assertEqual(unescapeHTML('.''), '.\'')
# non-semicolon HTML5 (bah!) entities
self.assertEqual(unescapeHTML('&amp&AMPetc'), '&&etc')
self.assertEqual(unescapeHTML('&pound&POUNDetc'), '£&POUNDetc')
def test_date_from_str(self):
self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day'))
@ -376,6 +380,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902')
self.assertEqual(unified_strdate('November 3rd, 2019'), '20191103')
self.assertEqual(unified_strdate('October 23rd, 2005'), '20051023')
self.assertEqual(unified_strdate('20211221'), '20211221')
def test_unified_timestamps(self):
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
@ -404,6 +409,12 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_timestamp('December 31 1969 20:00:01 EDT'), 1)
self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86)
self.assertEqual(unified_timestamp('12/31/1969 20:01:18 EDT', False), 78)
self.assertEqual(unified_timestamp('11:31 17-Jun-2021'), 1623929460)
self.assertEqual(unified_timestamp('11:31 17-Jun-2021-0000'), 1623929460)
from youtube_dl.utils import DATE_FORMATS_DAY_FIRST
DATE_FORMATS_DAY_FIRST.append('%H:%M %d-%m-%Y')
self.assertEqual(unified_timestamp('17:30 27-02-2016'), 1456594200)
self.assertEqual(unified_timestamp('17:30 27-02-2016-0000'), 1456594200)
def test_determine_ext(self):
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
@ -514,7 +525,7 @@ class TestUtil(unittest.TestCase):
args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
self.assertEqual(
shell_quote(args),
"""ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''')
"""ffmpeg -i 'ñ€ß'"'"'.mp4'""" if not (compat_os_name in ('nt', 'ce')) else '''ffmpeg -i "ñ€ß'.mp4"''')
def test_float_or_none(self):
self.assertEqual(float_or_none('42.42'), 42.42)
@ -646,6 +657,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
self.assertEqual(parse_duration('01:02:03:050'), 3723.05)
self.assertEqual(parse_duration('103:050'), 103.05)
def test_fix_xml_ampersands(self):
self.assertEqual(
@ -1241,7 +1254,7 @@ class TestUtil(unittest.TestCase):
def test_args_to_str(self):
self.assertEqual(
args_to_str(['foo', 'ba/r', '-baz', '2 be', '']),
'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""'
'foo ba/r -baz \'2 be\' \'\'' if not (compat_os_name in ('nt', 'ce')) else 'foo ba/r -baz "2 be" ""'
)
def test_parse_filesize(self):
@ -1264,9 +1277,16 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_count('1000'), 1000)
self.assertEqual(parse_count('1.000'), 1000)
self.assertEqual(parse_count('1.1k'), 1100)
self.assertEqual(parse_count('1.1 k'), 1100)
self.assertEqual(parse_count('1,1 k'), 1100)
self.assertEqual(parse_count('1,1kk'), 1100000)
self.assertEqual(parse_count('100 views'), 100)
self.assertEqual(parse_count('1,100 views'), 1100)
self.assertEqual(parse_count('1.1kk'), 1100000)
self.assertEqual(parse_count('1.1kk '), 1100000)
self.assertEqual(parse_count('1.1kk views'), 1100000)
self.assertEqual(parse_count('10M views'), 10000000)
self.assertEqual(parse_count('has 10M views'), 10000000)
def test_parse_resolution(self):
self.assertEqual(parse_resolution(None), {})
@ -1612,7 +1632,10 @@ Line 1
def test_get_elements_by_class(self):
html = '''
<span class="foo bar">nice</span><span class="foo bar">also nice</span>
<span class="not-foo bar">nasty</span>
<span class="foo bar">nice</span>
<span class="bar foo">"also nice"</span>
<span class="bar foo-impostor">also nasty</span>
'''
self.assertEqual(get_elements_by_class('foo', html), ['nice', 'also nice'])
@ -2039,6 +2062,57 @@ Line 1
'a', 'b', 'c', 'd',
from_dict={'a': 'c', 'c': [], 'b': 'd', 'd': None}), 'c-d')
def test_urlhandle_detect_ext(self):
class UrlHandle(object):
_info = {}
def __init__(self, info):
self._info = info
@property
def headers(self):
return self._info
# header with non-ASCII character and contradictory Content-Type
urlh = UrlHandle({
'Content-Disposition': b'attachment; filename="Epis\xf3dio contains non-ASCI ISO 8859-1 character.mp3"',
'Content-Type': b'audio/aac',
})
self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
# header with no Content-Disposition
urlh = UrlHandle({
'Content-Type': b'audio/mp3',
})
self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
# header with Content-Disposition and unquoted filename
urlh = UrlHandle({
'Content-Disposition': b'attachment; filename=unquoted_filename_token.mp3',
})
self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
# header with Content-Disposition including spacing and uppercase
urlh = UrlHandle({
'Content-Disposition': b'ATTACHMENT; FileName = unquoted_filename_token.mp3',
})
self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
# header with Content-Disposition and extended filename parameter syntax
urlh = UrlHandle({
'Content-Disposition': b"attachment; filename*=iso8859-15''costs%201%A4%20filename.mp3",
})
self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
# header with Content-Disposition and both filename parameter syntaxes
urlh = UrlHandle({
'Content-Disposition': b'''attachment; filename="should ignore.mp4";
FileName* = iso8859-15''costs%201%A4%20filename.mp3''',
})
self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
# header with Content-Disposition and 'wrong' order of both syntaxes
urlh = UrlHandle({
'Content-Disposition': b'''attachment; filename*=iso8859-15''costs%201%A4%20filename.mp3;
filename="should ignore.mp4"''',
})
self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
if __name__ == '__main__':
unittest.main()

@ -2776,16 +2776,19 @@ else:
# Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
def compat_getenv(key, default=None):
from .utils import get_filesystem_encoding
env = os.getenv(key, default)
if env:
env = env.decode(get_filesystem_encoding())
from .utils import get_filesystem_encoding
encoding = get_filesystem_encoding()
env = env.decode(encoding)
if not encoding.lower().startswith('ut'):
env = env.encode('utf-8').decode('unicode-escape')
return env
def compat_setenv(key, value, env=os.environ):
def encode(v):
from .utils import get_filesystem_encoding
return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v
return v.encode(get_filesystem_encoding(), 'backslashreplace') if isinstance(v, compat_str) else v
env[encode(key)] = encode(value)
# HACK: The default implementations of os.path.expanduser from cpython do not decode

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save