From ef044be34bb64c489558dd07818616b514d2e2ad Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 8 Jun 2022 15:52:21 +0100 Subject: [PATCH 01/78] [test] Skip not _WORKING IE in subtitle tests; use unittest.skipTest throughout --- test/test_download.py | 7 +++---- test/test_subtitles.py | 3 +++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/test/test_download.py b/test/test_download.py index 8e43cfa12..0951a171a 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -88,7 +88,6 @@ class TestDownload(unittest.TestCase): # Dynamically generate tests - def generator(test_case, tname): def test_template(self): @@ -100,9 +99,10 @@ def generator(test_case, tname): def print_skipping(reason): print('Skipping %s: %s' % (test_case['name'], reason)) + self.skipTest(reason) + if not ie.working(): print_skipping('IE marked as not _WORKING') - return for tc in test_cases: info_dict = tc.get('info_dict', {}) @@ -111,11 +111,10 @@ def generator(test_case, tname): if 'skip' in test_case: print_skipping(test_case['skip']) - return + for other_ie in other_ies: if not other_ie.working(): print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key()) - return params = get_params(test_case.get('params', {})) params['outtmpl'] = tname + '_' + params['outtmpl'] diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 550e0ca00..c250473be 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -38,6 +38,9 @@ class BaseTestSubtitles(unittest.TestCase): self.DL = FakeYDL() self.ie = self.IE() self.DL.add_info_extractor(self.ie) + if not self.IE.working(): + print('Skipping: %s marked as not _WORKING' % self.IE.ie_key()) + self.skipTest('IE marked as not _WORKING') def getInfoDict(self): info_dict = self.DL.extract_info(self.url, download=False) From 3aa94d7945dfaa0e04acf2700ffe0e43b00db498 Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 8 Jun 2022 23:11:33 +0100 Subject: [PATCH 02/78] [test] Fix workable subtitle tests (except YT) and mark others as skip, broken * broken tests need to be fixed when fixing the respective IE --- test/test_subtitles.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/test/test_subtitles.py b/test/test_subtitles.py index c250473be..23cf06e09 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -131,6 +131,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles): self.assertFalse(subtitles) +@unittest.skip('IE broken') class TestTedSubtitles(BaseTestSubtitles): url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html' IE = TEDIE @@ -155,18 +156,19 @@ class TestVimeoSubtitles(BaseTestSubtitles): self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr'])) - self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888') - self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8') + self.assertEqual(md5(subtitles['en']), '386cbc9320b94e25cb364b97935e5dd1') + self.assertEqual(md5(subtitles['fr']), 'c9b69eef35bc6641c0d4da8a04f9dfac') def test_nosubtitles(self): self.DL.expect_warning('video doesn\'t have subtitles') - self.url = 'http://vimeo.com/56015672' + self.url = 'http://vimeo.com/68093876' self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertFalse(subtitles) +@unittest.skip('IE broken') class TestWallaSubtitles(BaseTestSubtitles): url = 
'http://vod.walla.co.il/movie/2705958/the-yes-men' IE = WallaIE @@ -188,6 +190,7 @@ class TestWallaSubtitles(BaseTestSubtitles): self.assertFalse(subtitles) +@unittest.skip('IE broken') class TestCeskaTelevizeSubtitles(BaseTestSubtitles): url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky' IE = CeskaTelevizeIE @@ -209,6 +212,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles): self.assertFalse(subtitles) +@unittest.skip('IE broken') class TestLyndaSubtitles(BaseTestSubtitles): url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html' IE = LyndaIE @@ -221,6 +225,7 @@ class TestLyndaSubtitles(BaseTestSubtitles): self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7') +@unittest.skip('IE broken') class TestNPOSubtitles(BaseTestSubtitles): url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860' IE = NPOIE @@ -233,6 +238,7 @@ class TestNPOSubtitles(BaseTestSubtitles): self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4') +@unittest.skip('IE broken') class TestMTVSubtitles(BaseTestSubtitles): url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans' IE = ComedyCentralIE @@ -256,8 +262,8 @@ class TestNRKSubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(set(subtitles.keys()), set(['no'])) - self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2') + self.assertEqual(set(subtitles.keys()), set(['nb-ttv'])) + self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149') class TestRaiPlaySubtitles(BaseTestSubtitles): @@ -280,6 +286,7 @@ class TestRaiPlaySubtitles(BaseTestSubtitles): self.assertEqual(md5(subtitles['it']), '4b3264186fbb103508abe5311cfcb9cd') +@unittest.skip('IE broken - DRM only') class TestVikiSubtitles(BaseTestSubtitles): url = 'http://www.viki.com/videos/1060846v-punch-episode-18' IE = VikiIE @@ -306,6 +313,7 @@ class TestThePlatformSubtitles(BaseTestSubtitles): self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b') +@unittest.skip('IE broken') class TestThePlatformFeedSubtitles(BaseTestSubtitles): url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207' IE = ThePlatformFeedIE @@ -341,7 +349,7 @@ class TestDemocracynowSubtitles(BaseTestSubtitles): self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), set(['en'])) - self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c') + self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045') def test_subtitles_in_page(self): self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree' @@ -349,7 +357,7 @@ class TestDemocracynowSubtitles(BaseTestSubtitles): self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), set(['en'])) - self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c') + self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045') if __name__ == '__main__': From 811c480f7b6c25ca510a033e6365d00174135392 Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 9 Jun 2022 15:25:23 +0100 Subject: [PATCH 03/78] [YouTube] Support JSON3 subtitle format * subtitle tests updated to match --- test/test_subtitles.py | 74 ++++++++++++++++++++++++--------- 
youtube_dl/extractor/youtube.py | 2 +- 2 files changed, 55 insertions(+), 21 deletions(-) diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 23cf06e09..4cbc69ccd 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -59,6 +59,21 @@ class BaseTestSubtitles(unittest.TestCase): class TestYoutubeSubtitles(BaseTestSubtitles): + # Available subtitles for QRS8MkLhQmM: + # Language formats + # ru vtt, ttml, srv3, srv2, srv1, json3 + # fr vtt, ttml, srv3, srv2, srv1, json3 + # en vtt, ttml, srv3, srv2, srv1, json3 + # nl vtt, ttml, srv3, srv2, srv1, json3 + # de vtt, ttml, srv3, srv2, srv1, json3 + # ko vtt, ttml, srv3, srv2, srv1, json3 + # it vtt, ttml, srv3, srv2, srv1, json3 + # zh-Hant vtt, ttml, srv3, srv2, srv1, json3 + # hi vtt, ttml, srv3, srv2, srv1, json3 + # pt-BR vtt, ttml, srv3, srv2, srv1, json3 + # es-MX vtt, ttml, srv3, srv2, srv1, json3 + # ja vtt, ttml, srv3, srv2, srv1, json3 + # pl vtt, ttml, srv3, srv2, srv1, json3 url = 'QRS8MkLhQmM' IE = YoutubeIE @@ -67,41 +82,60 @@ class TestYoutubeSubtitles(BaseTestSubtitles): self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles.keys()), 13) - self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06') - self.assertEqual(md5(subtitles['it']), '6d752b98c31f1cf8d597050c7a2cb4b5') + self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d') + self.assertEqual(md5(subtitles['it']), '0e0b667ba68411d88fd1c5f4f4eab2f9') for lang in ['fr', 'de']: self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) - def test_youtube_subtitles_ttml_format(self): + def _test_subtitles_format(self, fmt, md5_hash, lang='en'): self.DL.params['writesubtitles'] = True - self.DL.params['subtitlesformat'] = 'ttml' + self.DL.params['subtitlesformat'] = fmt subtitles = self.getSubtitles() - self.assertEqual(md5(subtitles['en']), 'e306f8c42842f723447d9f63ad65df54') + self.assertEqual(md5(subtitles[lang]), md5_hash) + + def test_youtube_subtitles_ttml_format(self): + self._test_subtitles_format('ttml', 'c97ddf1217390906fa9fbd34901f3da2') def test_youtube_subtitles_vtt_format(self): - self.DL.params['writesubtitles'] = True - self.DL.params['subtitlesformat'] = 'vtt' - subtitles = self.getSubtitles() - self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06') + self._test_subtitles_format('vtt', 'ae1bd34126571a77aabd4d276b28044d') - def test_youtube_automatic_captions(self): - self.url = '8YoUxe5ncPo' + def test_youtube_subtitles_json3_format(self): + self._test_subtitles_format('json3', '688dd1ce0981683867e7fe6fde2a224b') + + def _test_automatic_captions(self, url, lang): + self.url = url self.DL.params['writeautomaticsub'] = True - self.DL.params['subtitleslangs'] = ['it'] + self.DL.params['subtitleslangs'] = [lang] subtitles = self.getSubtitles() - self.assertTrue(subtitles['it'] is not None) + self.assertTrue(subtitles[lang] is not None) + def test_youtube_automatic_captions(self): + # Available automatic captions for 8YoUxe5ncPo: + # Language formats (all in vtt, ttml, srv3, srv2, srv1, json3) + # gu, zh-Hans, zh-Hant, gd, ga, gl, lb, la, lo, tt, tr, + # lv, lt, tk, th, tg, te, fil, haw, yi, ceb, yo, de, da, + # el, eo, en, eu, et, es, ru, rw, ro, bn, be, bg, uk, jv, + # bs, ja, or, xh, co, ca, cy, cs, ps, pt, pa, vi, pl, hy, + # hr, ht, hu, hmn, hi, ha, mg, uz, ml, mn, mi, mk, ur, + # mt, ms, mr, ug, ta, my, af, sw, is, am, + # *it*, iw, sv, ar, + # su, zu, az, id, ig, nl, no, ne, ny, fr, ku, fy, fa, fi, + 
# ka, kk, sr, sq, ko, kn, km, st, sk, si, so, sn, sm, sl, + # ky, sd + # ... + self._test_automatic_captions('8YoUxe5ncPo', 'it') + + @unittest.skip('ASR subs all in all supported langs now') def test_youtube_translated_subtitles(self): - # This video has a subtitles track, which can be translated - self.url = 'Ky9eprVWzlI' - self.DL.params['writeautomaticsub'] = True - self.DL.params['subtitleslangs'] = ['it'] - subtitles = self.getSubtitles() - self.assertTrue(subtitles['it'] is not None) + # This video has a subtitles track, which can be translated (#4555) + self._test_automatic_captions('Ky9eprVWzlI', 'it') def test_youtube_nosubtitles(self): self.DL.expect_warning('video doesn\'t have subtitles') - self.url = 'n5BB19UTcdA' + # Available automatic captions for 8YoUxe5ncPo: + # ... + # 8YoUxe5ncPo has no subtitles + self.url = '8YoUxe5ncPo' self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9c62b8890..91a3b6058 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -499,7 +499,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'/(?P[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$', r'\b(?Pvfl[a-zA-Z0-9_-]+)\b.*?\.js$', ) - _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt') + _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt') _GEO_BYPASS = False From 0700fde6403aa9eec1ff02bff7323696a205900c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 9 Jan 2021 17:56:12 +0530 Subject: [PATCH 04/78] [utils, etc] Kill child processes when yt-dl is killed * derived from PR #26592, closes #26592 Authored by: Unrud --- youtube_dl/YoutubeDL.py | 3 ++- youtube_dl/compat.py | 3 ++- youtube_dl/downloader/external.py | 16 ++++++++++------ youtube_dl/downloader/rtmp.py | 10 ++++++---- youtube_dl/extractor/openload.py | 3 ++- youtube_dl/postprocessor/embedthumbnail.py | 5 +++-- youtube_dl/postprocessor/ffmpeg.py | 5 +++-- youtube_dl/utils.py | 18 ++++++++++++++---- 8 files changed, 42 insertions(+), 21 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 019e309cb..3895b408f 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -73,6 +73,7 @@ from .utils import ( PostProcessingError, preferredencoding, prepend_extension, + process_communicate_or_kill, register_socks_protocols, render_table, replace_extension, @@ -2323,7 +2324,7 @@ class YoutubeDL(object): ['git', 'rev-parse', '--short', 'HEAD'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=os.path.dirname(os.path.abspath(__file__))) - out, err = sp.communicate() + out, err = process_communicate_or_kill(sp) out = out.decode().strip() if re.match('[0-9a-f]+', out): self._write_string('[debug] Git HEAD: ' + out + '\n') diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 2004a405a..9f5f85dae 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2890,6 +2890,7 @@ else: _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines']) def compat_get_terminal_size(fallback=(80, 24)): + from .utils import process_communicate_or_kill columns = compat_getenv('COLUMNS') if columns: columns = int(columns) @@ -2906,7 +2907,7 @@ else: sp = subprocess.Popen( ['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = sp.communicate() + out, err = process_communicate_or_kill(sp) 
_lines, _columns = map(int, out.split()) except Exception: _columns, _lines = _terminal_size(*fallback) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index c31f8910a..a06ab2e50 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -22,6 +22,7 @@ from ..utils import ( handle_youtubedl_headers, check_executable, is_outdated_version, + process_communicate_or_kill, ) @@ -104,7 +105,7 @@ class ExternalFD(FileDownloader): p = subprocess.Popen( cmd, stderr=subprocess.PIPE) - _, stderr = p.communicate() + _, stderr = process_communicate_or_kill(p) if p.returncode != 0: self.to_stderr(stderr.decode('utf-8', 'replace')) return p.returncode @@ -141,7 +142,7 @@ class CurlFD(ExternalFD): # curl writes the progress to stderr so don't capture it. p = subprocess.Popen(cmd) - p.communicate() + process_communicate_or_kill(p) return p.returncode @@ -336,14 +337,17 @@ class FFmpegFD(ExternalFD): proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env) try: retval = proc.wait() - except KeyboardInterrupt: - # subprocces.run would send the SIGKILL signal to ffmpeg and the + except BaseException as e: + # subprocess.run would send the SIGKILL signal to ffmpeg and the # mp4 file couldn't be played, but if we ask ffmpeg to quit it # produces a file that is playable (this is mostly useful for live # streams). Note that Windows is not affected and produces playable # files (see https://github.com/ytdl-org/youtube-dl/issues/8300). - if sys.platform != 'win32': - proc.communicate(b'q') + if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32': + process_communicate_or_kill(proc, b'q') + else: + proc.kill() + proc.wait() raise return retval diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index fbb7f51b0..8a25dbc8d 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -89,11 +89,13 @@ class RtmpFD(FileDownloader): self.to_screen('') cursor_in_new_line = True self.to_screen('[rtmpdump] ' + line) - finally: + if not cursor_in_new_line: + self.to_screen('') + return proc.wait() + except BaseException: # Including KeyboardInterrupt + proc.kill() proc.wait() - if not cursor_in_new_line: - self.to_screen('') - return proc.returncode + raise url = info_dict['url'] player_url = info_dict.get('player_url') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 0c20d0177..b05d60435 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -16,6 +16,7 @@ from ..utils import ( ExtractorError, get_exe_version, is_outdated_version, + process_communicate_or_kill, std_headers, ) @@ -226,7 +227,7 @@ class PhantomJSwrapper(object): self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name ], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = p.communicate() + out, err = process_communicate_or_kill(p) if p.returncode != 0: raise ExtractorError( 'Executing JS failed\n:' + encodeArgument(err)) diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py index 3990908b6..5e7b6e2df 100644 --- a/youtube_dl/postprocessor/embedthumbnail.py +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -13,8 +13,9 @@ from ..utils import ( encodeFilename, PostProcessingError, prepend_extension, + process_communicate_or_kill, replace_extension, - shell_quote + shell_quote, ) @@ -109,7 +110,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): self._downloader.to_screen('[debug] AtomicParsley command line: %s' 
% shell_quote(cmd)) p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = p.communicate() + stdout, stderr = process_communicate_or_kill(p) if p.returncode != 0: msg = stderr.decode('utf-8', 'replace').strip() diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 9f76c9d4e..8c29c8d59 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -16,6 +16,7 @@ from ..utils import ( is_outdated_version, PostProcessingError, prepend_extension, + process_communicate_or_kill, shell_quote, subtitles_filename, dfxp2srt, @@ -180,7 +181,7 @@ class FFmpegPostProcessor(PostProcessor): handle = subprocess.Popen( cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, stdin=subprocess.PIPE) - stdout_data, stderr_data = handle.communicate() + stdout_data, stderr_data = process_communicate_or_kill(handle) expected_ret = 0 if self.probe_available else 1 if handle.wait() != expected_ret: return None @@ -228,7 +229,7 @@ class FFmpegPostProcessor(PostProcessor): if self._downloader.params.get('verbose', False): self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd)) p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) - stdout, stderr = p.communicate() + stdout, stderr = process_communicate_or_kill(p) if p.returncode != 0: stderr = stderr.decode('utf-8', 'replace') msgs = stderr.strip().split('\n') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 8aa2a43a2..4e00317f1 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2212,6 +2212,15 @@ def unescapeHTML(s): r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s) +def process_communicate_or_kill(p, *args, **kwargs): + try: + return p.communicate(*args, **kwargs) + except BaseException: # Including KeyboardInterrupt + p.kill() + p.wait() + raise + + def get_subprocess_encoding(): if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5: # For subprocess calls, encode with locale encoding @@ -3788,7 +3797,8 @@ def check_executable(exe, args=[]): """ Checks if the given binary is installed somewhere in PATH, and returns its name. args can be a list of arguments for a short output (like -version) """ try: - subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() + process_communicate_or_kill(subprocess.Popen( + [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)) except OSError: return False return exe @@ -3802,10 +3812,10 @@ def get_exe_version(exe, args=['--version'], # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers # SIGTTOU if youtube-dl is run in the background. 
# See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656 - out, _ = subprocess.Popen( + out, _ = process_communicate_or_kill(subprocess.Popen( [encodeArgument(exe)] + args, stdin=subprocess.PIPE, - stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate() + stdout=subprocess.PIPE, stderr=subprocess.STDOUT)) except OSError: return False if isinstance(out, bytes): # Python 2.x @@ -5744,7 +5754,7 @@ def write_xattr(path, key, value): cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) except EnvironmentError as e: raise XAttrMetadataError(e.errno, e.strerror) - stdout, stderr = p.communicate() + stdout, stderr = process_communicate_or_kill(p) stderr = stderr.decode('utf-8', 'replace') if p.returncode != 0: raise XAttrMetadataError(p.returncode, stderr) From cc179df346abf34c8f77dbb221b839092007f20c Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 12 Jun 2022 14:10:38 +0100 Subject: [PATCH 05/78] [XHamster] Support xhday.com alias, extract `uploader_id` * support xhday.com alias for xhamster.com (resolves #31023) Authored by: dirkf * extract `uploader_id`: from https://github.com/yt-dlp/yt-dlp/commit/908b56eaf7872149706dbd7fa071f838d0c786b7 (PR https://github.com/yt-dlp/yt-dlp/pull/844) Authored by: octotherp --- youtube_dl/extractor/xhamster.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index f73b9778f..f764021ba 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import itertools @@ -23,7 +24,7 @@ from ..utils import ( class XHamsterIE(InfoExtractor): - _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com)' + _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com)' _VALID_URL = r'''(?x) https?:// (?:.+?\.)?%s/ @@ -34,7 +35,7 @@ class XHamsterIE(InfoExtractor): ''' % _DOMAINS _TESTS = [{ 'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445', - 'md5': '98b4687efb1ffd331c4197854dc09e8f', + 'md5': '34e1ab926db5dc2750fed9e1f34304bb', 'info_dict': { 'id': '1509445', 'display_id': 'femaleagent-shy-beauty-takes-the-bait', @@ -43,6 +44,7 @@ class XHamsterIE(InfoExtractor): 'timestamp': 1350194821, 'upload_date': '20121014', 'uploader': 'Ruseful2011', + 'uploader_id': 'ruseful2011', 'duration': 893, 'age_limit': 18, }, @@ -72,6 +74,7 @@ class XHamsterIE(InfoExtractor): 'timestamp': 1454948101, 'upload_date': '20160208', 'uploader': 'parejafree', + 'uploader_id': 'parejafree', 'duration': 72, 'age_limit': 18, }, @@ -117,6 +120,9 @@ class XHamsterIE(InfoExtractor): }, { 'url': 'http://de.xhamster.com/videos/skinny-girl-fucks-herself-hard-in-the-forest-xhnBJZx', 'only_matching': True, + }, { + 'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf', + 'only_matching': True, }] def _real_extract(self, url): @@ -245,6 +251,7 @@ class XHamsterIE(InfoExtractor): else: categories = None + uploader_url = url_or_none(try_get(video, lambda x: x['author']['pageURL'])) return { 'id': video_id, 'display_id': display_id, @@ -253,6 +260,8 @@ class XHamsterIE(InfoExtractor): 'timestamp': int_or_none(video.get('created')), 'uploader': try_get( video, lambda x: x['author']['name'], compat_str), + 'uploader_url': uploader_url, + 'uploader_id': uploader_url.split('/')[-1] if uploader_url else None, 'thumbnail': video.get('thumbURL'), 'duration': int_or_none(video.get('duration')), 
'view_count': int_or_none(video.get('views')), @@ -261,7 +270,7 @@ class XHamsterIE(InfoExtractor): 'dislike_count': int_or_none(try_get( video, lambda x: x['rating']['dislikes'], int)), 'comment_count': int_or_none(video.get('views')), - 'age_limit': age_limit, + 'age_limit': age_limit if age_limit is not None else 18, 'categories': categories, 'formats': formats, } @@ -352,6 +361,7 @@ class XHamsterIE(InfoExtractor): 'description': description, 'upload_date': upload_date, 'uploader': uploader, + 'uploader_id': uploader.lower() if uploader else None, 'thumbnail': thumbnail, 'duration': duration, 'view_count': view_count, @@ -420,6 +430,9 @@ class XHamsterUserIE(InfoExtractor): 'id': 'firatkaan', }, 'playlist_mincount': 1, + }, { + 'url': 'https://xhday.com/users/mobhunter', + 'only_matching': True, }] def _entries(self, user_id): From 11665dd2367a2eefd1ad090828f987fef11226e4 Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 15 Jun 2022 18:26:54 +0100 Subject: [PATCH 06/78] [test] Fix linter for 3aa94d7945dfaa0e04acf2700ffe0e43b00db498 --- test/test_download.py | 1 + test/test_subtitles.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_download.py b/test/test_download.py index 0951a171a..6a6673bc2 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -88,6 +88,7 @@ class TestDownload(unittest.TestCase): # Dynamically generate tests + def generator(test_case, tname): def test_template(self): diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 4cbc69ccd..1197721ff 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -117,7 +117,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles): # el, eo, en, eu, et, es, ru, rw, ro, bn, be, bg, uk, jv, # bs, ja, or, xh, co, ca, cy, cs, ps, pt, pa, vi, pl, hy, # hr, ht, hu, hmn, hi, ha, mg, uz, ml, mn, mi, mk, ur, - # mt, ms, mr, ug, ta, my, af, sw, is, am, + # mt, ms, mr, ug, ta, my, af, sw, is, am, # *it*, iw, sv, ar, # su, zu, az, id, ig, nl, no, ne, ny, fr, ku, fy, fa, fi, # ka, kk, sr, sq, ko, kn, km, st, sk, si, so, sn, sm, sl, From 8a158a936c8b002ef536e9e2b778ded02c09c0fa Mon Sep 17 00:00:00 2001 From: dirkf Date: Tue, 14 Jun 2022 19:45:34 +0100 Subject: [PATCH 07/78] [NHK] Use new API URL --- youtube_dl/extractor/nhk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index 46a800e7e..f43d91cd5 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -8,7 +8,7 @@ from ..utils import urljoin class NhkBaseIE(InfoExtractor): - _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/%s/%s/%s/all%s.json' + _API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json' _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P[a-z]{2})/ondemand' _TYPE_REGEX = r'/(?Pvideo|audio)/' From a03b9775d544b06a5b4f2aa630214c7c22fc2229 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 26 Jun 2022 14:18:33 +0100 Subject: [PATCH 08/78] [Mediaset] Support player version number in URL pattern Ref: https://github.com/yt-dlp/yt-dlp/issues/4141 --- youtube_dl/extractor/mediaset.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mediaset.py b/youtube_dl/extractor/mediaset.py index 2c16fc9e2..20048c6ab 100644 --- a/youtube_dl/extractor/mediaset.py +++ b/youtube_dl/extractor/mediaset.py @@ -24,7 +24,7 @@ class MediasetIE(ThePlatformBaseIE): (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/ (?: (?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_| 
- player/index\.html\?.*?\bprogramGuid= + player(?:/v\d+)?/index\.html\?.*?\bprogramGuid= ) )(?P[0-9A-Z]{16,}) ''' @@ -73,6 +73,10 @@ class MediasetIE(ThePlatformBaseIE): # iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/) 'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104', 'only_matching': True, + }, { + # embedUrl (from https://www.wittytv.it/amici/est-ce-que-tu-maimes-gabriele-5-dicembre-copia/) + 'url': 'https://static3.mediasetplay.mediaset.it/player/v2/index.html?partnerId=wittytv&configId=&programGuid=FD00000000153323&autoplay=true&purl=http://www.wittytv.it/amici/est-ce-que-tu-maimes-gabriele-5-dicembre-copia/', + 'only_matching': True, }, { 'url': 'mediaset:FAFU000000665924', 'only_matching': True, From 090acd58c1d810fbef1bac08d70bbfad9c0a7504 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 3 Jul 2022 20:05:21 +0100 Subject: [PATCH 09/78] [options] Improve be35e53 (--match-/reject-title parameter value) Resolves #31064. --- youtube_dl/options.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 6521ad881..f6621ef91 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -270,11 +270,11 @@ def parseOpts(overrideArguments=None): selection.add_option( '--match-title', dest='matchtitle', metavar='REGEX', - help='Download only matching titles (case-insensitive regex or sub-string)') + help='Download only matching titles (case-insensitive regex or alphanumeric sub-string)') selection.add_option( '--reject-title', dest='rejecttitle', metavar='REGEX', - help='Skip download for matching titles (case-insensitive regex or sub-string)') + help='Skip download for matching titles (case-insensitive regex or alphanumeric sub-string)') selection.add_option( '--max-downloads', dest='max_downloads', metavar='NUMBER', type=int, default=None, From 5f5c127ece74e52aa5b49b6d2941cc0f848d3c36 Mon Sep 17 00:00:00 2001 From: Kyraminol Endyeran Date: Tue, 12 Jul 2022 01:35:40 +0200 Subject: [PATCH 10/78] [VVVVID] Support video/dash types (#31060) Resolves #31030. 
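A hedged usage sketch (an illustration only, not part of the diff below) that mirrors the parameters of the new test case ('format': 'mp4', skip_download) using the public youtube_dl.YoutubeDL API and the URL taken from that test:

    import youtube_dl

    # metadata-only run against the test URL; format 'mp4' selects the
    # HLS-derived formats that the video/dash branch now yields
    opts = {'format': 'mp4', 'skip_download': True, 'quiet': True}
    with youtube_dl.YoutubeDL(opts) as ydl:
        info = ydl.extract_info(
            'https://www.vvvvid.it/show/683/made-in-abyss/1542/693786/nanachi',
            download=False)
        print(info['id'], info['title'])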
--- youtube_dl/extractor/vvvvid.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/youtube_dl/extractor/vvvvid.py b/youtube_dl/extractor/vvvvid.py index bc196f8a0..6a0d4e8f0 100644 --- a/youtube_dl/extractor/vvvvid.py +++ b/youtube_dl/extractor/vvvvid.py @@ -64,6 +64,18 @@ class VVVVIDIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + # video_type == 'video/dash' + 'url': 'https://www.vvvvid.it/show/683/made-in-abyss/1542/693786/nanachi', + 'info_dict': { + 'id': '693786', + 'ext': 'mp4', + 'title': 'Nanachi', + }, + 'params': { + 'skip_download': True, + 'format': 'mp4', + }, }, { 'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048', 'only_matching': True @@ -205,6 +217,9 @@ class VVVVIDIE(InfoExtractor): }) is_youtube = True break + elif video_type == 'video/dash': + formats.extend(self._extract_m3u8_formats( + embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False)) else: formats.extend(self._extract_wowza_formats( 'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id)) From adb5294177265ba35b45746dbb600965076ed150 Mon Sep 17 00:00:00 2001 From: Wes Date: Fri, 29 Jul 2022 20:10:00 -0500 Subject: [PATCH 11/78] [aenetworks] Update _THEPLATFORM_KEY and _THEPLATFORM_SECRET (#29749) Fixes ytdl-org/youtube-dl#29300 --- youtube_dl/extractor/aenetworks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index e55c03fd7..2a1f08e39 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -20,8 +20,8 @@ class AENetworksBaseIE(ThePlatformIE): (?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com| fyi\.tv )/''' - _THEPLATFORM_KEY = 'crazyjava' - _THEPLATFORM_SECRET = 's3cr3t' + _THEPLATFORM_KEY = '43jXaGRQud' + _THEPLATFORM_SECRET = 'S10BPXHMlb' _DOMAIN_MAP = { 'history.com': ('HISTORY', 'history'), 'aetv.com': ('AETV', 'aetv'), From deee741fb145360576ceae9d69b1b43db082c404 Mon Sep 17 00:00:00 2001 From: dirkf Date: Tue, 9 Aug 2022 21:05:00 +0100 Subject: [PATCH 12/78] [test, etc] Improve download test logs; also clean up some new flake8 issues (#31153) * [test] Identify testcase errors better * [test] Identify download errors better * [extractor/minds] Linter * [extractor/aes] Linter --- test/test_download.py | 7 +++++-- youtube_dl/aes.py | 2 +- youtube_dl/extractor/minds.py | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/test/test_download.py b/test/test_download.py index 6a6673bc2..19936969f 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -33,6 +33,7 @@ from youtube_dl.compat import ( from youtube_dl.utils import ( DownloadError, ExtractorError, + error_to_compat_str, format_bytes, UnavailableVideoError, ) @@ -108,7 +109,7 @@ def generator(test_case, tname): for tc in test_cases: info_dict = tc.get('info_dict', {}) if not (info_dict.get('id') and info_dict.get('ext')): - raise Exception('Test definition incorrect. The output file cannot be known. 
Are both \'id\' and \'ext\' keys present?') + raise Exception('Test definition (%s) requires both \'id\' and \'ext\' keys present to define the output file' % (tname, )) if 'skip' in test_case: print_skipping(test_case['skip']) @@ -161,7 +162,9 @@ def generator(test_case, tname): except (DownloadError, ExtractorError) as err: # Check if the exception is not a network related one if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503): - raise + msg = getattr(err, 'msg', error_to_compat_str(err)) + err.msg = '%s (%s)' % (msg, tname, ) + raise err if try_num == RETRIES: report_warning('%s failed due to network errors, skipping...' % tname) diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py index 461bb6d41..d0de2d93f 100644 --- a/youtube_dl/aes.py +++ b/youtube_dl/aes.py @@ -303,7 +303,7 @@ def xor(data1, data2): def rijndael_mul(a, b): - if(a == 0 or b == 0): + if (a == 0 or b == 0): return 0 return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF] diff --git a/youtube_dl/extractor/minds.py b/youtube_dl/extractor/minds.py index 8e9f0f825..e8fd582aa 100644 --- a/youtube_dl/extractor/minds.py +++ b/youtube_dl/extractor/minds.py @@ -78,7 +78,7 @@ class MindsIE(MindsBaseIE): else: return self.url_result(entity['perma_url']) else: - assert(entity['subtype'] == 'video') + assert (entity['subtype'] == 'video') video_id = entity_id # 1080p and webm formats available only on the sources array video = self._call_api( From e6a836d54ca1d3cd02f3ee45ef707a46f23e8291 Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 10 Aug 2022 15:37:59 +0100 Subject: [PATCH 13/78] [core] Make `--max-downloads ...` stop immediately on reaching the limit Based on and closes #26638. 
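A minimal sketch of the no-limit idiom this change relies on (illustration only, assuming nothing beyond the existing int_or_none helper in youtube_dl.utils): an unset --max-downloads maps to float('inf'), so neither the check before extraction nor the new check after recording a download can raise MaxDownloadsReached unless a real limit was given.

    from youtube_dl.utils import int_or_none

    def effective_limit(params):
        # unset or non-numeric value -> no limit
        return int_or_none(params.get('max_downloads')) or float('inf')

    assert effective_limit({}) == float('inf')
    assert effective_limit({'max_downloads': 3}) == 3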
--- youtube_dl/YoutubeDL.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 3895b408f..e77b8d50c 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1779,10 +1779,9 @@ class YoutubeDL(object): assert info_dict.get('_type', 'video') == 'video' - max_downloads = self.params.get('max_downloads') - if max_downloads is not None: - if self._num_downloads >= int(max_downloads): - raise MaxDownloadsReached() + max_downloads = int_or_none(self.params.get('max_downloads')) or float('inf') + if self._num_downloads >= max_downloads: + raise MaxDownloadsReached() # TODO: backward compatibility, to be removed info_dict['fulltitle'] = info_dict['title'] @@ -2062,6 +2061,9 @@ class YoutubeDL(object): self.report_error('postprocessing: %s' % str(err)) return self.record_download_archive(info_dict) + # avoid possible nugatory search for further items (PR #26638) + if self._num_downloads >= max_downloads: + raise MaxDownloadsReached() def download(self, url_list): """Download a given list of URLs.""" From d231b56717c73ee597d2e077d11b69ed48a1b02d Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 14 Aug 2022 18:45:45 +0100 Subject: [PATCH 14/78] [jsinterp] Overhaul JSInterp to handle new YT players 4c3f79c5, 324f67b9 (#31170) * back-port from yt-dlp 8f53dc44a0cc1c2d98c35740b9293462c080f5d0, thanks pukkandan * also support void, improve <> precedence, improve expressions in comma-list * add more tests --- test/test_jsinterp.py | 49 ++- test/test_utils.py | 3 + test/test_youtube_signature.py | 13 + youtube_dl/compat.py | 54 ++- youtube_dl/jsinterp.py | 589 ++++++++++++++++++++------------- youtube_dl/utils.py | 47 ++- 6 files changed, 504 insertions(+), 251 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index acdabffb1..c6c931743 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -19,6 +19,9 @@ class TestJSInterpreter(unittest.TestCase): jsi = JSInterpreter('function x3(){return 42;}') self.assertEqual(jsi.call_function('x3'), 42) + jsi = JSInterpreter('function x3(){42}') + self.assertEqual(jsi.call_function('x3'), None) + jsi = JSInterpreter('var x5 = function(){return 42;}') self.assertEqual(jsi.call_function('x5'), 42) @@ -51,8 +54,11 @@ class TestJSInterpreter(unittest.TestCase): jsi = JSInterpreter('function f(){return 11 >> 2;}') self.assertEqual(jsi.call_function('f'), 2) + jsi = JSInterpreter('function f(){return []? 
2+3: 4;}') + self.assertEqual(jsi.call_function('f'), 5) + def test_array_access(self): - jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2] = 7; return x;}') + jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}') self.assertEqual(jsi.call_function('f'), [5, 2, 7]) def test_parens(self): @@ -62,6 +68,10 @@ class TestJSInterpreter(unittest.TestCase): jsi = JSInterpreter('function f(){return (1 + 2) * 3;}') self.assertEqual(jsi.call_function('f'), 9) + def test_quotes(self): + jsi = JSInterpreter(r'function f(){return "a\"\\("}') + self.assertEqual(jsi.call_function('f'), r'a"\(') + def test_assignments(self): jsi = JSInterpreter('function f(){var x = 20; x = 30 + 1; return x;}') self.assertEqual(jsi.call_function('f'), 31) @@ -104,18 +114,29 @@ class TestJSInterpreter(unittest.TestCase): }''') self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50]) + def test_builtins(self): + jsi = JSInterpreter(''' + function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; } + ''') + self.assertEqual(jsi.call_function('x'), 86000) + jsi = JSInterpreter(''' + function x(dt) { return new Date(dt) - 0; } + ''') + self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000) + def test_call(self): jsi = JSInterpreter(''' function x() { return 2; } - function y(a) { return x() + a; } + function y(a) { return x() + (a?a:0); } function z() { return y(3); } ''') self.assertEqual(jsi.call_function('z'), 5) + self.assertEqual(jsi.call_function('y'), 2) def test_for_loop(self): # function x() { a=0; for (i=0; i-10; i++) {a++} a } jsi = JSInterpreter(''' - function x() { a=0; for (i=0; i-10; i = i + 1) {a++} a } + function x() { a=0; for (i=0; i-10; i++) {a++} return a } ''') self.assertEqual(jsi.call_function('x'), 10) @@ -156,19 +177,19 @@ class TestJSInterpreter(unittest.TestCase): def test_for_loop_continue(self): jsi = JSInterpreter(''' - function x() { a=0; for (i=0; i-10; i++) { continue; a++ } a } + function x() { a=0; for (i=0; i-10; i++) { continue; a++ } return a } ''') self.assertEqual(jsi.call_function('x'), 0) def test_for_loop_break(self): jsi = JSInterpreter(''' - function x() { a=0; for (i=0; i-10; i++) { break; a++ } a } + function x() { a=0; for (i=0; i-10; i++) { break; a++ } return a } ''') self.assertEqual(jsi.call_function('x'), 0) def test_literal_list(self): jsi = JSInterpreter(''' - function x() { [1, 2, "asdf", [5, 6, 7]][3] } + function x() { return [1, 2, "asdf", [5, 6, 7]][3] } ''') self.assertEqual(jsi.call_function('x'), [5, 6, 7]) @@ -177,6 +198,22 @@ class TestJSInterpreter(unittest.TestCase): function x() { a=5; a -= 1, a+=3; return a } ''') self.assertEqual(jsi.call_function('x'), 7) + jsi = JSInterpreter(''' + function x() { a=5; return (a -= 1, a+=3, a); } + ''') + self.assertEqual(jsi.call_function('x'), 7) + + def test_void(self): + jsi = JSInterpreter(''' + function x() { return void 42; } + ''') + self.assertEqual(jsi.call_function('x'), None) + + def test_return_function(self): + jsi = JSInterpreter(''' + function x() { return [1, function(){return 1}][1] } + ''') + self.assertEqual(jsi.call_function('x')([]), 1) if __name__ == '__main__': diff --git a/test/test_utils.py b/test/test_utils.py index 259c4763e..f1a748dde 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -370,6 +370,9 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540) self.assertEqual(unified_timestamp('December 15, 
2017 at 7:49 am'), 1513324140) self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363) + self.assertEqual(unified_timestamp('December 31 1969 20:00:01 EDT'), 1) + self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86) + self.assertEqual(unified_timestamp('12/31/1969 20:01:18 EDT', False), 78) def test_determine_ext(self): self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index fc5e9828e..6e955e0f0 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -90,12 +90,25 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js', 'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw', ), + ( + 'https://www.youtube.com/s/player/5dd88d1d/player-plasma-ias-phone-en_US.vflset/base.js', + 'kSxKFLeqzv_ZyHSAt', 'n8gS8oRlHOxPFA', + ), + ( + 'https://www.youtube.com/s/player/324f67b9/player_ias.vflset/en_US/base.js', + 'xdftNy7dh9QGnhW', '22qLGxrmX8F1rA', + ), + ( + 'https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', + 'TDCstCG66tEAO5pR9o', 'dbxNtZ14c-yWyw', + ), ] class TestPlayerInfo(unittest.TestCase): def test_youtube_extract_player_info(self): PLAYER_URLS = ( + ('https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', '4c3f79c5'), ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'), ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/fr_FR/base.js', '64dddad9'), ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'), diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 9f5f85dae..6d2c31a61 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2985,7 +2985,6 @@ except ImportError: except ImportError: compat_filter = filter - try: from future_builtins import zip as compat_zip except ImportError: # not 2.6+ or is 3.x @@ -2995,6 +2994,57 @@ except ImportError: # not 2.6+ or is 3.x compat_zip = zip +# method renamed between Py2/3 +try: + from itertools import zip_longest as compat_itertools_zip_longest +except ImportError: + from itertools import izip_longest as compat_itertools_zip_longest + + +# new class in collections +try: + from collections import ChainMap as compat_collections_chain_map +except ImportError: + # Py < 3.3 + class compat_collections_chain_map(compat_collections_abc.MutableMapping): + + maps = [{}] + + def __init__(self, *maps): + self.maps = list(maps) or [{}] + + def __getitem__(self, k): + for m in self.maps: + if k in m: + return m[k] + raise KeyError(k) + + def __setitem__(self, k, v): + self.maps[0].__setitem__(k, v) + return + + def __delitem__(self, k): + if k in self.maps[0]: + del self.maps[0][k] + return + raise KeyError(k) + + def __iter__(self): + return itertools.chain(*reversed(self.maps)) + + def __len__(self): + return len(iter(self)) + + def new_child(self, m=None, **kwargs): + m = m or {} + m.update(kwargs) + return compat_collections_chain_map(m, *self.maps) + + @property + def parents(self): + return compat_collections_chain_map(*(self.maps[1:])) + + if sys.version_info < (3, 3): def compat_b64decode(s, *args, **kwargs): if isinstance(s, compat_str): @@ -3031,6 +3081,7 @@ __all__ = [ 'compat_basestring', 'compat_chr', 'compat_collections_abc', + 'compat_collections_chain_map', 'compat_cookiejar', 'compat_cookiejar_Cookie', 'compat_cookies', @@ -3051,6 +3102,7 @@ __all__ = [ 'compat_input', 
'compat_integer_types', 'compat_itertools_count', + 'compat_itertools_zip_longest', 'compat_kwargs', 'compat_map', 'compat_numeric_types', diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 8eaa911cd..c60a9b3c2 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -1,42 +1,87 @@ from __future__ import unicode_literals +import itertools import json +import math import operator import re from .utils import ( + NO_DEFAULT, ExtractorError, + js_to_json, remove_quotes, + unified_timestamp, ) from .compat import ( - compat_collections_abc, + compat_collections_chain_map as ChainMap, + compat_itertools_zip_longest as zip_longest, compat_str, ) -MutableMapping = compat_collections_abc.MutableMapping +_NAME_RE = r'[a-zA-Z_$][\w$]*' -class Nonlocal: - pass - +# (op, definition) in order of binding priority, tightest first +# avoid dict to maintain order +# definition None => Defined in JSInterpreter._operator +_DOT_OPERATORS = ( + ('.', None), + # TODO: ('?.', None), +) -_OPERATORS = [ +_OPERATORS = ( ('|', operator.or_), ('^', operator.xor), ('&', operator.and_), ('>>', operator.rshift), ('<<', operator.lshift), - ('-', operator.sub), ('+', operator.add), - ('%', operator.mod), - ('/', operator.truediv), + ('-', operator.sub), ('*', operator.mul), -] -_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS] -_ASSIGN_OPERATORS.append(('=', (lambda cur, right: right))) + ('/', operator.truediv), + ('%', operator.mod), +) -_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*' +_COMP_OPERATORS = ( + ('===', operator.is_), + ('==', operator.eq), + ('!==', operator.is_not), + ('!=', operator.ne), + ('<=', operator.le), + ('>=', operator.ge), + ('<', operator.lt), + ('>', operator.gt), +) + +_LOG_OPERATORS = ( + ('&', operator.and_), + ('|', operator.or_), + ('^', operator.xor), +) + +_SC_OPERATORS = ( + ('?', None), + ('||', None), + ('&&', None), + # TODO: ('??', None), +) + +_OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS)) _MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]'))) +_QUOTES = '\'"' + + +def _ternary(cndn, if_true=True, if_false=False): + """Simulate JS's ternary operator (cndn?if_true:if_false)""" + if cndn in (False, None, 0, ''): + return if_false + try: + if math.isnan(cndn): # NB: NaN cannot be checked by membership + return if_false + except TypeError: + pass + return if_true class JS_Break(ExtractorError): @@ -49,70 +94,77 @@ class JS_Continue(ExtractorError): ExtractorError.__init__(self, 'Invalid continue') -class LocalNameSpace(MutableMapping): - def __init__(self, *stack): - self.stack = tuple(stack) - - def __getitem__(self, key): - for scope in self.stack: - if key in scope: - return scope[key] - raise KeyError(key) - +class LocalNameSpace(ChainMap): def __setitem__(self, key, value): - for scope in self.stack: + for scope in self.maps: if key in scope: scope[key] = value - break - else: - self.stack[0][key] = value - return value + return + self.maps[0][key] = value def __delitem__(self, key): raise NotImplementedError('Deleting is not supported') - def __iter__(self): - for scope in self.stack: - for scope_item in iter(scope): - yield scope_item - - def __len__(self, key): - return len(iter(self)) - def __repr__(self): - return 'LocalNameSpace%s' % (self.stack, ) + return 'LocalNameSpace%s' % (self.maps, ) class JSInterpreter(object): + __named_object_counter = 0 + def __init__(self, code, objects=None): - if objects is None: - objects = {} - self.code = code - self._functions = {} - self._objects = objects - 
self.__named_object_counter = 0 + self.code, self._functions = code, {} + self._objects = {} if objects is None else objects + + class Exception(ExtractorError): + def __init__(self, msg, *args, **kwargs): + expr = kwargs.pop('expr', None) + if expr is not None: + msg = '{0} in: {1!r}'.format(msg.rstrip(), expr[:100]) + super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs) def _named_object(self, namespace, obj): self.__named_object_counter += 1 - name = '__youtube_dl_jsinterp_obj%s' % (self.__named_object_counter, ) + name = '__youtube_dl_jsinterp_obj%d' % (self.__named_object_counter, ) namespace[name] = obj return name @staticmethod - def _separate(expr, delim=',', max_split=None): + def _separate(expr, delim=',', max_split=None, skip_delims=None): if not expr: return counters = {k: 0 for k in _MATCHING_PARENS.values()} - start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1 + start, splits, pos, skipping, delim_len = 0, 0, 0, 0, len(delim) - 1 + in_quote, escaping = None, False for idx, char in enumerate(expr): - if char in _MATCHING_PARENS: - counters[_MATCHING_PARENS[char]] += 1 - elif char in counters: - counters[char] -= 1 - if char != delim[pos] or any(counters.values()): - pos = 0 + if not in_quote: + if char in _MATCHING_PARENS: + counters[_MATCHING_PARENS[char]] += 1 + elif char in counters: + counters[char] -= 1 + if not escaping: + if char in _QUOTES and in_quote in (char, None): + in_quote = None if in_quote else char + else: + escaping = in_quote and char == '\\' + else: + escaping = False + + if char != delim[pos] or any(counters.values()) or in_quote: + pos = skipping = 0 continue - elif pos != delim_len: + elif skipping > 0: + skipping -= 1 + continue + elif pos == 0 and skip_delims: + here = expr[idx:] + for s in skip_delims if isinstance(skip_delims, (list, tuple)) else [skip_delims]: + if here.startswith(s) and s: + skipping = len(s) - 1 + break + if skipping > 0: + continue + if pos < delim_len: pos += 1 continue yield expr[start: idx - delim_len] @@ -122,61 +174,108 @@ class JSInterpreter(object): break yield expr[start:] - @staticmethod - def _separate_at_paren(expr, delim): - separated = list(JSInterpreter._separate(expr, delim, 1)) + @classmethod + def _separate_at_paren(cls, expr, delim): + separated = list(cls._separate(expr, delim, 1)) + if len(separated) < 2: - raise ExtractorError('No terminating paren {0} in {1}'.format(delim, expr)) + raise cls.Exception('No terminating paren {delim} in {expr}'.format(**locals())) return separated[0][1:].strip(), separated[1].strip() + @staticmethod + def _all_operators(): + return itertools.chain( + _SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS) + + def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion): + if op in ('||', '&&'): + if (op == '&&') ^ _ternary(left_val): + return left_val # short circuiting + elif op == '?': + right_expr = _ternary(left_val, *self._separate(right_expr, ':', 1)) + + right_val = self.interpret_expression(right_expr, local_vars, allow_recursion) + opfunc = op and next((v for k, v in self._all_operators() if k == op), None) + if not opfunc: + return right_val + + try: + return opfunc(left_val, right_val) + except Exception as e: + raise self.Exception('Failed to evaluate {left_val!r} {op} {right_val!r}'.format(**locals()), expr, cause=e) + + def _index(self, obj, idx): + if idx == 'length': + return len(obj) + try: + return obj[int(idx)] if isinstance(obj, list) else obj[idx] + except Exception as e: + raise self.Exception('Cannot get index 
{idx}'.format(**locals()), expr=repr(obj), cause=e) + + def _dump(self, obj, namespace): + try: + return json.dumps(obj) + except TypeError: + return self._named_object(namespace, obj) + def interpret_statement(self, stmt, local_vars, allow_recursion=100): if allow_recursion < 0: - raise ExtractorError('Recursion limit reached') + raise self.Exception('Recursion limit reached') + allow_recursion -= 1 - sub_statements = list(self._separate(stmt, ';')) - stmt = (sub_statements or ['']).pop() + should_return = False + sub_statements = list(self._separate(stmt, ';')) or [''] + expr = stmt = sub_statements.pop().strip() for sub_stmt in sub_statements: - ret, should_abort = self.interpret_statement(sub_stmt, local_vars, allow_recursion - 1) - if should_abort: - return ret + ret, should_return = self.interpret_statement(sub_stmt, local_vars, allow_recursion) + if should_return: + return ret, should_return - should_abort = False - stmt = stmt.lstrip() - stmt_m = re.match(r'var\s', stmt) - if stmt_m: - expr = stmt[len(stmt_m.group(0)):] - else: - return_m = re.match(r'return(?:\s+|$)', stmt) - if return_m: - expr = stmt[len(return_m.group(0)):] - should_abort = True - else: - # Try interpreting it as an expression - expr = stmt + m = re.match(r'(?P(?:var|const|let)\s)|return(?:\s+|$)', stmt) + if m: + expr = stmt[len(m.group(0)):].strip() + should_return = not m.group('var') + if not expr: + return None, should_return - v = self.interpret_expression(expr, local_vars, allow_recursion) - return v, should_abort + if expr[0] in _QUOTES: + inner, outer = self._separate(expr, expr[0], 1) + inner = json.loads(js_to_json(inner + expr[0])) # , strict=True)) + if not outer: + return inner, should_return + expr = self._named_object(local_vars, inner) + outer + + if expr.startswith('new '): + obj = expr[4:] + if obj.startswith('Date('): + left, right = self._separate_at_paren(obj[4:], ')') + left = self.interpret_expression(left, local_vars, allow_recursion) + expr = unified_timestamp(left, False) + if not expr: + raise self.Exception('Failed to parse date {left!r}'.format(**locals()), expr=expr) + expr = self._dump(int(expr * 1000), local_vars) + right + else: + raise self.Exception('Unsupported object {obj}'.format(**locals()), expr=expr) - def interpret_expression(self, expr, local_vars, allow_recursion): - expr = expr.strip() - if expr == '': # Empty expression - return None + if expr.startswith('void '): + left = self.interpret_expression(expr[5:], local_vars, allow_recursion) + return None, should_return if expr.startswith('{'): inner, outer = self._separate_at_paren(expr, '}') - inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion - 1) + inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion) if not outer or should_abort: - return inner + return inner, should_abort or should_return else: - expr = json.dumps(inner) + outer + expr = self._dump(inner, local_vars) + outer if expr.startswith('('): inner, outer = self._separate_at_paren(expr, ')') - inner = self.interpret_expression(inner, local_vars, allow_recursion) - if not outer: - return inner + inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion) + if not outer or should_abort: + return inner, should_abort or should_return else: - expr = json.dumps(inner) + outer + expr = self._dump(inner, local_vars) + outer if expr.startswith('['): inner, outer = self._separate_at_paren(expr, ']') @@ -185,57 +284,53 @@ class JSInterpreter(object): for item in self._separate(inner)]) 
expr = name + outer - m = re.match(r'try\s*', expr) - if m: + m = re.match(r'(?Ptry|finally)\s*|(?:(?Pcatch)|(?Pfor)|(?Pswitch))\s*\(', expr) + md = m.groupdict() if m else {} + if md.get('try'): if expr[m.end()] == '{': try_expr, expr = self._separate_at_paren(expr[m.end():], '}') else: try_expr, expr = expr[m.end() - 1:], '' - ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion - 1) + ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion) if should_abort: - return ret - return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0] + return ret, True + ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion) + return ret, should_abort or should_return - m = re.match(r'(?:(?Pcatch)|(?Pfor)|(?Pswitch))\s*\(', expr) - md = m.groupdict() if m else {} - if md.get('catch'): + elif md.get('catch'): # We ignore the catch block _, expr = self._separate_at_paren(expr, '}') - return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0] + ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion) + return ret, should_abort or should_return elif md.get('for'): - def raise_constructor_error(c): - raise ExtractorError( - 'Premature return in the initialization of a for loop in {0!r}'.format(c)) - constructor, remaining = self._separate_at_paren(expr[m.end() - 1:], ')') if remaining.startswith('{'): body, expr = self._separate_at_paren(remaining, '}') else: - m = re.match(r'switch\s*\(', remaining) # FIXME - if m: - switch_val, remaining = self._separate_at_paren(remaining[m.end() - 1:], ')') + switch_m = re.match(r'switch\s*\(', remaining) # FIXME + if switch_m: + switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:], ')') body, expr = self._separate_at_paren(remaining, '}') body = 'switch(%s){%s}' % (switch_val, body) else: body, expr = remaining, '' start, cndn, increment = self._separate(constructor, ';') - if self.interpret_statement(start, local_vars, allow_recursion - 1)[1]: - raise_constructor_error(constructor) + self.interpret_expression(start, local_vars, allow_recursion) while True: - if not self.interpret_expression(cndn, local_vars, allow_recursion): + if not _ternary(self.interpret_expression(cndn, local_vars, allow_recursion)): break try: - ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion - 1) + ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion) if should_abort: - return ret + return ret, True except JS_Break: break except JS_Continue: pass - if self.interpret_statement(increment, local_vars, allow_recursion - 1)[1]: - raise_constructor_error(constructor) - return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0] + self.interpret_expression(increment, local_vars, allow_recursion) + ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion) + return ret, should_abort or should_return elif md.get('switch'): switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:], ')') @@ -245,7 +340,7 @@ class JSInterpreter(object): for default in (False, True): matched = False for item in items: - case, stmt = [i.strip() for i in self._separate(item, ':', 1)] + case, stmt = (i.strip() for i in self._separate(item, ':', 1)) if default: matched = matched or case == 'default' elif not matched: @@ -254,24 +349,28 @@ class JSInterpreter(object): if not matched: continue try: - ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion 
+                        ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion)
                         if should_abort:
                             return ret
                     except JS_Break:
                         break
                 if matched:
                     break
-        return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
+        ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
+        return ret, should_abort or should_return
 
         # Comma separated statements
         sub_expressions = list(self._separate(expr))
-        expr = sub_expressions.pop().strip() if sub_expressions else ''
-        for sub_expr in sub_expressions:
-            self.interpret_expression(sub_expr, local_vars, allow_recursion)
+        if len(sub_expressions) > 1:
+            for sub_expr in sub_expressions:
+                ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
+                if should_abort:
+                    return ret, True
+            return ret, False
 
         for m in re.finditer(r'''(?x)
-            (?P<pre_sign>\+\+|--)(?P<var1>%(_NAME_RE)s)|
-            (?P<var2>%(_NAME_RE)s)(?P<post_sign>\+\+|--)''' % globals(), expr):
+                (?P<pre_sign>\+\+|--)(?P<var1>{_NAME_RE})|
+                (?P<var2>{_NAME_RE})(?P<post_sign>\+\+|--)'''.format(**globals()), expr):
             var = m.group('var1') or m.group('var2')
             start, end = m.span()
             sign = m.group('pre_sign') or m.group('post_sign')
@@ -279,85 +378,87 @@ class JSInterpreter(object):
             local_vars[var] += 1 if sign[0] == '+' else -1
             if m.group('pre_sign'):
                 ret = local_vars[var]
-            expr = expr[:start] + json.dumps(ret) + expr[end:]
-
-        for op, opfunc in _ASSIGN_OPERATORS:
-            m = re.match(r'''(?x)
-                (?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?
-                \s*%s
-                (?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
-            if not m:
-                continue
-            right_val = self.interpret_expression(m.group('expr'), local_vars, allow_recursion)
-
-            if m.groupdict().get('index'):
-                lvar = local_vars[m.group('out')]
-                idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
-                if not isinstance(idx, int):
-                    raise ExtractorError('List indices must be integers: %s' % (idx, ))
-                cur = lvar[idx]
-                val = opfunc(cur, right_val)
-                lvar[idx] = val
-                return val
-            else:
-                cur = local_vars.get(m.group('out'))
-                val = opfunc(cur, right_val)
-                local_vars[m.group('out')] = val
-                return val
+            expr = expr[:start] + self._dump(ret, local_vars) + expr[end:]
 
-        if expr.isdigit():
-            return int(expr)
-
-        if expr == 'break':
+        if not expr:
+            return None, should_return
+
+        m = re.match(r'''(?x)
+            (?P<assign>
+                (?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?\s*
+                (?P<op>{_OPERATOR_RE})?
+                =(?P<expr>.*)$
+            )|(?P<return>
+                (?!if|return|true|false|null|undefined)(?P<name>{_NAME_RE})$
+            )|(?P<indexing>
+                (?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
+            )|(?P<attribute>
+                (?P<var>{_NAME_RE})(?:\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
+            )|(?P<function>
+                (?P<fname>{_NAME_RE})\((?P<args>.*)\)$
+            )'''.format(**globals()), expr)
+        md = m.groupdict() if m else {}
+        if md.get('assign'):
+            left_val = local_vars.get(m.group('out'))
+
+            if not m.group('index'):
+                local_vars[m.group('out')] = self._operator(
+                    m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
+                return local_vars[m.group('out')], should_return
+            elif left_val is None:
+                raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr)
+
+            idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
+            if not isinstance(idx, (int, float)):
+                raise self.Exception('List index %s must be integer' % (idx, ), expr=expr)
+            idx = int(idx)
+            left_val[idx] = self._operator(
+                m.group('op'), left_val[idx], m.group('expr'), expr, local_vars, allow_recursion)
+            return left_val[idx], should_return
+
+        elif expr.isdigit():
+            return int(expr), should_return
+
+        elif expr == 'break':
             raise JS_Break()
         elif expr == 'continue':
             raise JS_Continue()
 
-        var_m = re.match(
-            r'(?!if|return|true|false|null)(?P<name>%s)$' % _NAME_RE,
-            expr)
-        if var_m:
-            return local_vars[var_m.group('name')]
+        elif md.get('return'):
+            return local_vars[m.group('name')], should_return
 
         try:
-            return json.loads(expr)
+            ret = json.loads(js_to_json(expr))  # strict=True)
+            if not md.get('attribute'):
+                return ret, should_return
         except ValueError:
             pass
 
-        m = re.match(
-            r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
-        if m:
+        if md.get('indexing'):
             val = local_vars[m.group('in')]
             idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion)
-            return val[idx]
-
-        def raise_expr_error(where, op, exp):
-            raise ExtractorError('Premature {0} return of {1} in {2!r}'.format(where, op, exp))
+            return self._index(val, idx), should_return
 
-        for op, opfunc in _OPERATORS:
-            separated = list(self._separate(expr, op))
+        for op, _ in self._all_operators():
+            # hackety: have higher priority than <>, but don't confuse them
+            skip_delim = (op + op) if op in ('<', '>') else None
+            separated = list(self._separate(expr, op, skip_delims=skip_delim))
             if len(separated) < 2:
                 continue
-            right_val = separated.pop()
-            left_val = op.join(separated)
-            left_val, should_abort = self.interpret_statement(
-                left_val, local_vars, allow_recursion - 1)
-            if should_abort:
-                raise_expr_error('left-side', op, expr)
-            right_val, should_abort = self.interpret_statement(
-                right_val, local_vars, allow_recursion - 1)
-            if should_abort:
-                raise_expr_error('right-side', op, expr)
-            return opfunc(left_val or 0, right_val)
-        m = re.match(
-            r'(?P<var>%s)(?:\.(?P<member>[^(]+)|\[(?P<member2>[^]]+)\])\s*' % _NAME_RE,
-            expr)
-        if m:
-            variable = m.group('var')
-            nl = Nonlocal()
+            right_expr = separated.pop()
+            while op == '-' and len(separated) > 1 and not separated[-1].strip():
+                right_expr = '-' + right_expr
+                separated.pop()
+            left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
+            return self._operator(op, 0 if left_val is None else left_val,
+                                  right_expr, expr, local_vars, allow_recursion), should_return
 
-            nl.member = remove_quotes(m.group('member') or m.group('member2'))
+        if md.get('attribute'):
+            variable = m.group('var')
+            member = m.group('member')
+            if not member:
+                member = self.interpret_expression(m.group('member2'), local_vars, allow_recursion)
             arg_str = expr[m.end():]
             if arg_str.startswith('('):
                 arg_str, remaining =
self._separate_at_paren(arg_str, ')') @@ -367,25 +468,24 @@ class JSInterpreter(object): def assertion(cndn, msg): """ assert, but without risk of getting optimized out """ if not cndn: - raise ExtractorError('{0} {1}: {2}'.format(nl.member, msg, expr)) + raise ExtractorError('{member} {msg}'.format(**locals()), expr=expr) def eval_method(): - # nonlocal member - member = nl.member - if variable == 'String': - obj = compat_str - elif variable in local_vars: - obj = local_vars[variable] - else: + if (variable, member) == ('console', 'debug'): + return + types = { + 'String': compat_str, + 'Math': float, + } + obj = local_vars.get(variable, types.get(variable, NO_DEFAULT)) + if obj is NO_DEFAULT: if variable not in self._objects: self._objects[variable] = self.extract_object(variable) obj = self._objects[variable] + # Member access if arg_str is None: - # Member access - if member == 'length': - return len(obj) - return obj[member] + return self._index(obj, member) # Function call argvals = [ @@ -396,12 +496,17 @@ class JSInterpreter(object): if member == 'fromCharCode': assertion(argvals, 'takes one or more arguments') return ''.join(map(chr, argvals)) - raise ExtractorError('Unsupported string method %s' % (member, )) + raise self.Exception('Unsupported string method ' + member, expr=expr) + elif obj == float: + if member == 'pow': + assertion(len(argvals) == 2, 'takes two arguments') + return argvals[0] ** argvals[1] + raise self.Exception('Unsupported Math method ' + member, expr=expr) if member == 'split': assertion(argvals, 'takes one or more arguments') - assertion(argvals == [''], 'with arguments is not implemented') - return list(obj) + assertion(len(argvals) == 1, 'with limit argument is not implemented') + return obj.split(argvals[0]) if argvals[0] else list(obj) elif member == 'join': assertion(isinstance(obj, list), 'must be applied on a list') assertion(len(argvals) == 1, 'takes exactly one argument') @@ -447,7 +552,7 @@ class JSInterpreter(object): assertion(argvals, 'takes one or more arguments') assertion(len(argvals) <= 2, 'takes at-most 2 arguments') f, this = (argvals + [''])[:2] - return [f((item, idx, obj), this=this) for idx, item in enumerate(obj)] + return [f((item, idx, obj), {'this': this}, allow_recursion) for idx, item in enumerate(obj)] elif member == 'indexOf': assertion(argvals, 'takes one or more arguments') assertion(len(argvals) <= 2, 'takes at-most 2 arguments') @@ -457,32 +562,35 @@ class JSInterpreter(object): except ValueError: return -1 - if isinstance(obj, list): - member = int(member) - nl.member = member - return obj[member](argvals) + idx = int(member) if isinstance(obj, list) else member + return obj[idx](argvals, allow_recursion=allow_recursion) if remaining: - return self.interpret_expression( + ret, should_abort = self.interpret_statement( self._named_object(local_vars, eval_method()) + remaining, local_vars, allow_recursion) + return ret, should_return or should_abort else: - return eval_method() + return eval_method(), should_return - m = re.match(r'^(?P%s)\((?P[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr) - if m: - fname = m.group('func') - argvals = tuple([ - int(v) if v.isdigit() else local_vars[v] - for v in self._separate(m.group('args'))]) + elif md.get('function'): + fname = m.group('fname') + argvals = [self.interpret_expression(v, local_vars, allow_recursion) + for v in self._separate(m.group('args'))] if fname in local_vars: - return local_vars[fname](argvals) + return local_vars[fname](argvals, allow_recursion=allow_recursion), 
should_return elif fname not in self._functions: self._functions[fname] = self.extract_function(fname) - return self._functions[fname](argvals) + return self._functions[fname](argvals, allow_recursion=allow_recursion), should_return - if expr: - raise ExtractorError('Unsupported JS expression %r' % expr) + raise self.Exception( + 'Unsupported JS expression ' + (expr[:40] if expr != stmt else ''), expr=stmt) + + def interpret_expression(self, expr, local_vars, allow_recursion): + ret, should_return = self.interpret_statement(expr, local_vars, allow_recursion) + if should_return: + raise self.Exception('Cannot return from an expression', expr) + return ret def extract_object(self, objname): _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')''' @@ -494,15 +602,17 @@ class JSInterpreter(object): }\s*; ''' % (re.escape(objname), _FUNC_NAME_RE), self.code) + if not obj_m: + raise self.Exception('Could not find object ' + objname) fields = obj_m.group('fields') # Currently, it only supports function definitions fields_m = re.finditer( r'''(?x) - (?P%s)\s*:\s*function\s*\((?P[a-z,]+)\){(?P[^}]+)} - ''' % _FUNC_NAME_RE, + (?P%s)\s*:\s*function\s*\((?P(?:%s|,)*)\){(?P[^}]+)} + ''' % (_FUNC_NAME_RE, _NAME_RE), fields) for f in fields_m: - argnames = f.group('args').split(',') + argnames = self.build_arglist(f.group('args')) obj[remove_quotes(f.group('key'))] = self.build_function(argnames, f.group('code')) return obj @@ -510,15 +620,19 @@ class JSInterpreter(object): def extract_function_code(self, funcname): """ @returns argnames, code """ func_m = re.search( - r'''(?x) - (?:function\s+%(f_n)s|[{;,]\s*%(f_n)s\s*=\s*function|var\s+%(f_n)s\s*=\s*function)\s* + r'''(?xs) + (?: + function\s+%(name)s| + [{;,]\s*%(name)s\s*=\s*function| + (?:var|const|let)\s+%(name)s\s*=\s*function + )\s* \((?P[^)]*)\)\s* - (?P\{(?:(?!};)[^"]|"([^"]|\\")*")+\})''' % {'f_n': re.escape(funcname), }, + (?P{.+})''' % {'name': re.escape(funcname)}, self.code) code, _ = self._separate_at_paren(func_m.group('code'), '}') # refine the match if func_m is None: - raise ExtractorError('Could not find JS function %r' % funcname) - return func_m.group('args').split(','), code + raise self.Exception('Could not find JS function "{funcname}"'.format(**locals())) + return self.build_arglist(func_m.group('args')), code def extract_function(self, funcname): return self.extract_function_from_code(*self.extract_function_code(funcname)) @@ -534,7 +648,7 @@ class JSInterpreter(object): name = self._named_object( local_vars, self.extract_function_from_code( - [x.strip() for x in mobj.group('args').split(',')], + self.build_arglist(mobj.group('args')), body, local_vars, *global_stack)) code = code[:start] + name + remaining return self.build_function(argnames, code, local_vars, *global_stack) @@ -542,17 +656,22 @@ class JSInterpreter(object): def call_function(self, funcname, *args): return self.extract_function(funcname)(args) + @classmethod + def build_arglist(cls, arg_text): + if not arg_text: + return [] + return list(filter(None, (x.strip() or None for x in cls._separate(arg_text)))) + def build_function(self, argnames, code, *global_stack): global_stack = list(global_stack) or [{}] - local_vars = global_stack.pop(0) - - def resf(args, **kwargs): - local_vars.update(dict(zip(argnames, args))) - local_vars.update(kwargs) - var_stack = LocalNameSpace(local_vars, *global_stack) - for stmt in self._separate(code.replace('\n', ''), ';'): - ret, should_abort = self.interpret_statement(stmt, var_stack) - if should_abort: - break 
-            return ret
+        argnames = tuple(argnames)
+
+        def resf(args, kwargs={}, allow_recursion=100):
+            global_stack[0].update(
+                zip_longest(argnames, args, fillvalue=None))
+            global_stack[0].update(kwargs)
+            var_stack = LocalNameSpace(*global_stack)
+            ret, should_abort = self.interpret_statement(code.replace('\n', ''), var_stack, allow_recursion - 1)
+            if should_abort:
+                return ret
         return resf

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 4e00317f1..a5f584ec5 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1696,6 +1696,17 @@ MONTH_NAMES = {
         'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
 }
 
+# Timezone names for RFC2822 obs-zone
+# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
+TIMEZONE_NAMES = {
+    'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
+    'AST': -4, 'ADT': -3,  # Atlantic (used in Canada)
+    'EST': -5, 'EDT': -4,  # Eastern
+    'CST': -6, 'CDT': -5,  # Central
+    'MST': -7, 'MDT': -6,  # Mountain
+    'PST': -8, 'PDT': -7   # Pacific
+}
+
 KNOWN_EXTENSIONS = (
     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
     'flv', 'f4v', 'f4a', 'f4b',
@@ -1735,12 +1746,17 @@ DATE_FORMATS = (
     '%b %dth %Y %I:%M',
     '%Y %m %d',
     '%Y-%m-%d',
+    '%Y.%m.%d.',
     '%Y/%m/%d',
     '%Y/%m/%d %H:%M',
     '%Y/%m/%d %H:%M:%S',
+    '%Y%m%d%H%M',
+    '%Y%m%d%H%M%S',
+    '%Y%m%d',
     '%Y-%m-%d %H:%M',
     '%Y-%m-%d %H:%M:%S',
     '%Y-%m-%d %H:%M:%S.%f',
+    '%Y-%m-%d %H:%M:%S:%f',
     '%d.%m.%Y %H:%M',
     '%d.%m.%Y %H.%M',
     '%Y-%m-%dT%H:%M:%SZ',
@@ -1753,6 +1769,7 @@ DATE_FORMATS = (
     '%b %d %Y at %H:%M:%S',
     '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
+    '%H:%M %d-%b-%Y',
 )
 
 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
@@ -1763,6 +1780,7 @@ DATE_FORMATS_DAY_FIRST.extend([
     '%d/%m/%Y',
     '%d/%m/%y',
     '%d/%m/%Y %H:%M:%S',
+    '%d-%m-%Y %H:%M',
 ])
 
 DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
@@ -2966,10 +2984,22 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
 
 def extract_timezone(date_str):
     m = re.search(
-        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
-        date_str)
+        r'''(?x)
+            ^.{8,}?                                          # >=8 char non-TZ prefix, if present
+            (?P<tz>Z|                                        # just the UTC Z, or
+                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|               # preceded by 4 digits or hh:mm or
+                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
+                   [ ]?                                      # optional space
+                (?P<sign>\+|-)                               # +/-
+                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})   # hh[:]mm
+            $)
+        ''', date_str)
     if not m:
-        timezone = datetime.timedelta()
+        m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
+        timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
+        if timezone is not None:
+            date_str = date_str[:-len(m.group('tz'))]
+        timezone = datetime.timedelta(hours=timezone or 0)
     else:
         date_str = date_str[:-len(m.group('tz'))]
         if not m.group('sign'):
@@ -3037,7 +3067,8 @@ def unified_timestamp(date_str, day_first=True):
     if date_str is None:
         return None
 
-    date_str = re.sub(r'[,|]', '', date_str)
+    date_str = re.sub(r'\s+', ' ', re.sub(
+        r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))
 
     pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
     timezone, date_str = extract_timezone(date_str)
@@ -3063,7 +3094,7 @@ def unified_timestamp(date_str, day_first=True):
             pass
     timetuple = email.utils.parsedate_tz(date_str)
     if timetuple:
-        return calendar.timegm(timetuple) + pm_delta * 3600
+        return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
 
 
 def determine_ext(url, default_ext='unknown_video'):
@@ -3673,13 +3704,11 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
     if get_attr:
         if v is not None:
             v = getattr(v, get_attr, None)
-    if v == '':
-        v = None
-    if v is None:
+    if v in (None, ''):
         return default
     try:
         return int(v) * invscale // scale
-    except (ValueError, TypeError):
+    except (ValueError, TypeError, OverflowError):
         return default
 

From e52e8b8111cf7ca27daef184bacd926865e951b1 Mon Sep 17 00:00:00 2001
From: dirkf
Date: Mon, 15 Aug 2022 16:45:04 +0100
Subject: [PATCH 15/78] [postprocessor] Don't replace existing value with null
 metadata parsed from title

---
 youtube_dl/postprocessor/metadatafromtitle.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dl/postprocessor/metadatafromtitle.py
index f5c14d974..6cd5bb70f 100644
--- a/youtube_dl/postprocessor/metadatafromtitle.py
+++ b/youtube_dl/postprocessor/metadatafromtitle.py
@@ -40,6 +40,8 @@ class MetadataFromTitlePP(PostProcessor):
                 % self._titleformat)
             return [], info
         for attribute, value in match.groupdict().items():
+            if value is None:
+                continue
             info[attribute] = value
             self._downloader.to_screen(
                 '[fromtitle] parsed %s: %s'

From b0a60ce2032172aeaaf27fe3866ab72768f10cb2 Mon Sep 17 00:00:00 2001
From: dirkf
Date: Wed, 17 Aug 2022 14:22:02 +0100
Subject: [PATCH 16/78] [jsinterp] Improve JS language support (#31175)

* operator ??
* operator ?.
* operator **
* accurate operator functions
* `undefined` handling
* object literals {a: 1, "b": expr}
* more tests for weird JS comparisons: see https://github.com/ytdl-org/youtube-dl/issues/31173#issuecomment-1217854397.
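
A minimal usage sketch of the new semantics (illustration only, mirroring the tests added below; it assumes nothing beyond the public JSInterpreter interface):

    from youtube_dl.jsinterp import JSInterpreter

    # '??' keeps the left operand unless it is null/undefined, so 0 is returned
    jsi = JSInterpreter('function f(){return 0 ?? 42;}')
    assert jsi.call_function('f') == 0

    # '?.' short-circuits a missing member to undefined instead of raising
    jsi = JSInterpreter('function g(){ let a = {m1: 42, m2: 0}; return a?.qq; }')
    assert jsi.call_function('g') is JSInterpreter.undefined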
--- test/test_jsinterp.py | 114 ++++++++++++++++++++ test/test_youtube_signature.py | 4 + youtube_dl/jsinterp.py | 189 ++++++++++++++++++++++++++------- 3 files changed, 267 insertions(+), 40 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index c6c931743..328941e09 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -8,7 +8,10 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import math + from youtube_dl.jsinterp import JSInterpreter +undefined = JSInterpreter.undefined class TestJSInterpreter(unittest.TestCase): @@ -48,6 +51,9 @@ class TestJSInterpreter(unittest.TestCase): jsi = JSInterpreter('function f(){return 1 << 5;}') self.assertEqual(jsi.call_function('f'), 32) + jsi = JSInterpreter('function f(){return 2 ** 5}') + self.assertEqual(jsi.call_function('f'), 32) + jsi = JSInterpreter('function f(){return 19 & 21;}') self.assertEqual(jsi.call_function('f'), 17) @@ -57,6 +63,15 @@ class TestJSInterpreter(unittest.TestCase): jsi = JSInterpreter('function f(){return []? 2+3: 4;}') self.assertEqual(jsi.call_function('f'), 5) + jsi = JSInterpreter('function f(){return 1 == 2}') + self.assertEqual(jsi.call_function('f'), False) + + jsi = JSInterpreter('function f(){return 0 && 1 || 2;}') + self.assertEqual(jsi.call_function('f'), 2) + + jsi = JSInterpreter('function f(){return 0 ?? 42;}') + self.assertEqual(jsi.call_function('f'), 0) + def test_array_access(self): jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}') self.assertEqual(jsi.call_function('f'), [5, 2, 7]) @@ -203,6 +218,11 @@ class TestJSInterpreter(unittest.TestCase): ''') self.assertEqual(jsi.call_function('x'), 7) + jsi = JSInterpreter(''' + function x() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) } + ''') + self.assertEqual(jsi.call_function('x'), 5) + def test_void(self): jsi = JSInterpreter(''' function x() { return void 42; } @@ -215,6 +235,100 @@ class TestJSInterpreter(unittest.TestCase): ''') self.assertEqual(jsi.call_function('x')([]), 1) + def test_null(self): + jsi = JSInterpreter(''' + function x() { return null; } + ''') + self.assertIs(jsi.call_function('x'), None) + + jsi = JSInterpreter(''' + function x() { return [null > 0, null < 0, null == 0, null === 0]; } + ''') + self.assertEqual(jsi.call_function('x'), [False, False, False, False]) + + jsi = JSInterpreter(''' + function x() { return [null >= 0, null <= 0]; } + ''') + self.assertEqual(jsi.call_function('x'), [True, True]) + + def test_undefined(self): + jsi = JSInterpreter(''' + function x() { return undefined === undefined; } + ''') + self.assertTrue(jsi.call_function('x')) + + jsi = JSInterpreter(''' + function x() { return undefined; } + ''') + self.assertIs(jsi.call_function('x'), undefined) + + jsi = JSInterpreter(''' + function x() { let v; return v; } + ''') + self.assertIs(jsi.call_function('x'), undefined) + + jsi = JSInterpreter(''' + function x() { return [undefined === undefined, undefined == undefined, undefined < undefined, undefined > undefined]; } + ''') + self.assertEqual(jsi.call_function('x'), [True, True, False, False]) + + jsi = JSInterpreter(''' + function x() { return [undefined === 0, undefined == 0, undefined < 0, undefined > 0]; } + ''') + self.assertEqual(jsi.call_function('x'), [False, False, False, False]) + + jsi = JSInterpreter(''' + function x() { return [undefined >= 0, undefined <= 0]; } + ''') + self.assertEqual(jsi.call_function('x'), [False, False]) + 
+ jsi = JSInterpreter(''' + function x() { return [undefined > null, undefined < null, undefined == null, undefined === null]; } + ''') + self.assertEqual(jsi.call_function('x'), [False, False, True, False]) + + jsi = JSInterpreter(''' + function x() { return [undefined === null, undefined == null, undefined < null, undefined > null]; } + ''') + self.assertEqual(jsi.call_function('x'), [False, True, False, False]) + + jsi = JSInterpreter(''' + function x() { let v; return [42+v, v+42, v**42, 42**v, 0**v]; } + ''') + for y in jsi.call_function('x'): + self.assertTrue(math.isnan(y)) + + jsi = JSInterpreter(''' + function x() { let v; return v**0; } + ''') + self.assertEqual(jsi.call_function('x'), 1) + + jsi = JSInterpreter(''' + function x() { let v; return [v>42, v<=42, v&&42, 42&&v]; } + ''') + self.assertEqual(jsi.call_function('x'), [False, False, undefined, undefined]) + + jsi = JSInterpreter('function x(){return undefined ?? 42; }') + self.assertEqual(jsi.call_function('x'), 42) + + def test_object(self): + jsi = JSInterpreter(''' + function x() { return {}; } + ''') + self.assertEqual(jsi.call_function('x'), {}) + jsi = JSInterpreter(''' + function x() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; } + ''') + self.assertEqual(jsi.call_function('x'), [42, 0]) + jsi = JSInterpreter(''' + function x() { let a; return a?.qq; } + ''') + self.assertIs(jsi.call_function('x'), undefined) + jsi = JSInterpreter(''' + function x() { let a = {m1: 42, m2: 0 }; return a?.qq; } + ''') + self.assertIs(jsi.call_function('x'), undefined) + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 6e955e0f0..4d756dad3 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -102,6 +102,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', 'TDCstCG66tEAO5pR9o', 'dbxNtZ14c-yWyw', ), + ( + 'https://www.youtube.com/s/player/c81bbb4a/player_ias.vflset/en_US/base.js', + 'gre3EcLurNY2vqp94', 'Z9DfGxWP115WTg', + ), ] diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index c60a9b3c2..8e119d08a 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -7,7 +7,6 @@ import operator import re from .utils import ( - NO_DEFAULT, ExtractorError, js_to_json, remove_quotes, @@ -21,6 +20,70 @@ from .compat import ( _NAME_RE = r'[a-zA-Z_$][\w$]*' +_UNDEFINED = object() + + +def _js_bit_op(op): + + def wrapped(a, b): + def zeroise(x): + return 0 if x in (None, _UNDEFINED) else x + return op(zeroise(a), zeroise(b)) + + return wrapped + + +def _js_arith_op(op): + + def wrapped(a, b): + if _UNDEFINED in (a, b): + return float('nan') + return op(a or 0, b or 0) + + return wrapped + + +def _js_div(a, b): + if _UNDEFINED in (a, b) or not (a and b): + return float('nan') + return float('inf') if not b else operator.truediv(a or 0, b) + + +def _js_mod(a, b): + if _UNDEFINED in (a, b) or not b: + return float('nan') + return (a or 0) % b + + +def _js_exp(a, b): + if not b: + # even 0 ** 0 !! 
+ return 1 + if _UNDEFINED in (a, b): + return float('nan') + return (a or 0) ** b + + +def _js_eq_op(op): + + def wrapped(a, b): + if set((a, b)) <= set((None, _UNDEFINED)): + return op(a, a) + return op(a, b) + + return wrapped + + +def _js_comp_op(op): + + def wrapped(a, b): + if _UNDEFINED in (a, b): + return False + return op(a or 0, b or 0) + + return wrapped + + # (op, definition) in order of binding priority, tightest first # avoid dict to maintain order # definition None => Defined in JSInterpreter._operator @@ -30,40 +93,38 @@ _DOT_OPERATORS = ( ) _OPERATORS = ( - ('|', operator.or_), - ('^', operator.xor), - ('&', operator.and_), - ('>>', operator.rshift), - ('<<', operator.lshift), - ('+', operator.add), - ('-', operator.sub), - ('*', operator.mul), - ('/', operator.truediv), - ('%', operator.mod), + ('>>', _js_bit_op(operator.rshift)), + ('<<', _js_bit_op(operator.lshift)), + ('+', _js_arith_op(operator.add)), + ('-', _js_arith_op(operator.sub)), + ('*', _js_arith_op(operator.mul)), + ('/', _js_div), + ('%', _js_mod), + ('**', _js_exp), ) _COMP_OPERATORS = ( ('===', operator.is_), - ('==', operator.eq), + ('==', _js_eq_op(operator.eq)), ('!==', operator.is_not), - ('!=', operator.ne), - ('<=', operator.le), - ('>=', operator.ge), - ('<', operator.lt), - ('>', operator.gt), + ('!=', _js_eq_op(operator.ne)), + ('<=', _js_comp_op(operator.le)), + ('>=', _js_comp_op(operator.ge)), + ('<', _js_comp_op(operator.lt)), + ('>', _js_comp_op(operator.gt)), ) _LOG_OPERATORS = ( - ('&', operator.and_), - ('|', operator.or_), - ('^', operator.xor), + ('|', _js_bit_op(operator.or_)), + ('^', _js_bit_op(operator.xor)), + ('&', _js_bit_op(operator.and_)), ) _SC_OPERATORS = ( ('?', None), + ('??', None), ('||', None), ('&&', None), - # TODO: ('??', None), ) _OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS)) @@ -74,7 +135,7 @@ _QUOTES = '\'"' def _ternary(cndn, if_true=True, if_false=False): """Simulate JS's ternary operator (cndn?if_true:if_false)""" - if cndn in (False, None, 0, ''): + if cndn in (False, None, 0, '', _UNDEFINED): return if_false try: if math.isnan(cndn): # NB: NaN cannot be checked by membership @@ -95,6 +156,12 @@ class JS_Continue(ExtractorError): class LocalNameSpace(ChainMap): + def __getitem__(self, key): + try: + return super(LocalNameSpace, self).__getitem__(key) + except KeyError: + return _UNDEFINED + def __setitem__(self, key, value): for scope in self.maps: if key in scope: @@ -105,6 +172,13 @@ class LocalNameSpace(ChainMap): def __delitem__(self, key): raise NotImplementedError('Deleting is not supported') + def __contains__(self, key): + try: + super(LocalNameSpace, self).__getitem__(key) + return True + except KeyError: + return False + def __repr__(self): return 'LocalNameSpace%s' % (self.maps, ) @@ -112,6 +186,8 @@ class LocalNameSpace(ChainMap): class JSInterpreter(object): __named_object_counter = 0 + undefined = _UNDEFINED + def __init__(self, code, objects=None): self.code, self._functions = code, {} self._objects = {} if objects is None else objects @@ -185,12 +261,16 @@ class JSInterpreter(object): @staticmethod def _all_operators(): return itertools.chain( + # Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence _SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS) def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion): if op in ('||', '&&'): if (op == '&&') ^ _ternary(left_val): return left_val # short circuiting + elif op == '??': + if 
left_val not in (None, self.undefined):
+                return left_val
         elif op == '?':
             right_expr = _ternary(left_val, *self._separate(right_expr, ':', 1))
 
@@ -204,12 +284,14 @@ class JSInterpreter(object):
         except Exception as e:
             raise self.Exception('Failed to evaluate {left_val!r} {op} {right_val!r}'.format(**locals()), expr, cause=e)
 
-    def _index(self, obj, idx):
+    def _index(self, obj, idx, allow_undefined=False):
         if idx == 'length':
             return len(obj)
         try:
             return obj[int(idx)] if isinstance(obj, list) else obj[idx]
         except Exception as e:
+            if allow_undefined:
+                return self.undefined
             raise self.Exception('Cannot get index {idx}'.format(**locals()), expr=repr(obj), cause=e)
 
     def _dump(self, obj, namespace):
@@ -249,8 +331,8 @@ class JSInterpreter(object):
             obj = expr[4:]
             if obj.startswith('Date('):
                 left, right = self._separate_at_paren(obj[4:], ')')
-                left = self.interpret_expression(left, local_vars, allow_recursion)
-                expr = unified_timestamp(left, False)
+                expr = unified_timestamp(
+                    self.interpret_expression(left, local_vars, allow_recursion), False)
                 if not expr:
                     raise self.Exception('Failed to parse date {left!r}'.format(**locals()), expr=expr)
                 expr = self._dump(int(expr * 1000), local_vars) + right
@@ -263,6 +345,14 @@ class JSInterpreter(object):
 
         if expr.startswith('{'):
             inner, outer = self._separate_at_paren(expr, '}')
+            # try for object expression
+            sub_expressions = [list(self._separate(sub_expr.strip(), ':', 1)) for sub_expr in self._separate(inner)]
+            if all(len(sub_expr) == 2 for sub_expr in sub_expressions):
+                return dict(
+                    (key_expr if re.match(_NAME_RE, key_expr) else key_expr,
+                     self.interpret_expression(val_expr, local_vars, allow_recursion))
+                    for key_expr, val_expr in sub_expressions), should_return
+            # or statement list
             inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
             if not outer or should_abort:
                 return inner, should_abort or should_return
@@ -387,13 +477,13 @@ class JSInterpreter(object):
             (?P<assign>
                 (?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?\s*
                 (?P<op>{_OPERATOR_RE})?
-                =(?P<expr>.*)$
+                =(?!=)(?P<expr>.*)$
             )|(?P<return>
                 (?!if|return|true|false|null|undefined)(?P<name>{_NAME_RE})$
             )|(?P<indexing>
                 (?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
             )|(?P<attribute>
-                (?P<var>{_NAME_RE})(?:\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
+                (?P<var>{_NAME_RE})(?:(?P<nullish>\?)?\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
             )|(?P<function>
                 (?P<fname>{_NAME_RE})\((?P<args>.*)\)$
             )'''.format(**globals()), expr)
@@ -405,7 +495,7 @@ class JSInterpreter(object):
                 local_vars[m.group('out')] = self._operator(
                     m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
                 return local_vars[m.group('out')], should_return
-            elif left_val is None:
+            elif left_val in (None, self.undefined):
                 raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr)
 
             idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
@@ -424,6 +514,9 @@ class JSInterpreter(object):
         elif expr == 'continue':
             raise JS_Continue()
 
+        elif expr == 'undefined':
+            return self.undefined, should_return
+
         elif md.get('return'):
             return local_vars[m.group('name')], should_return
 
@@ -441,7 +534,9 @@ class JSInterpreter(object):
 
         for op, _ in self._all_operators():
             # hackety: have higher priority than <>, but don't confuse them
-            skip_delim = (op + op) if op in ('<', '>') else None
+            skip_delim = (op + op) if op in '<>*?'
else None + if op == '?': + skip_delim = (skip_delim, '?.') separated = list(self._separate(expr, op, skip_delims=skip_delim)) if len(separated) < 2: continue @@ -451,12 +546,10 @@ class JSInterpreter(object): right_expr = '-' + right_expr separated.pop() left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion) - return self._operator(op, 0 if left_val is None else left_val, - right_expr, expr, local_vars, allow_recursion), should_return + return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return if md.get('attribute'): - variable = m.group('var') - member = m.group('member') + variable, member, nullish = m.group('var', 'member', 'nullish') if not member: member = self.interpret_expression(m.group('member2'), local_vars, allow_recursion) arg_str = expr[m.end():] @@ -477,15 +570,24 @@ class JSInterpreter(object): 'String': compat_str, 'Math': float, } - obj = local_vars.get(variable, types.get(variable, NO_DEFAULT)) - if obj is NO_DEFAULT: - if variable not in self._objects: - self._objects[variable] = self.extract_object(variable) - obj = self._objects[variable] + obj = local_vars.get(variable) + if obj in (self.undefined, None): + obj = types.get(variable, self.undefined) + if obj is self.undefined: + try: + if variable not in self._objects: + self._objects[variable] = self.extract_object(variable) + obj = self._objects[variable] + except self.Exception: + if not nullish: + raise + + if nullish and obj is self.undefined: + return self.undefined # Member access if arg_str is None: - return self._index(obj, member) + return self._index(obj, member, nullish) # Function call argvals = [ @@ -660,7 +762,14 @@ class JSInterpreter(object): def build_arglist(cls, arg_text): if not arg_text: return [] - return list(filter(None, (x.strip() or None for x in cls._separate(arg_text)))) + + def valid_arg(y): + y = y.strip() + if not y: + raise cls.Exception('Missing arg in "%s"' % (arg_text, )) + return y + + return [valid_arg(x) for x in cls._separate(arg_text)] def build_function(self, argnames, code, *global_stack): global_stack = list(global_stack) or [{}] From 538ec65ba7634bb9ad9f8eb4ce72713c673969dc Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 19 Aug 2022 11:45:04 +0100 Subject: [PATCH 17/78] [jsinterp] Handle regexp literals and throw/catch execution (#31182) * based on https://github.com/yt-dlp/yt-dlp/commit/f6ca640b122239d5ab215f8c2564efb7ac3e8c65, thanks pukkandan * adds parse support for regexp flags --- test/test_jsinterp.py | 21 +++++ test/test_youtube_signature.py | 4 + youtube_dl/jsinterp.py | 136 +++++++++++++++++++++++++++------ 3 files changed, 139 insertions(+), 22 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 328941e09..faddf00d5 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -9,6 +9,7 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import math +import re from youtube_dl.jsinterp import JSInterpreter undefined = JSInterpreter.undefined @@ -316,19 +317,39 @@ class TestJSInterpreter(unittest.TestCase): function x() { return {}; } ''') self.assertEqual(jsi.call_function('x'), {}) + jsi = JSInterpreter(''' function x() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; } ''') self.assertEqual(jsi.call_function('x'), [42, 0]) + jsi = JSInterpreter(''' function x() { let a; return a?.qq; } ''') self.assertIs(jsi.call_function('x'), undefined) + jsi = JSInterpreter(''' function x() { let a = {m1: 42, m2: 0 }; return 
a?.qq; } ''') self.assertIs(jsi.call_function('x'), undefined) + def test_regex(self): + jsi = JSInterpreter(''' + function x() { let a=/,,[/,913,/](,)}/; } + ''') + self.assertIs(jsi.call_function('x'), None) + + jsi = JSInterpreter(''' + function x() { let a=/,,[/,913,/](,)}/; return a; } + ''') + # Pythons disagree on the type of a pattern + self.assertTrue(isinstance(jsi.call_function('x'), type(re.compile('')))) + + jsi = JSInterpreter(''' + function x() { let a=/,,[/,913,/](,)}/i; return a; } + ''') + self.assertEqual(jsi.call_function('x').flags & re.I, re.I) + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 4d756dad3..43e22388d 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -106,6 +106,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/c81bbb4a/player_ias.vflset/en_US/base.js', 'gre3EcLurNY2vqp94', 'Z9DfGxWP115WTg', ), + ( + 'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js', + 'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw', + ), ] diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 8e119d08a..48c27a1c0 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -7,6 +7,7 @@ import operator import re from .utils import ( + error_to_compat_str, ExtractorError, js_to_json, remove_quotes, @@ -130,7 +131,7 @@ _SC_OPERATORS = ( _OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS)) _MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]'))) -_QUOTES = '\'"' +_QUOTES = '\'"/' def _ternary(cndn, if_true=True, if_false=False): @@ -155,6 +156,12 @@ class JS_Continue(ExtractorError): ExtractorError.__init__(self, 'Invalid continue') +class JS_Throw(ExtractorError): + def __init__(self, e): + self.error = e + ExtractorError.__init__(self, 'Uncaught exception ' + error_to_compat_str(e)) + + class LocalNameSpace(ChainMap): def __getitem__(self, key): try: @@ -172,6 +179,17 @@ class LocalNameSpace(ChainMap): def __delitem__(self, key): raise NotImplementedError('Deleting is not supported') + # except + def pop(self, key, *args): + try: + off = self.__getitem__(key) + super(LocalNameSpace, self).__delitem__(key) + return off + except KeyError: + if len(args) > 0: + return args[0] + raise + def __contains__(self, key): try: super(LocalNameSpace, self).__getitem__(key) @@ -188,9 +206,29 @@ class JSInterpreter(object): undefined = _UNDEFINED + RE_FLAGS = { + # special knowledge: Python's re flags are bitmask values, current max 128 + # invent new bitmask values well above that for literal parsing + # TODO: new pattern class to execute matches with these flags + 'd': 1024, # Generate indices for substring matches + 'g': 2048, # Global search + 'i': re.I, # Case-insensitive search + 'm': re.M, # Multi-line search + 's': re.S, # Allows . 
to match newline characters + 'u': re.U, # Treat a pattern as a sequence of unicode code points + 'y': 4096, # Perform a "sticky" search that matches starting at the current position in the target string + } + + _EXC_NAME = '__youtube_dl_exception__' + _OBJ_NAME = '__youtube_dl_jsinterp_obj' + + OP_CHARS = None + def __init__(self, code, objects=None): self.code, self._functions = code, {} self._objects = {} if objects is None else objects + if type(self).OP_CHARS is None: + type(self).OP_CHARS = self.OP_CHARS = self.__op_chars() class Exception(ExtractorError): def __init__(self, msg, *args, **kwargs): @@ -199,32 +237,64 @@ class JSInterpreter(object): msg = '{0} in: {1!r}'.format(msg.rstrip(), expr[:100]) super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs) + @classmethod + def __op_chars(cls): + op_chars = set(';,') + for op in cls._all_operators(): + for c in op[0]: + op_chars.add(c) + return op_chars + def _named_object(self, namespace, obj): self.__named_object_counter += 1 - name = '__youtube_dl_jsinterp_obj%d' % (self.__named_object_counter, ) + name = '%s%d' % (self._OBJ_NAME, self.__named_object_counter) namespace[name] = obj return name - @staticmethod - def _separate(expr, delim=',', max_split=None, skip_delims=None): + @classmethod + def _regex_flags(cls, expr): + flags = 0 + if not expr: + return flags, expr + for idx, ch in enumerate(expr): + if ch not in cls.RE_FLAGS: + break + flags |= cls.RE_FLAGS[ch] + return flags, expr[idx:] if idx > 0 else expr + + @classmethod + def _separate(cls, expr, delim=',', max_split=None, skip_delims=None): if not expr: return counters = {k: 0 for k in _MATCHING_PARENS.values()} - start, splits, pos, skipping, delim_len = 0, 0, 0, 0, len(delim) - 1 - in_quote, escaping = None, False + start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1 + in_quote, escaping, skipping = None, False, 0 + after_op, in_regex_char_group, skip_re = True, False, 0 + for idx, char in enumerate(expr): + if skip_re > 0: + skip_re -= 1 + continue if not in_quote: if char in _MATCHING_PARENS: counters[_MATCHING_PARENS[char]] += 1 elif char in counters: counters[char] -= 1 - if not escaping: - if char in _QUOTES and in_quote in (char, None): - in_quote = None if in_quote else char - else: - escaping = in_quote and char == '\\' - else: - escaping = False + if not escaping and char in _QUOTES and in_quote in (char, None): + if in_quote or after_op or char != '/': + in_quote = None if in_quote and not in_regex_char_group else char + if in_quote is None and char == '/' and delim != '/': + # regexp flags + n_idx = idx + 1 + while n_idx < len(expr) and expr[n_idx] in cls.RE_FLAGS: + n_idx += 1 + skip_re = n_idx - idx - 1 + if skip_re > 0: + continue + elif in_quote == '/' and char in '[]': + in_regex_char_group = char == '[' + escaping = not escaping and in_quote and char == '\\' + after_op = not in_quote and char in cls.OP_CHARS or (char == ' ' and after_op) if char != delim[pos] or any(counters.values()) or in_quote: pos = skipping = 0 @@ -313,16 +383,23 @@ class JSInterpreter(object): if should_return: return ret, should_return - m = re.match(r'(?P(?:var|const|let)\s)|return(?:\s+|$)', stmt) + m = re.match(r'(?P(?:var|const|let)\s)|return(?:\s+|(?=["\'])|$)|(?Pthrow\s+)', stmt) if m: expr = stmt[len(m.group(0)):].strip() + if m.group('throw'): + raise JS_Throw(self.interpret_expression(expr, local_vars, allow_recursion)) should_return = not m.group('var') if not expr: return None, should_return if expr[0] in _QUOTES: inner, outer = self._separate(expr, 
expr[0], 1)
-            inner = json.loads(js_to_json(inner + expr[0]))  # , strict=True))
+            if expr[0] == '/':
+                flags, _ = self._regex_flags(outer)
+                inner, outer = inner.replace('"', r'\"'), ''
+                inner = re.compile(js_to_json(inner + expr[0]), flags=flags)  # , strict=True))
+            else:
+                inner = json.loads(js_to_json(inner + expr[0]))  # , strict=True))
             if not outer:
                 return inner, should_return
             expr = self._named_object(local_vars, inner) + outer
@@ -374,22 +451,37 @@ class JSInterpreter(object):
                 for item in self._separate(inner)])
             expr = name + outer
 
-        m = re.match(r'(?P<try>try|finally)\s*|(?:(?P<catch>catch)|(?P<for>for)|(?P<switch>switch))\s*\(', expr)
+        m = re.match(r'''(?x)
+            (?P<try>try|finally)\s*|
+            (?P<catch>catch\s*(?P<err>\(\s*{_NAME_RE}\s*\)))|
+            (?P<switch>switch)\s*\(|
+            (?P<for>for)\s*\(|'''.format(**globals()), expr)
         md = m.groupdict() if m else {}
         if md.get('try'):
             if expr[m.end()] == '{':
                 try_expr, expr = self._separate_at_paren(expr[m.end():], '}')
             else:
                 try_expr, expr = expr[m.end() - 1:], ''
-            ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion)
-            if should_abort:
-                return ret, True
+            try:
+                ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion)
+                if should_abort:
+                    return ret, True
+            except JS_Throw as e:
+                local_vars[self._EXC_NAME] = e.error
+            except Exception as e:
+                # XXX: This works for now, but makes debugging future issues very hard
+                local_vars[self._EXC_NAME] = e
             ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
             return ret, should_abort or should_return
 
         elif md.get('catch'):
-            # We ignore the catch block
-            _, expr = self._separate_at_paren(expr, '}')
+            catch_expr, expr = self._separate_at_paren(expr[m.end():], '}')
+            if self._EXC_NAME in local_vars:
+                catch_vars = local_vars.new_child({m.group('err'): local_vars.pop(self._EXC_NAME)})
+                ret, should_abort = self.interpret_statement(catch_expr, catch_vars, allow_recursion)
+                if should_abort:
+                    return ret, True
+
             ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
             return ret, should_abort or should_return
 
@@ -503,7 +595,7 @@ class JSInterpreter(object):
                 raise self.Exception('List index %s must be integer' % (idx, ), expr=expr)
             idx = int(idx)
             left_val[idx] = self._operator(
-                m.group('op'), left_val[idx], m.group('expr'), expr, local_vars, allow_recursion)
+                m.group('op'), self._index(left_val, idx), m.group('expr'), expr, local_vars, allow_recursion)
             return left_val[idx], should_return
 
         elif expr.isdigit():

From fd3f3bebd0699f4b782a24a503093c965c4f4f5e Mon Sep 17 00:00:00 2001
From: dirkf
Date: Fri, 19 Aug 2022 15:25:23 +0100
Subject: [PATCH 18/78] [jsinterp] Clean up and pull yt-dlp style

* add compat_re_Pattern
* improve compat_collections_chain_map
* use class JS_Undefined
* remove unused code
---
 test/test_jsinterp.py          |  20 +++---
 test/test_youtube_signature.py |   3 +-
 youtube_dl/compat.py           |  21 +++++-
 youtube_dl/jsinterp.py         | 123 ++++++++++++---------------
 4 files changed, 77 insertions(+), 90 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index faddf00d5..96786a84c 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -11,8 +11,9 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import math
 import re
 
-from youtube_dl.jsinterp import JSInterpreter
-undefined = JSInterpreter.undefined
+from youtube_dl.compat import compat_re_Pattern
+
+from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
 
 
 class TestJSInterpreter(unittest.TestCase):
@@ -261,12 +262,12 @@ class
TestJSInterpreter(unittest.TestCase): jsi = JSInterpreter(''' function x() { return undefined; } ''') - self.assertIs(jsi.call_function('x'), undefined) + self.assertIs(jsi.call_function('x'), JS_Undefined) jsi = JSInterpreter(''' function x() { let v; return v; } ''') - self.assertIs(jsi.call_function('x'), undefined) + self.assertIs(jsi.call_function('x'), JS_Undefined) jsi = JSInterpreter(''' function x() { return [undefined === undefined, undefined == undefined, undefined < undefined, undefined > undefined]; } @@ -307,7 +308,7 @@ class TestJSInterpreter(unittest.TestCase): jsi = JSInterpreter(''' function x() { let v; return [v>42, v<=42, v&&42, 42&&v]; } ''') - self.assertEqual(jsi.call_function('x'), [False, False, undefined, undefined]) + self.assertEqual(jsi.call_function('x'), [False, False, JS_Undefined, JS_Undefined]) jsi = JSInterpreter('function x(){return undefined ?? 42; }') self.assertEqual(jsi.call_function('x'), 42) @@ -326,12 +327,12 @@ class TestJSInterpreter(unittest.TestCase): jsi = JSInterpreter(''' function x() { let a; return a?.qq; } ''') - self.assertIs(jsi.call_function('x'), undefined) + self.assertIs(jsi.call_function('x'), JS_Undefined) jsi = JSInterpreter(''' function x() { let a = {m1: 42, m2: 0 }; return a?.qq; } ''') - self.assertIs(jsi.call_function('x'), undefined) + self.assertIs(jsi.call_function('x'), JS_Undefined) def test_regex(self): jsi = JSInterpreter(''' @@ -342,13 +343,12 @@ class TestJSInterpreter(unittest.TestCase): jsi = JSInterpreter(''' function x() { let a=/,,[/,913,/](,)}/; return a; } ''') - # Pythons disagree on the type of a pattern - self.assertTrue(isinstance(jsi.call_function('x'), type(re.compile('')))) + self.assertIsInstance(jsi.call_function('x'), compat_re_Pattern) jsi = JSInterpreter(''' function x() { let a=/,,[/,913,/](,)}/i; return a; } ''') - self.assertEqual(jsi.call_function('x').flags & re.I, re.I) + self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I) if __name__ == '__main__': diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 43e22388d..327d4c40d 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -12,10 +12,11 @@ import io import re import string +from youtube_dl.compat import compat_str, compat_urlretrieve + from test.helper import FakeYDL from youtube_dl.extractor import YoutubeIE from youtube_dl.jsinterp import JSInterpreter -from youtube_dl.compat import compat_str, compat_urlretrieve _SIG_TESTS = [ ( diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 6d2c31a61..3002109ca 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -3023,18 +3023,34 @@ except ImportError: self.maps[0].__setitem__(k, v) return - def __delitem__(self, k): + def __contains__(self, k): + return any((k in m) for m in self.maps) + + def __delitem(self, k): if k in self.maps[0]: del self.maps[0][k] return raise KeyError(k) + def __delitem__(self, k): + self.__delitem(k) + def __iter__(self): return itertools.chain(*reversed(self.maps)) def __len__(self): return len(iter(self)) + # to match Py3, don't del directly + def pop(self, k, *args): + if self.__contains__(k): + off = self.__getitem__(k) + self.__delitem(k) + return off + elif len(args) > 0: + return args[0] + raise KeyError(k) + def new_child(self, m=None, **kwargs): m = m or {} m.update(kwargs) @@ -3044,6 +3060,8 @@ except ImportError: def parents(self): return compat_collections_chain_map(*(self.maps[1:])) +# Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, 
Pattern, ...?) +compat_re_Pattern = type(re.compile('')) if sys.version_info < (3, 3): def compat_b64decode(s, *args, **kwargs): @@ -3110,6 +3128,7 @@ __all__ = [ 'compat_os_name', 'compat_parse_qs', 'compat_print', + 'compat_re_Pattern', 'compat_realpath', 'compat_setenv', 'compat_shlex_quote', diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 48c27a1c0..6719d0dfd 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -19,16 +19,12 @@ from .compat import ( compat_str, ) -_NAME_RE = r'[a-zA-Z_$][\w$]*' - -_UNDEFINED = object() - def _js_bit_op(op): def wrapped(a, b): def zeroise(x): - return 0 if x in (None, _UNDEFINED) else x + return 0 if x in (None, JS_Undefined) else x return op(zeroise(a), zeroise(b)) return wrapped @@ -37,7 +33,7 @@ def _js_bit_op(op): def _js_arith_op(op): def wrapped(a, b): - if _UNDEFINED in (a, b): + if JS_Undefined in (a, b): return float('nan') return op(a or 0, b or 0) @@ -45,22 +41,21 @@ def _js_arith_op(op): def _js_div(a, b): - if _UNDEFINED in (a, b) or not (a and b): + if JS_Undefined in (a, b) or not (a and b): return float('nan') return float('inf') if not b else operator.truediv(a or 0, b) def _js_mod(a, b): - if _UNDEFINED in (a, b) or not b: + if JS_Undefined in (a, b) or not b: return float('nan') return (a or 0) % b def _js_exp(a, b): if not b: - # even 0 ** 0 !! - return 1 - if _UNDEFINED in (a, b): + return 1 # even 0 ** 0 !! + elif JS_Undefined in (a, b): return float('nan') return (a or 0) ** b @@ -68,7 +63,7 @@ def _js_exp(a, b): def _js_eq_op(op): def wrapped(a, b): - if set((a, b)) <= set((None, _UNDEFINED)): + if set((a, b)) <= set((None, JS_Undefined)): return op(a, a) return op(a, b) @@ -78,21 +73,28 @@ def _js_eq_op(op): def _js_comp_op(op): def wrapped(a, b): - if _UNDEFINED in (a, b): + if JS_Undefined in (a, b): return False return op(a or 0, b or 0) return wrapped +def _js_ternary(cndn, if_true=True, if_false=False): + """Simulate JS's ternary operator (cndn?if_true:if_false)""" + if cndn in (False, None, 0, '', JS_Undefined): + return if_false + try: + if math.isnan(cndn): # NB: NaN cannot be checked by membership + return if_false + except TypeError: + pass + return if_true + + # (op, definition) in order of binding priority, tightest first # avoid dict to maintain order # definition None => Defined in JSInterpreter._operator -_DOT_OPERATORS = ( - ('.', None), - # TODO: ('?.', None), -) - _OPERATORS = ( ('>>', _js_bit_op(operator.rshift)), ('<<', _js_bit_op(operator.lshift)), @@ -130,20 +132,13 @@ _SC_OPERATORS = ( _OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS)) +_NAME_RE = r'[a-zA-Z_$][\w$]*' _MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]'))) _QUOTES = '\'"/' -def _ternary(cndn, if_true=True, if_false=False): - """Simulate JS's ternary operator (cndn?if_true:if_false)""" - if cndn in (False, None, 0, '', _UNDEFINED): - return if_false - try: - if math.isnan(cndn): # NB: NaN cannot be checked by membership - return if_false - except TypeError: - pass - return if_true +class JS_Undefined(object): + pass class JS_Break(ExtractorError): @@ -167,7 +162,7 @@ class LocalNameSpace(ChainMap): try: return super(LocalNameSpace, self).__getitem__(key) except KeyError: - return _UNDEFINED + return JS_Undefined def __setitem__(self, key, value): for scope in self.maps: @@ -179,24 +174,6 @@ class LocalNameSpace(ChainMap): def __delitem__(self, key): raise NotImplementedError('Deleting is not supported') - # except - def pop(self, key, *args): - try: - off = 
self.__getitem__(key) - super(LocalNameSpace, self).__delitem__(key) - return off - except KeyError: - if len(args) > 0: - return args[0] - raise - - def __contains__(self, key): - try: - super(LocalNameSpace, self).__getitem__(key) - return True - except KeyError: - return False - def __repr__(self): return 'LocalNameSpace%s' % (self.maps, ) @@ -204,9 +181,7 @@ class LocalNameSpace(ChainMap): class JSInterpreter(object): __named_object_counter = 0 - undefined = _UNDEFINED - - RE_FLAGS = { + _RE_FLAGS = { # special knowledge: Python's re flags are bitmask values, current max 128 # invent new bitmask values well above that for literal parsing # TODO: new pattern class to execute matches with these flags @@ -257,10 +232,10 @@ class JSInterpreter(object): if not expr: return flags, expr for idx, ch in enumerate(expr): - if ch not in cls.RE_FLAGS: + if ch not in cls._RE_FLAGS: break - flags |= cls.RE_FLAGS[ch] - return flags, expr[idx:] if idx > 0 else expr + flags |= cls._RE_FLAGS[ch] + return flags, expr[idx + 1:] @classmethod def _separate(cls, expr, delim=',', max_split=None, skip_delims=None): @@ -283,14 +258,6 @@ class JSInterpreter(object): if not escaping and char in _QUOTES and in_quote in (char, None): if in_quote or after_op or char != '/': in_quote = None if in_quote and not in_regex_char_group else char - if in_quote is None and char == '/' and delim != '/': - # regexp flags - n_idx = idx + 1 - while n_idx < len(expr) and expr[n_idx] in cls.RE_FLAGS: - n_idx += 1 - skip_re = n_idx - idx - 1 - if skip_re > 0: - continue elif in_quote == '/' and char in '[]': in_regex_char_group = char == '[' escaping = not escaping and in_quote and char == '\\' @@ -336,13 +303,13 @@ class JSInterpreter(object): def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion): if op in ('||', '&&'): - if (op == '&&') ^ _ternary(left_val): + if (op == '&&') ^ _js_ternary(left_val): return left_val # short circuiting elif op == '??': - if left_val not in (None, self.undefined): + if left_val not in (None, JS_Undefined): return left_val elif op == '?': - right_expr = _ternary(left_val, *self._separate(right_expr, ':', 1)) + right_expr = _js_ternary(left_val, *self._separate(right_expr, ':', 1)) right_val = self.interpret_expression(right_expr, local_vars, allow_recursion) opfunc = op and next((v for k, v in self._all_operators() if k == op), None) @@ -361,7 +328,7 @@ class JSInterpreter(object): return obj[int(idx)] if isinstance(obj, list) else obj[idx] except Exception as e: if allow_undefined: - return self.undefined + return JS_Undefined raise self.Exception('Cannot get index {idx}'.format(**locals()), expr=repr(obj), cause=e) def _dump(self, obj, namespace): @@ -395,9 +362,8 @@ class JSInterpreter(object): if expr[0] in _QUOTES: inner, outer = self._separate(expr, expr[0], 1) if expr[0] == '/': - flags, _ = self._regex_flags(outer) - inner, outer = inner.replace('"', r'\"'), '' - inner = re.compile(js_to_json(inner + expr[0]), flags=flags) # , strict=True)) + flags, outer = self._regex_flags(outer) + inner = re.compile(inner[1:], flags=flags) # , strict=True)) else: inner = json.loads(js_to_json(inner + expr[0])) # , strict=True)) if not outer: @@ -422,7 +388,7 @@ class JSInterpreter(object): if expr.startswith('{'): inner, outer = self._separate_at_paren(expr, '}') - # try for object expression + # try for object expression (Map) sub_expressions = [list(self._separate(sub_expr.strip(), ':', 1)) for sub_expr in self._separate(inner)] if all(len(sub_expr) == 2 for sub_expr in 
sub_expressions): return dict( @@ -455,7 +421,8 @@ class JSInterpreter(object): (?Ptry|finally)\s*| (?Pcatch\s*(?P\(\s*{_NAME_RE}\s*\)))| (?Pswitch)\s*\(| - (?Pfor)\s*\(|'''.format(**globals()), expr) + (?Pfor)\s*\(| + '''.format(**globals()), expr) md = m.groupdict() if m else {} if md.get('try'): if expr[m.end()] == '{': @@ -500,7 +467,7 @@ class JSInterpreter(object): start, cndn, increment = self._separate(constructor, ';') self.interpret_expression(start, local_vars, allow_recursion) while True: - if not _ternary(self.interpret_expression(cndn, local_vars, allow_recursion)): + if not _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)): break try: ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion) @@ -587,7 +554,7 @@ class JSInterpreter(object): local_vars[m.group('out')] = self._operator( m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion) return local_vars[m.group('out')], should_return - elif left_val in (None, self.undefined): + elif left_val in (None, JS_Undefined): raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr) idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion) @@ -607,7 +574,7 @@ class JSInterpreter(object): raise JS_Continue() elif expr == 'undefined': - return self.undefined, should_return + return JS_Undefined, should_return elif md.get('return'): return local_vars[m.group('name')], should_return @@ -663,9 +630,9 @@ class JSInterpreter(object): 'Math': float, } obj = local_vars.get(variable) - if obj in (self.undefined, None): - obj = types.get(variable, self.undefined) - if obj is self.undefined: + if obj in (JS_Undefined, None): + obj = types.get(variable, JS_Undefined) + if obj is JS_Undefined: try: if variable not in self._objects: self._objects[variable] = self.extract_object(variable) @@ -674,8 +641,8 @@ class JSInterpreter(object): if not nullish: raise - if nullish and obj is self.undefined: - return self.undefined + if nullish and obj is JS_Undefined: + return JS_Undefined # Member access if arg_str is None: From fd3f3bebd0699f4b782a24a503093c965c4f4f5e Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 19 Aug 2022 19:11:08 +0100 Subject: [PATCH 19/78] [uktvplay] Support domain without .uktv --- youtube_dl/extractor/uktvplay.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/uktvplay.py b/youtube_dl/extractor/uktvplay.py index f28fd514d..9ef9638cd 100644 --- a/youtube_dl/extractor/uktvplay.py +++ b/youtube_dl/extractor/uktvplay.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class UKTVPlayIE(InfoExtractor): - _VALID_URL = r'https?://uktvplay\.uktv\.co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*watch-online/)(?P\d+)' + _VALID_URL = r'https?://uktvplay\.(?:uktv\.)?co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*watch-online/)(?P\d+)' _TESTS = [{ 'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001', 'info_dict': { From a8d5316aaf3dc740aad486b8c394b2f3e70f5a58 Mon Sep 17 00:00:00 2001 From: gudata Date: Fri, 19 Aug 2022 23:00:21 +0300 Subject: [PATCH 20/78] [infoq] Avoid crash if the page has no `mp3Form` * proposed fix for issue #31131, aligns with yt-dlp Co-authored-by: dirkf --- youtube_dl/extractor/infoq.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py index 0a70a1fb4..60b02b699 100644 --- a/youtube_dl/extractor/infoq.py +++ b/youtube_dl/extractor/infoq.py @@ -1,6 +1,9 @@ # 
coding: utf-8 from __future__ import unicode_literals +from ..utils import ( + ExtractorError, +) from ..compat import ( compat_b64decode, @@ -90,7 +93,11 @@ class InfoQIE(BokeCCBaseIE): }] def _extract_http_audio(self, webpage, video_id): - fields = self._form_hidden_inputs('mp3Form', webpage) + try: + fields = self._form_hidden_inputs('mp3Form', webpage) + except ExtractorError: + fields = {} + http_audio_url = fields.get('filename') if not http_audio_url: return [] From 556862bc911bb54435b7b0b01451789b884b0390 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 21 Aug 2022 00:19:19 +0100 Subject: [PATCH 21/78] [utils] Ensure RFC3986 encoding result is unicode --- youtube_dl/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a5f584ec5..fea38ed32 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3970,7 +3970,8 @@ def escape_rfc3986(s): """Escape non-ASCII characters as suggested by RFC 3986""" if sys.version_info < (3, 0) and isinstance(s, compat_str): s = s.encode('utf-8') - return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]") + # ensure unicode: after quoting, it can always be converted + return compat_str(compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")) def escape_url(url): From 66e58dccc29de65cc95ee97915987d785b2b4b31 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 21 Aug 2022 00:21:02 +0100 Subject: [PATCH 22/78] [core] Avoid processing empty format list after removing bad formats * also ensure compat encoding of error strings --- youtube_dl/YoutubeDL.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e77b8d50c..8e8546596 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -721,7 +721,7 @@ class YoutubeDL(object): filename = encodeFilename(filename, True).decode(preferredencoding()) return sanitize_path(filename) except ValueError as err: - self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')') + self.report_error('Error in output template: ' + error_to_compat_str(err) + ' (encoding: ' + repr(preferredencoding()) + ')') return None def _match_entry(self, info_dict, incomplete): @@ -1570,9 +1570,6 @@ class YoutubeDL(object): else: formats = info_dict['formats'] - if not formats: - raise ExtractorError('No video formats found!') - def is_wellformed(f): url = f.get('url') if not url: @@ -1585,7 +1582,10 @@ class YoutubeDL(object): return True # Filter out malformed formats for better extraction robustness - formats = list(filter(is_wellformed, formats)) + formats = list(filter(is_wellformed, formats or [])) + + if not formats: + raise ExtractorError('No video formats found!') formats_dict = {} @@ -2058,7 +2058,7 @@ class YoutubeDL(object): try: self.post_process(filename, info_dict) except (PostProcessingError) as err: - self.report_error('postprocessing: %s' % str(err)) + self.report_error('postprocessing: %s' % error_to_compat_str(err)) return self.record_download_archive(info_dict) # avoid possible nugatory search for further items (PR #26638) From 573b13410e5c2f939676116e2700ec8efd9cf97b Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 25 Aug 2022 12:14:59 +0100 Subject: [PATCH 23/78] [YouTube] Improve error check for n-sig processing --- youtube_dl/extractor/youtube.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 
91a3b6058..3d12e2e4a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1500,7 +1500,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return lambda s: jsi.extract_function_from_code(*func_code)([s]) def _n_descramble(self, n_param, player_url, video_id): - """Compute the response to YT's "n" parameter challenge + """Compute the response to YT's "n" parameter challenge, + or None Args: n_param -- challenge string that is the value of the @@ -1518,7 +1519,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if player_id not in self._player_cache: self._player_cache[player_id] = self._extract_n_function(video_id, player_url) func = self._player_cache[player_id] - self._player_cache[sig_id] = func(n_param) + ret = func(n_param) + if ret.startswith('enhanced_except_'): + raise ExtractorError('Unhandled exception in decode') + self._player_cache[sig_id] = ret if self._downloader.params.get('verbose', False): self._downloader.to_screen('[debug] [%s] %s' % (self.IE_NAME, 'Decrypted nsig {0} => {1}'.format(n_param, self._player_cache[sig_id]))) return self._player_cache[sig_id] @@ -1539,10 +1543,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): continue n_param = n_param[-1] n_response = self._n_descramble(n_param, player_url, video_id) - if n_response: - qs['n'] = [n_response] - fmt['url'] = compat_urlparse.urlunparse( - parsed_fmt_url._replace(query=compat_urllib_parse_urlencode(qs, True))) + if n_response is None: + # give up if descrambling failed + break + qs['n'] = [n_response] + fmt['url'] = compat_urlparse.urlunparse( + parsed_fmt_url._replace(query=compat_urllib_parse_urlencode(qs, True))) def _mark_watched(self, video_id, player_response): playback_url = url_or_none(try_get( From d619dd712f63aab1964f8fdde9ceea514a5e581d Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 25 Aug 2022 12:16:10 +0100 Subject: [PATCH 24/78] [jsinterp] Fix bug in operator precedence * from https://github.com/yt-dlp/yt-dlp/commit/164b03c4864b0d44cfee5e7702f7c2317164a6cf * added tests --- test/test_jsinterp.py | 25 +++++++++++++++++++++++++ test/test_youtube_signature.py | 4 ++++ youtube_dl/jsinterp.py | 7 ++++++- 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 96786a84c..0a97bdbc4 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -192,6 +192,31 @@ class TestJSInterpreter(unittest.TestCase): ''') self.assertEqual(jsi.call_function('x'), 10) + def test_catch(self): + jsi = JSInterpreter(''' + function x() { try{throw 10} catch(e){return 5} } + ''') + self.assertEqual(jsi.call_function('x'), 5) + + @unittest.expectedFailure + def test_finally(self): + jsi = JSInterpreter(''' + function x() { try{throw 10} finally {return 42} } + ''') + self.assertEqual(jsi.call_function('x'), 42) + jsi = JSInterpreter(''' + function x() { try{throw 10} catch(e){return 5} finally {return 42} } + ''') + self.assertEqual(jsi.call_function('x'), 42) + + def test_nested_try(self): + jsi = JSInterpreter(''' + function x() {try { + try{throw 10} finally {throw 42} + } catch(e){return 5} } + ''') + self.assertEqual(jsi.call_function('x'), 5) + def test_for_loop_continue(self): jsi = JSInterpreter(''' function x() { a=0; for (i=0; i-10; i++) { continue; a++ } return a } diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 327d4c40d..4bb0a30b0 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -111,6 +111,10 @@ _NSIG_TESTS = [ 
'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js', 'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw', ), + ( + 'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js', + '5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ', + ), ] diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 6719d0dfd..a8456ec1c 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -5,6 +5,7 @@ import json import math import operator import re +from collections import Counter from .utils import ( error_to_compat_str, @@ -108,8 +109,8 @@ _OPERATORS = ( _COMP_OPERATORS = ( ('===', operator.is_), - ('==', _js_eq_op(operator.eq)), ('!==', operator.is_not), + ('==', _js_eq_op(operator.eq)), ('!=', _js_eq_op(operator.ne)), ('<=', _js_comp_op(operator.le)), ('>=', _js_comp_op(operator.ge)), @@ -241,7 +242,9 @@ class JSInterpreter(object): def _separate(cls, expr, delim=',', max_split=None, skip_delims=None): if not expr: return + # collections.Counter() is ~10% slower counters = {k: 0 for k in _MATCHING_PARENS.values()} + # counters = Counter() start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1 in_quote, escaping, skipping = None, False, 0 after_op, in_regex_char_group, skip_re = True, False, 0 @@ -442,6 +445,7 @@ class JSInterpreter(object): return ret, should_abort or should_return elif md.get('catch'): + catch_expr, expr = self._separate_at_paren(expr[m.end():], '}') if self._EXC_NAME in local_vars: catch_vars = local_vars.new_child({m.group('err'): local_vars.pop(self._EXC_NAME)}) @@ -450,6 +454,7 @@ class JSInterpreter(object): return ret, True ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion) + return ret, should_abort or should_return elif md.get('for'): From 4c6fba37650d60acbd32a9f2d6e2468a730d0f1c Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 26 Aug 2022 08:17:54 +0100 Subject: [PATCH 25/78] [jsinterp] Improve try/catch/finally support --- test/test_jsinterp.py | 14 ++++++- youtube_dl/jsinterp.py | 92 +++++++++++++++++++++++------------------- 2 files changed, 63 insertions(+), 43 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 0a97bdbc4..fb4882d00 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -74,6 +74,9 @@ class TestJSInterpreter(unittest.TestCase): jsi = JSInterpreter('function f(){return 0 ?? 
42;}') self.assertEqual(jsi.call_function('f'), 0) + jsi = JSInterpreter('function f(){return "life, the universe and everything" < 42;}') + self.assertFalse(jsi.call_function('f')) + def test_array_access(self): jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}') self.assertEqual(jsi.call_function('f'), [5, 2, 7]) @@ -198,7 +201,6 @@ class TestJSInterpreter(unittest.TestCase): ''') self.assertEqual(jsi.call_function('x'), 5) - @unittest.expectedFailure def test_finally(self): jsi = JSInterpreter(''' function x() { try{throw 10} finally {return 42} } @@ -212,7 +214,7 @@ class TestJSInterpreter(unittest.TestCase): def test_nested_try(self): jsi = JSInterpreter(''' function x() {try { - try{throw 10} finally {throw 42} + try{throw 10} finally {throw 42} } catch(e){return 5} } ''') self.assertEqual(jsi.call_function('x'), 5) @@ -229,6 +231,14 @@ class TestJSInterpreter(unittest.TestCase): ''') self.assertEqual(jsi.call_function('x'), 0) + def test_for_loop_try(self): + jsi = JSInterpreter(''' + function x() { + for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} }; + return 42 } + ''') + self.assertEqual(jsi.call_function('x'), 42) + def test_literal_list(self): jsi = JSInterpreter(''' function x() { return [1, 2, "asdf", [5, 6, 7]][3] } diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index a8456ec1c..08726e478 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -5,7 +5,6 @@ import json import math import operator import re -from collections import Counter from .utils import ( error_to_compat_str, @@ -15,6 +14,7 @@ from .utils import ( unified_timestamp, ) from .compat import ( + compat_basestring, compat_collections_chain_map as ChainMap, compat_itertools_zip_longest as zip_longest, compat_str, @@ -76,6 +76,10 @@ def _js_comp_op(op): def wrapped(a, b): if JS_Undefined in (a, b): return False + if isinstance(a, compat_basestring): + b = compat_str(b or 0) + elif isinstance(b, compat_basestring): + a = compat_str(a or 0) return op(a or 0, b or 0) return wrapped @@ -195,7 +199,6 @@ class JSInterpreter(object): 'y': 4096, # Perform a "sticky" search that matches starting at the current position in the target string } - _EXC_NAME = '__youtube_dl_exception__' _OBJ_NAME = '__youtube_dl_jsinterp_obj' OP_CHARS = None @@ -242,9 +245,8 @@ class JSInterpreter(object): def _separate(cls, expr, delim=',', max_split=None, skip_delims=None): if not expr: return - # collections.Counter() is ~10% slower + # collections.Counter() is ~10% slower in both 2.7 and 3.9 counters = {k: 0 for k in _MATCHING_PARENS.values()} - # counters = Counter() start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1 in_quote, escaping, skipping = None, False, 0 after_op, in_regex_char_group, skip_re = True, False, 0 @@ -291,7 +293,9 @@ class JSInterpreter(object): yield expr[start:] @classmethod - def _separate_at_paren(cls, expr, delim): + def _separate_at_paren(cls, expr, delim=None): + if delim is None: + delim = expr and _MATCHING_PARENS[expr[0]] separated = list(cls._separate(expr, delim, 1)) if len(separated) < 2: @@ -376,7 +380,7 @@ class JSInterpreter(object): if expr.startswith('new '): obj = expr[4:] if obj.startswith('Date('): - left, right = self._separate_at_paren(obj[4:], ')') + left, right = self._separate_at_paren(obj[4:]) expr = unified_timestamp( self.interpret_expression(left, local_vars, allow_recursion), False) if not expr: @@ -390,7 +394,7 @@ class JSInterpreter(object): return None, should_return if 
expr.startswith('{'): - inner, outer = self._separate_at_paren(expr, '}') + inner, outer = self._separate_at_paren(expr) # try for object expression (Map) sub_expressions = [list(self._separate(sub_expr.strip(), ':', 1)) for sub_expr in self._separate(inner)] if all(len(sub_expr) == 2 for sub_expr in sub_expressions): @@ -406,7 +410,7 @@ class JSInterpreter(object): expr = self._dump(inner, local_vars) + outer if expr.startswith('('): - inner, outer = self._separate_at_paren(expr, ')') + inner, outer = self._separate_at_paren(expr) inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion) if not outer or should_abort: return inner, should_abort or should_return @@ -414,57 +418,63 @@ class JSInterpreter(object): expr = self._dump(inner, local_vars) + outer if expr.startswith('['): - inner, outer = self._separate_at_paren(expr, ']') + inner, outer = self._separate_at_paren(expr) name = self._named_object(local_vars, [ self.interpret_expression(item, local_vars, allow_recursion) for item in self._separate(inner)]) expr = name + outer m = re.match(r'''(?x) - (?Ptry|finally)\s*| - (?Pcatch\s*(?P\(\s*{_NAME_RE}\s*\)))| - (?Pswitch)\s*\(| - (?Pfor)\s*\(| - '''.format(**globals()), expr) + (?Ptry)\s*\{| + (?Pswitch)\s*\(| + (?Pfor)\s*\( + ''', expr) md = m.groupdict() if m else {} if md.get('try'): - if expr[m.end()] == '{': - try_expr, expr = self._separate_at_paren(expr[m.end():], '}') - else: - try_expr, expr = expr[m.end() - 1:], '' + try_expr, expr = self._separate_at_paren(expr[m.end() - 1:]) + err = None try: ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion) if should_abort: return ret, True - except JS_Throw as e: - local_vars[self._EXC_NAME] = e.error except Exception as e: # XXX: This works for now, but makes debugging future issues very hard - local_vars[self._EXC_NAME] = e - ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion) - return ret, should_abort or should_return - - elif md.get('catch'): - - catch_expr, expr = self._separate_at_paren(expr[m.end():], '}') - if self._EXC_NAME in local_vars: - catch_vars = local_vars.new_child({m.group('err'): local_vars.pop(self._EXC_NAME)}) - ret, should_abort = self.interpret_statement(catch_expr, catch_vars, allow_recursion) + err = e + + pending = (None, False) + m = re.match(r'catch\s*(?P\(\s*{_NAME_RE}\s*\))?\{{'.format(**globals()), expr) + if m: + sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:]) + if err: + catch_vars = {} + if m.group('err'): + catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err + catch_vars = local_vars.new_child(m=catch_vars) + err = None + pending = self.interpret_statement(sub_expr, catch_vars, allow_recursion) + + m = re.match(r'finally\s*\{', expr) + if m: + sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:]) + ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion) if should_abort: return ret, True - ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion) + ret, should_abort = pending + if should_abort: + return ret, True - return ret, should_abort or should_return + if err: + raise err elif md.get('for'): - constructor, remaining = self._separate_at_paren(expr[m.end() - 1:], ')') + constructor, remaining = self._separate_at_paren(expr[m.end() - 1:]) if remaining.startswith('{'): - body, expr = self._separate_at_paren(remaining, '}') + body, expr = self._separate_at_paren(remaining) else: switch_m = re.match(r'switch\s*\(', 
remaining) # FIXME if switch_m: - switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:], ')') + switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:]) body, expr = self._separate_at_paren(remaining, '}') body = 'switch(%s){%s}' % (switch_val, body) else: @@ -483,11 +493,9 @@ class JSInterpreter(object): except JS_Continue: pass self.interpret_expression(increment, local_vars, allow_recursion) - ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion) - return ret, should_abort or should_return elif md.get('switch'): - switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:], ')') + switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:]) switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion) body, expr = self._separate_at_paren(remaining, '}') items = body.replace('default:', 'case default:').split('case ')[1:] @@ -510,6 +518,8 @@ class JSInterpreter(object): break if matched: break + + if md: ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion) return ret, should_abort or should_return @@ -618,7 +628,7 @@ class JSInterpreter(object): member = self.interpret_expression(m.group('member2'), local_vars, allow_recursion) arg_str = expr[m.end():] if arg_str.startswith('('): - arg_str, remaining = self._separate_at_paren(arg_str, ')') + arg_str, remaining = self._separate_at_paren(arg_str) else: arg_str, remaining = None, arg_str @@ -795,7 +805,7 @@ class JSInterpreter(object): \((?P[^)]*)\)\s* (?P{.+})''' % {'name': re.escape(funcname)}, self.code) - code, _ = self._separate_at_paren(func_m.group('code'), '}') # refine the match + code, _ = self._separate_at_paren(func_m.group('code')) # refine the match if func_m is None: raise self.Exception('Could not find JS function "{funcname}"'.format(**locals())) return self.build_arglist(func_m.group('args')), code @@ -810,7 +820,7 @@ class JSInterpreter(object): if mobj is None: break start, body_start = mobj.span() - body, remaining = self._separate_at_paren(code[body_start - 1:], '}') + body, remaining = self._separate_at_paren(code[body_start - 1:]) name = self._named_object( local_vars, self.extract_function_from_code( From 0f6422590e44e99e9b81cf2367666efe89fae3aa Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 26 Aug 2022 10:17:56 +0100 Subject: [PATCH 26/78] [compat] Replace deficient ChainMap class in Py3.3 and earlier --- youtube_dl/compat.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 3002109ca..366a93924 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -3004,8 +3004,11 @@ except ImportError: # new class in collections try: from collections import ChainMap as compat_collections_chain_map + # Py3.3's ChainMap is deficient + if sys.version_info <= (3, 3): + raise ImportError except ImportError: - # Py < 3.3 + # Py <= 3.3 class compat_collections_chain_map(compat_collections_abc.MutableMapping): maps = [{}] @@ -3060,6 +3063,7 @@ except ImportError: def parents(self): return compat_collections_chain_map(*(self.maps[1:])) + # Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?) 
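# Illustration, not part of the diff: the ChainMap fallback above matters because the
# JS interpreter's catch handling calls local_vars.new_child(m=catch_vars), and the
# stdlib ChainMap only gained the optional `m` keyword in Python 3.4. Assuming the
# compat shim mirrors that signature, the behaviour relied on looks like this:
#
#   ns = compat_collections_chain_map({'x': 1})
#   child = ns.new_child(m={'err': 'thrown'})   # new front map, parent maps kept
#   child['err'], child['x']                    # -> ('thrown', 1): lookups fall through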
compat_re_Pattern = type(re.compile('')) From ed5c44e7b74ac77f87ca5ed6cb5e964a0c6a0678 Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 26 Aug 2022 12:22:01 +0100 Subject: [PATCH 27/78] [compat] Replace deficient ChainMap class in Py3.3 and earlier * fix version check --- youtube_dl/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 366a93924..eca6d63de 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -3005,7 +3005,7 @@ except ImportError: try: from collections import ChainMap as compat_collections_chain_map # Py3.3's ChainMap is deficient - if sys.version_info <= (3, 3): + if sys.version_info < (3, 4): raise ImportError except ImportError: # Py <= 3.3 From 4050e10a4c3445c5399239567eb074acb2f65c18 Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 29 Aug 2022 13:02:17 +0100 Subject: [PATCH 28/78] [options] Document that postprocessing is not forced by --postprocessor-args Resolves #30307 --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index f6621ef91..f6d2b0898 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -801,7 +801,7 @@ def parseOpts(overrideArguments=None): postproc.add_option( '--postprocessor-args', dest='postprocessor_args', metavar='ARGS', - help='Give these arguments to the postprocessor') + help='Give these arguments to the postprocessor (if postprocessing is required)') postproc.add_option( '-k', '--keep-video', action='store_true', dest='keepvideo', default=False, From 55c823634db890a328ffc23588fcd6f35d9b3ddf Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 31 Aug 2022 23:22:48 +0100 Subject: [PATCH 29/78] [jsinterp] Handle new YT players 113ca41c, c57c113c * add NaN * allow any white-space character for `after_op` * align with yt-dlp f26af78a8ac11d9d617ed31ea5282cfaa5bcbcfa (charcodeAt and bitwise overflow) * allow escaping in regex, fixing player c57c113c --- test/test_jsinterp.py | 21 ++++++++++++++++ test/test_youtube_signature.py | 16 ++++++++++++ youtube_dl/jsinterp.py | 46 +++++++++++++++++++++------------- 3 files changed, 65 insertions(+), 18 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index fb4882d00..5121c8cf8 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -135,6 +135,11 @@ class TestJSInterpreter(unittest.TestCase): self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50]) def test_builtins(self): + jsi = JSInterpreter(''' + function x() { return NaN } + ''') + self.assertTrue(math.isnan(jsi.call_function('x'))) + jsi = JSInterpreter(''' function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; } ''') @@ -385,6 +390,22 @@ class TestJSInterpreter(unittest.TestCase): ''') self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I) + def test_char_code_at(self): + jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}') + self.assertEqual(jsi.call_function('x', 0), 116) + self.assertEqual(jsi.call_function('x', 1), 101) + self.assertEqual(jsi.call_function('x', 2), 115) + self.assertEqual(jsi.call_function('x', 3), 116) + self.assertEqual(jsi.call_function('x', 4), None) + self.assertEqual(jsi.call_function('x', 'not_a_number'), 116) + + def test_bitwise_operators_overflow(self): + jsi = JSInterpreter('function x(){return -524999584 << 5}') + self.assertEqual(jsi.call_function('x'), 379882496) + + jsi = JSInterpreter('function x(){return 1236566549 << 5}') + 
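        # Illustration: JS bitwise operators coerce their operands to 32 bits, which is
        # where the expected values in this test come from:
        #   (-524999584 * 2**5) % 2**32 == 379882496
        #   (1236566549 * 2**5) % 2**32 == 915423904
        # Both results are below 2**31, so the signed JavaScript result equals the
        # unsigned value produced by the `& 0xffffffff` mask added to _js_bit_op().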
self.assertEqual(jsi.call_function('x'), 915423904) + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 4bb0a30b0..ec914a871 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -111,10 +111,26 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js', 'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw', ), + ( + 'https://www.youtube.com/s/player/009f1d77/player_ias.vflset/en_US/base.js', + '5dwFHw8aFWQUQtffRq', 'audescmLUzI3jw', + ), ( 'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js', '5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ', ), + ( + 'https://www.youtube.com/s/player/c2199353/player_ias.vflset/en_US/base.js', + '5EHDMgYLV6HPGk_Mu-kk', 'AD5rgS85EkrE7', + ), + ( + 'https://www.youtube.com/s/player/113ca41c/player_ias.vflset/en_US/base.js', + 'cgYl-tlYkhjT7A', 'hI7BBr2zUgcmMg', + ), + ( + 'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js', + '-Txvy6bT5R6LqgnQNx', 'dcklJCnRUHbgSg', + ), ] diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 08726e478..d13329396 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -23,10 +23,11 @@ from .compat import ( def _js_bit_op(op): + def zeroise(x): + return 0 if x in (None, JS_Undefined) else x + def wrapped(a, b): - def zeroise(x): - return 0 if x in (None, JS_Undefined) else x - return op(zeroise(a), zeroise(b)) + return op(zeroise(a), zeroise(b)) & 0xffffffff return wrapped @@ -44,7 +45,7 @@ def _js_arith_op(op): def _js_div(a, b): if JS_Undefined in (a, b) or not (a and b): return float('nan') - return float('inf') if not b else operator.truediv(a or 0, b) + return operator.truediv(a or 0, b) if b else float('inf') def _js_mod(a, b): @@ -260,13 +261,14 @@ class JSInterpreter(object): counters[_MATCHING_PARENS[char]] += 1 elif char in counters: counters[char] -= 1 - if not escaping and char in _QUOTES and in_quote in (char, None): - if in_quote or after_op or char != '/': - in_quote = None if in_quote and not in_regex_char_group else char - elif in_quote == '/' and char in '[]': - in_regex_char_group = char == '[' + if not escaping: + if char in _QUOTES and in_quote in (char, None): + if in_quote or after_op or char != '/': + in_quote = None if in_quote and not in_regex_char_group else char + elif in_quote == '/' and char in '[]': + in_regex_char_group = char == '[' escaping = not escaping and in_quote and char == '\\' - after_op = not in_quote and char in cls.OP_CHARS or (char == ' ' and after_op) + after_op = not in_quote and (char in cls.OP_CHARS or (char.isspace() and after_op)) if char != delim[pos] or any(counters.values()) or in_quote: pos = skipping = 0 @@ -590,6 +592,8 @@ class JSInterpreter(object): elif expr == 'undefined': return JS_Undefined, should_return + elif expr == 'NaN': + return float('NaN'), should_return elif md.get('return'): return local_vars[m.group('name')], should_return @@ -635,7 +639,8 @@ class JSInterpreter(object): def assertion(cndn, msg): """ assert, but without risk of getting optimized out """ if not cndn: - raise ExtractorError('{member} {msg}'.format(**locals()), expr=expr) + memb = member + raise self.Exception('{member} {msg}'.format(**locals()), expr=expr) def eval_method(): if (variable, member) == ('console', 'debug'): @@ -737,6 +742,13 @@ class JSInterpreter(object): return obj.index(idx, start) except ValueError: return -1 + elif member == 'charCodeAt': + assertion(isinstance(obj, compat_str), 'must 
be applied on a string') + # assertion(len(argvals) == 1, 'takes exactly one argument') # but not enforced + idx = argvals[0] if isinstance(argvals[0], int) else 0 + if idx >= len(obj): + return None + return ord(obj[idx]) idx = int(member) if isinstance(obj, list) else member return obj[idx](argvals, allow_recursion=allow_recursion) @@ -820,12 +832,10 @@ class JSInterpreter(object): if mobj is None: break start, body_start = mobj.span() - body, remaining = self._separate_at_paren(code[body_start - 1:]) - name = self._named_object( - local_vars, - self.extract_function_from_code( - self.build_arglist(mobj.group('args')), - body, local_vars, *global_stack)) + body, remaining = self._separate_at_paren(code[body_start - 1:], '}') + name = self._named_object(local_vars, self.extract_function_from_code( + [x.strip() for x in mobj.group('args').split(',')], + body, local_vars, *global_stack)) code = code[:start] + name + remaining return self.build_function(argnames, code, local_vars, *global_stack) @@ -854,7 +864,7 @@ class JSInterpreter(object): zip_longest(argnames, args, fillvalue=None)) global_stack[0].update(kwargs) var_stack = LocalNameSpace(*global_stack) - ret, should_abort = self.interpret_statement(code.replace('\n', ''), var_stack, allow_recursion - 1) + ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1) if should_abort: return ret return resf From 218c423bc042674a8834ffc09520a94fbbe7b138 Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 1 Sep 2022 13:28:30 +0100 Subject: [PATCH 30/78] [cache] Add cache validation by program version, based on yt-dlp --- test/test_cache.py | 16 ++++++++++++++-- youtube_dl/cache.py | 28 +++++++++++++++++++++++----- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/test/test_cache.py b/test/test_cache.py index a16160142..931074aa1 100644 --- a/test/test_cache.py +++ b/test/test_cache.py @@ -3,17 +3,18 @@ from __future__ import unicode_literals -import shutil - # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import shutil from test.helper import FakeYDL from youtube_dl.cache import Cache +from youtube_dl.utils import version_tuple +from youtube_dl.version import __version__ def _is_empty(d): @@ -54,6 +55,17 @@ class TestCache(unittest.TestCase): self.assertFalse(os.path.exists(self.test_dir)) self.assertEqual(c.load('test_cache', 'k.'), None) + def test_cache_validation(self): + ydl = FakeYDL({ + 'cachedir': self.test_dir, + }) + c = Cache(ydl) + obj = {'x': 1, 'y': ['ä', '\\a', True]} + c.store('test_cache', 'k.', obj) + self.assertEqual(c.load('test_cache', 'k.', min_ver='1970.01.01'), obj) + new_version = '.'.join(('%d' % ((v + 1) if i == 0 else v, )) for i, v in enumerate(version_tuple(__version__))) + self.assertIs(c.load('test_cache', 'k.', min_ver=new_version), None) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/cache.py b/youtube_dl/cache.py index 7bdade1bd..4822439d0 100644 --- a/youtube_dl/cache.py +++ b/youtube_dl/cache.py @@ -10,12 +10,21 @@ import traceback from .compat import compat_getenv from .utils import ( + error_to_compat_str, expand_path, + is_outdated_version, + try_get, write_json_file, ) +from .version import __version__ class Cache(object): + + _YTDL_DIR = 'youtube-dl' + _VERSION_KEY = _YTDL_DIR + '_version' + _DEFAULT_VERSION = '2021.12.17' + def __init__(self, ydl): self._ydl = ydl @@ -23,7 +32,7 @@ class Cache(object): res = 
self._ydl.params.get('cachedir') if res is None: cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache') - res = os.path.join(cache_root, 'youtube-dl') + res = os.path.join(cache_root, self._YTDL_DIR) return expand_path(res) def _get_cache_fn(self, section, key, dtype): @@ -50,13 +59,22 @@ class Cache(object): except OSError as ose: if ose.errno != errno.EEXIST: raise - write_json_file(data, fn) + write_json_file({self._VERSION_KEY: __version__, 'data': data}, fn) except Exception: tb = traceback.format_exc() self._ydl.report_warning( 'Writing cache to %r failed: %s' % (fn, tb)) - def load(self, section, key, dtype='json', default=None): + def _validate(self, data, min_ver): + version = try_get(data, lambda x: x[self._VERSION_KEY]) + if not version: # Backward compatibility + data, version = {'data': data}, self._DEFAULT_VERSION + if not is_outdated_version(version, min_ver or '0', assume_new=False): + return data['data'] + self._ydl.to_screen( + 'Discarding old cache from version {version} (needs {min_ver})'.format(**locals())) + + def load(self, section, key, dtype='json', default=None, min_ver=None): assert dtype in ('json',) if not self.enabled: @@ -66,12 +84,12 @@ class Cache(object): try: try: with io.open(cache_fn, 'r', encoding='utf-8') as cachef: - return json.load(cachef) + return self._validate(json.load(cachef), min_ver) except ValueError: try: file_size = os.path.getsize(cache_fn) except (OSError, IOError) as oe: - file_size = str(oe) + file_size = error_to_compat_str(oe) self._ydl.report_warning( 'Cache retrieval from %s failed (%s)' % (cache_fn, file_size)) except IOError: From 7009bb9f3182449ae8cc05cc28b768b63030a485 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 2 Sep 2022 20:41:39 +0530 Subject: [PATCH 31/78] [jsinterp] Workaround operator associativity issue * temporary fix for player 5a3b6271 [1] 1. 
https://github.com/yt-dlp/yt-dlp/issues/4635#issuecomment-1235384480 --- test/test_youtube_signature.py | 4 ++++ youtube_dl/jsinterp.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index ec914a871..4e678cae0 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -131,6 +131,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js', '-Txvy6bT5R6LqgnQNx', 'dcklJCnRUHbgSg', ), + ( + 'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js', + 'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ', + ), ] diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index d13329396..99dd98435 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -107,8 +107,8 @@ _OPERATORS = ( ('+', _js_arith_op(operator.add)), ('-', _js_arith_op(operator.sub)), ('*', _js_arith_op(operator.mul)), - ('/', _js_div), ('%', _js_mod), + ('/', _js_div), ('**', _js_exp), ) From 9493ffdb8b690732e995422621bad3ed6c9041f5 Mon Sep 17 00:00:00 2001 From: dirkf Date: Tue, 4 Oct 2022 00:42:15 +0100 Subject: [PATCH 32/78] [test] Use windows-2019 for tests (At least for now) resolves #31249 --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 90bd63c32..a609f3704 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,12 +15,12 @@ jobs: run-tests-ext: [sh] include: # python 3.2 is only available on windows via setup-python - - os: windows-latest + - os: windows-2019 python-version: 3.2 python-impl: cpython ytdl-test-set: core run-tests-ext: bat - - os: windows-latest + - os: windows-2019 python-version: 3.2 python-impl: cpython ytdl-test-set: download From d35557a75d943865e40410d51bfcc18276e98532 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Fri, 23 Sep 2022 12:10:35 +1200 Subject: [PATCH 33/78] [Telegraaf] Use mobile GraphQL API endpoint Workaround for Cloudflare 403 Fixes https://github.com/yt-dlp/yt-dlp/issues/5000 Authored by: coletdjnz --- youtube_dl/extractor/telegraaf.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/telegraaf.py b/youtube_dl/extractor/telegraaf.py index 2dc020537..5174898f2 100644 --- a/youtube_dl/extractor/telegraaf.py +++ b/youtube_dl/extractor/telegraaf.py @@ -34,7 +34,9 @@ class TelegraafIE(InfoExtractor): article_id = self._match_id(url) video_id = self._download_json( - 'https://www.telegraaf.nl/graphql', article_id, query={ + 'https://app.telegraaf.nl/graphql', article_id, + headers={'User-Agent': 'De Telegraaf/6.8.11 (Android 11; en_US)'}, + query={ 'query': '''{ article(uid: %s) { videos { From 22127b271c8f3e9266840bc5a2fb994d6248e369 Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 10 Oct 2022 17:41:40 +0000 Subject: [PATCH 34/78] [NRK] Remove explicit Accept-Encoding header that invites Brotli Fixes #31285 --- youtube_dl/extractor/nrk.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 6d01a25c3..5a62b50fc 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -60,8 +60,7 @@ class NRKBaseIE(InfoExtractor): return self._download_json( urljoin('https://psapi.nrk.no/', path), video_id, note or 'Downloading %s JSON' % item, - fatal=fatal, query=query, - headers={'Accept-Encoding': 'gzip, deflate, br'}) + fatal=fatal, query=query) class NRKIE(NRKBaseIE): From 
1b1442887e67b63545453e10816904e2b4c561c1 Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 10 Oct 2022 19:26:32 +0100 Subject: [PATCH 35/78] [manyvids] Improve extraction (#31172) * extract all formats from page * extract description, uploader, views, likes * downrate previews * fix tests * use txt_or_none() --- youtube_dl/extractor/manyvids.py | 113 +++++++++++++++++++++++++------ 1 file changed, 91 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/manyvids.py b/youtube_dl/extractor/manyvids.py index e8d7163e4..6805102ba 100644 --- a/youtube_dl/extractor/manyvids.py +++ b/youtube_dl/extractor/manyvids.py @@ -1,11 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( determine_ext, + extract_attributes, int_or_none, str_to_int, + url_or_none, urlencode_postdata, ) @@ -20,17 +25,20 @@ class ManyVidsIE(InfoExtractor): 'id': '133957', 'ext': 'mp4', 'title': 'everthing about me (Preview)', + 'uploader': 'ellyxxix', 'view_count': int, 'like_count': int, }, }, { # full video 'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/', - 'md5': 'f3e8f7086409e9b470e2643edb96bdcc', + 'md5': 'bb47bab0e0802c2a60c24ef079dfe60f', 'info_dict': { 'id': '935718', 'ext': 'mp4', 'title': 'MY FACE REVEAL', + 'description': 'md5:ec5901d41808b3746fed90face161612', + 'uploader': 'Sarah Calanthe', 'view_count': int, 'like_count': int, }, @@ -41,15 +49,43 @@ class ManyVidsIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - video_url = self._search_regex( - r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'video URL', group='url') + info = self._search_regex( + r'''(]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''', + webpage, 'meta details', default='') + info = extract_attributes(info) + + player = self._search_regex( + r'''(]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''', + webpage, 'player details', default='') + player = extract_attributes(player) + + video_urls_and_ids = ( + (info.get('data-meta-video'), 'video'), + (player.get('data-video-transcoded'), 'transcoded'), + (player.get('data-video-filepath'), 'filepath'), + (self._og_search_video_url(webpage, secure=False, default=None), 'og_video'), + ) + + def txt_or_none(s, default=None): + return (s.strip() or default) if isinstance(s, compat_str) else default + + uploader = txt_or_none(info.get('data-meta-author')) + + def mung_title(s): + if uploader: + s = re.sub(r'^\s*%s\s+[|-]' % (re.escape(uploader), ), '', s) + return txt_or_none(s) - title = self._html_search_regex( - (r']+class=["\']item-title[^>]+>([^<]+)', - r']+class=["\']h2 m-0["\'][^>]*>([^<]+)'), - webpage, 'title', default=None) or self._html_search_meta( - 'twitter:title', webpage, 'title', fatal=True) + title = ( + mung_title(info.get('data-meta-title')) + or self._html_search_regex( + (r']+class=["\']item-title[^>]+>([^<]+)', + r']+class=["\']h2 m-0["\'][^>]*>([^<]+)'), + webpage, 'title', default=None) + or self._html_search_meta( + 'twitter:title', webpage, 'title', fatal=True)) + + title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title if any(p in webpage for p in ('preview_videos', '_preview.mp4')): title += ' (Preview)' @@ -70,23 +106,56 @@ class ManyVidsIE(InfoExtractor): 'X-Requested-With': 'XMLHttpRequest' }) - if determine_ext(video_url) == 'm3u8': - formats = self._extract_m3u8_formats( - video_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls') - else: - formats = [{'url': video_url}] 
+ formats = [] + for v_url, fmt in video_urls_and_ids: + v_url = url_or_none(v_url) + if not v_url: + continue + if determine_ext(v_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + v_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls')) + else: + formats.append({ + 'url': v_url, + 'format_id': fmt, + }) + + self._remove_duplicate_formats(formats) + + for f in formats: + if f.get('height') is None: + f['height'] = int_or_none( + self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None)) + if '/preview/' in f['url']: + f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview'))) + f['preference'] = -10 + if 'transcoded' in f['format_id']: + f['preference'] = f.get('preference', -1) - 1 + + self._sort_formats(formats) + + def get_likes(): + likes = self._search_regex( + r'''(]*\bdata-id\s*=\s*(['"])%s\2[^>]*>)''' % (video_id, ), + webpage, 'likes', default='') + likes = extract_attributes(likes) + return int_or_none(likes.get('data-likes')) - like_count = int_or_none(self._search_regex( - r'data-likes=["\'](\d+)', webpage, 'like count', default=None)) - view_count = str_to_int(self._html_search_regex( - r'(?s)]+class="views-wrapper"[^>]*>(.+?)]*\bclass\s*=["']views-wrapper\b[^>]+>.+?]+>\s*(\d[\d,.]*)\s*''', + webpage, 'view count', default=None)) return { 'id': video_id, 'title': title, - 'view_count': view_count, - 'like_count': like_count, 'formats': formats, + 'description': txt_or_none(info.get('data-meta-description')), + 'uploader': txt_or_none(info.get('data-meta-author')), + 'thumbnail': ( + url_or_none(info.get('data-meta-image')) + or url_or_none(player.get('data-video-screenshot'))), + 'view_count': get_views(), + 'like_count': get_likes(), } From 82e4eca711a128138ed0b84ddb4321e403d56340 Mon Sep 17 00:00:00 2001 From: Xiyue <113869642+xiyue077@users.noreply.github.com> Date: Tue, 11 Oct 2022 09:52:48 +1100 Subject: [PATCH 36/78] [motherless] Fixed the broken uploader_id in the extractor (#31243) * Fixed the broken uploader_id in the extractor. * Make uploader_id RE looser * Fix uploader_id in test Motherless_3 * Fix group pagination * # coding: utf-8 Co-authored-by: Andy Xuming Co-authored-by: dirkf --- youtube_dl/extractor/motherless.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py index ef1e081f2..35d2b46ed 100644 --- a/youtube_dl/extractor/motherless.py +++ b/youtube_dl/extractor/motherless.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import datetime @@ -71,7 +72,7 @@ class MotherlessIE(InfoExtractor): 'title': 'a/ Hot Teens', 'categories': list, 'upload_date': '20210104', - 'uploader_id': 'yonbiw', + 'uploader_id': 'anonymous', 'thumbnail': r're:https?://.*\.jpg', 'age_limit': 18, }, @@ -127,7 +128,7 @@ class MotherlessIE(InfoExtractor): comment_count = webpage.count('class="media-comment-contents"') uploader_id = self._html_search_regex( - r'"thumb-member-username">\s+]+>\s*]+\bhref\s*=\s*['"]/m/([^"']+)''', webpage, 'uploader_id') categories = self._html_search_meta('keywords', webpage, default=None) @@ -169,7 +170,7 @@ class MotherlessGroupIE(InfoExtractor): 'description': 'Sex can be funny. Wide smiles,laugh, games, fun of ' 'any kind!' 
}, - 'playlist_mincount': 9, + 'playlist_mincount': 0, }] @classmethod @@ -208,9 +209,9 @@ class MotherlessGroupIE(InfoExtractor): r'([\w\s]+\w)\s+-', webpage, 'title', fatal=False) description = self._html_search_meta( 'description', webpage, fatal=False) - page_count = self._int(self._search_regex( - r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT', - webpage, 'page_count'), 'page_count') + page_count = str_to_int(self._search_regex( + r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b', + webpage, 'page_count', default='1')) PAGE_SIZE = 80 def _get_page(idx): From 2ced5a79128f53faad94dc494d05925eb957c414 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 9 Aug 2022 19:34:34 +0100 Subject: [PATCH 37/78] [test] Implement string "lambda x: condition(x)" as an expected value Semantics equivalent to `assert condition(got)` --- test/helper.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test/helper.py b/test/helper.py index e62aab11e..c6a2f0667 100644 --- a/test/helper.py +++ b/test/helper.py @@ -128,6 +128,12 @@ def expect_value(self, got, expected, field): self.assertTrue( contains_str in got, 'field %s (value: %r) should contain %r' % (field, got, contains_str)) + elif isinstance(expected, compat_str) and re.match(r'^lambda \w+:', expected): + fn = eval(expected) + suite = expected.split(':', 1)[1].strip() + self.assertTrue( + fn(got), + 'Expected field %s to meet condition %s, but value %r failed ' % (field, suite, got)) elif isinstance(expected, type): self.assertTrue( isinstance(got, expected), @@ -137,7 +143,7 @@ def expect_value(self, got, expected, field): elif isinstance(expected, list) and isinstance(got, list): self.assertEqual( len(expected), len(got), - 'Expect a list of length %d, but got a list of length %d for field %s' % ( + 'Expected a list of length %d, but got a list of length %d for field %s' % ( len(expected), len(got), field)) for index, (item_got, item_expected) in enumerate(zip(got, expected)): type_got = type(item_got) From c282e5f8d723763ba88c521221e4535f46453949 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 9 Aug 2022 19:37:58 +0100 Subject: [PATCH 38/78] [ZDF] Overhaul ZDF extractors * pull some yt-dlp changes into ZDFBaseIE._extract_format() * add test cases from yt-dlp to ZDFIE * fix crash in ZDFIE._extract_mobile() when object had no `formitaeten` * improve title extraction in ZDFChannelIE (remove trailing station ident) * avoid extracting non-video playlist items (fixes #31149) --- youtube_dl/extractor/zdf.py | 175 ++++++++++++++++++++---------------- 1 file changed, 100 insertions(+), 75 deletions(-) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index 3d39bb33a..fcc63ef52 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -8,13 +8,14 @@ from ..compat import compat_str from ..utils import ( determine_ext, ExtractorError, + extract_attributes, float_or_none, int_or_none, merge_dicts, NO_DEFAULT, - orderedSet, parse_codecs, qualities, + str_or_none, try_get, unified_timestamp, update_url_query, @@ -57,28 +58,39 @@ class ZDFBaseIE(InfoExtractor): format_urls.add(format_url) mime_type = meta.get('mimeType') ext = determine_ext(format_url) + + join_nonempty = lambda s, l: s.join(filter(None, l)) + meta_map = lambda t: map(lambda x: str_or_none(meta.get(x)), t) + if mime_type == 'application/x-mpegURL' or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( + new_formats = self._extract_m3u8_formats( format_url, 
video_id, 'mp4', m3u8_id='hls', - entry_protocol='m3u8_native', fatal=False)) + entry_protocol='m3u8_native', fatal=False) elif mime_type == 'application/f4m+xml' or ext == 'f4m': - formats.extend(self._extract_f4m_formats( - update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False)) + new_formats = self._extract_f4m_formats( + update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False) else: f = parse_codecs(meta.get('mimeCodec')) + if not f: + data = meta.get('type', '').split('_') + if try_get(data, lambda x: x[2]) == ext: + f = dict(zip(('vcodec', 'acodec'), data[1])) + format_id = ['http'] - for p in (meta.get('type'), meta.get('quality')): - if p and isinstance(p, compat_str): - format_id.append(p) + format_id.extend(join_nonempty('-', meta_map(('type', 'quality')))) f.update({ 'url': format_url, 'format_id': '-'.join(format_id), - 'format_note': meta.get('quality'), - 'language': meta.get('language'), - 'quality': qualities(self._QUALITIES)(meta.get('quality')), - 'preference': -10, + 'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None)) }) - formats.append(f) + new_formats = [f] + + formats.extend(merge_dicts(f, { + 'format_note': join_nonempty(',', meta_map(('quality', 'class'))), + 'language': meta.get('language'), + 'language_preference': 10 if meta.get('class') == 'main' else -10 if meta.get('class') == 'ad' else -1, + 'quality': qualities(self._QUALITIES)(meta.get('quality')), + }) for f in new_formats) def _extract_ptmd(self, ptmd_url, video_id, api_token, referrer): ptmd = self._call_api( @@ -107,6 +119,7 @@ class ZDFBaseIE(InfoExtractor): 'type': f.get('type'), 'mimeType': f.get('mimeType'), 'quality': quality.get('quality'), + 'class': track.get('class'), 'language': track.get('language'), }) self._sort_formats(formats) @@ -171,6 +184,20 @@ class ZDFIE(ZDFBaseIE): 'duration': 2615, 'timestamp': 1465021200, 'upload_date': '20160604', + 'thumbnail': 'https://www.zdf.de/assets/mauve-im-labor-100~768x432?cb=1464909117806', + }, + }, { + 'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html', + 'md5': '1b93bdec7d02fc0b703c5e7687461628', + 'info_dict': { + 'ext': 'mp4', + 'id': 'video_funk_1770473', + 'duration': 1278, + 'description': 'Die Neue an der Schule verdreht Ismail den Kopf.', + 'title': 'Alles ist verzaubert', + 'timestamp': 1635520560, + 'upload_date': '20211029', + 'thumbnail': 'https://www.zdf.de/assets/teaser-funk-alles-ist-verzaubert-100~1920x1080?cb=1636466431799', }, }, { # Same as https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche @@ -204,6 +231,19 @@ class ZDFIE(ZDFBaseIE): 'timestamp': 1641355200, 'upload_date': '20220105', }, + 'skip': 'No longer available "Diese Seite wurde leider nicht gefunden"' + }, { + 'url': 'https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html', + 'info_dict': { + 'id': '191205_1800_sendung_sok8', + 'ext': 'mp4', + 'title': 'Das Geld anderer Leute', + 'description': 'md5:cb6f660850dc5eb7d1ab776ea094959d', + 'duration': 2581.0, + 'timestamp': 1654790700, + 'upload_date': '20220609', + 'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/e2d7e55a-09f0-424e-ac73-6cac4dd65f35?layout=2400x1350', + }, }] def _extract_entry(self, url, player, content, video_id): @@ -265,15 +305,16 @@ class ZDFIE(ZDFBaseIE): 'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id, video_id) - document = video['document'] - - title = document['titel'] - 
content_id = document['basename'] - formats = [] - format_urls = set() - for f in document['formitaeten']: - self._extract_format(content_id, formats, format_urls, f) + formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list) + document = formitaeten and video['document'] + if formitaeten: + title = document['titel'] + content_id = document['basename'] + + format_urls = set() + for f in formitaeten or []: + self._extract_format(content_id, formats, format_urls, f) self._sort_formats(formats) thumbnails = [] @@ -320,9 +361,9 @@ class ZDFChannelIE(ZDFBaseIE): 'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio', 'info_dict': { 'id': 'das-aktuelle-sportstudio', - 'title': 'das aktuelle sportstudio | ZDF', + 'title': 'das aktuelle sportstudio', }, - 'playlist_mincount': 23, + 'playlist_mincount': 18, }, { 'url': 'https://www.zdf.de/dokumentation/planet-e', 'info_dict': { @@ -330,6 +371,14 @@ class ZDFChannelIE(ZDFBaseIE): 'title': 'planet e.', }, 'playlist_mincount': 50, + }, { + 'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest', + 'info_dict': { + 'id': 'aktenzeichen-xy-ungeloest', + 'title': 'Aktenzeichen XY... ungelöst', + 'entries': "lambda x: not any('xy580-fall1-kindermoerder-gesucht-100' in e['url'] for e in x)", + }, + 'playlist_mincount': 2, }, { 'url': 'https://www.zdf.de/filme/taunuskrimi/', 'only_matching': True, @@ -339,60 +388,36 @@ class ZDFChannelIE(ZDFBaseIE): def suitable(cls, url): return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url) + def _og_search_title(self, webpage, fatal=False): + title = super(ZDFChannelIE, self)._og_search_title(webpage, fatal=fatal) + return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None + def _real_extract(self, url): channel_id = self._match_id(url) webpage = self._download_webpage(url, channel_id) - entries = [ - self.url_result(item_url, ie=ZDFIE.ie_key()) - for item_url in orderedSet(re.findall( - r'data-plusbar-url=["\'](http.+?\.html)', webpage))] - - return self.playlist_result( - entries, channel_id, self._og_search_title(webpage, fatal=False)) - - r""" - player = self._extract_player(webpage, channel_id) - - channel_id = self._search_regex( - r'docId\s*:\s*(["\'])(?P<id>(?!\1).+?)\1', webpage, - 'channel id', group='id') - - channel = self._call_api( - 'https://api.zdf.de/content/documents/%s.json' % channel_id, - player, url, channel_id) - - items = [] - for module in channel['module']: - for teaser in try_get(module, lambda x: x['teaser'], list) or []: - t = try_get( - teaser, lambda x: x['http://zdf.de/rels/target'], dict) - if not t: - continue - items.extend(try_get( - t, - lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'], - list) or []) - items.extend(try_get( - module, - lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'], - list) or []) - - entries = [] - entry_urls = set() - for item in items: - t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict) - if not t: - continue - sharing_url = t.get('http://zdf.de/rels/sharing-url') - if not sharing_url or not isinstance(sharing_url, compat_str): - continue - if sharing_url in entry_urls: - continue - entry_urls.add(sharing_url) - entries.append(self.url_result( - sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id'))) - - return self.playlist_result(entries, channel_id, channel.get('title')) - """ + matches = re.finditer( + r'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>%s)\1''' % 
ZDFIE._VALID_URL, + webpage) + + if self._downloader.params.get('noplaylist', False): + entry = next( + (self.url_result(m.group('url'), ie=ZDFIE.ie_key()) for m in matches), + None) + self.to_screen('Downloading just the main video because of --no-playlist') + if entry: + return entry + else: + self.to_screen('Downloading playlist %s - add --no-playlist to download just the main video' % (channel_id, )) + + def check_video(m): + v_ref = self._search_regex( + r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["'])%s\2[^>]*>)''' % (m.group('p_id'), ), + webpage, 'check id', default='') + v_ref = extract_attributes(v_ref) + return v_ref.get('data-target-video-type') != 'novideo' + + return self.playlist_from_matches( + (m.group('url') for m in matches if check_video(m)), + channel_id, self._og_search_title(webpage, fatal=False)) From 6e2626f092c63a5fa22a31df409610b5deaf3968 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 11 Oct 2022 05:58:10 +0100 Subject: [PATCH 39/78] [JSInterp] Improve separation logic Based on https://github.com/yt-dlp/yt-dlp/commit/0468a3b3253957bfbeb98b4a7c71542ff80e9e06 --- youtube_dl/jsinterp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 99dd98435..530a705b4 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -214,7 +214,7 @@ class JSInterpreter(object): def __init__(self, msg, *args, **kwargs): expr = kwargs.pop('expr', None) if expr is not None: - msg = '{0} in: {1!r}'.format(msg.rstrip(), expr[:100]) + msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr) super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs) @classmethod @@ -268,7 +268,7 @@ class JSInterpreter(object): elif in_quote == '/' and char in '[]': in_regex_char_group = char == '[' escaping = not escaping and in_quote and char == '\\' - after_op = not in_quote and (char in cls.OP_CHARS or (char.isspace() and after_op)) + after_op = not in_quote and (char in cls.OP_CHARS or char == '[' or (char.isspace() and after_op)) if char != delim[pos] or any(counters.values()) or in_quote: pos = skipping = 0 @@ -301,7 +301,7 @@ class JSInterpreter(object): separated = list(cls._separate(expr, delim, 1)) if len(separated) < 2: - raise cls.Exception('No terminating paren {delim} in {expr}'.format(**locals())) + raise cls.Exception('No terminating paren {delim} in {expr:.100}'.format(**locals())) return separated[0][1:].strip(), separated[1].strip() @staticmethod From c94a459a248352fd97dccc79ed6604a558459bfd Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 11 Oct 2022 12:18:12 +0000 Subject: [PATCH 40/78] [utils] Sanitize look-alike Unicode glyphs in non-ID filename fields when --restrict-filenames Implements https://github.com/ytdl-org/youtube-dl/issues/31216#issuecomment-1236102822, which has a test. 
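For illustration (not part of the patch): the change relies on NFKC compatibility
normalization to fold look-alike glyphs into their plain ASCII equivalents before the
existing replace_insane() pass runs, e.g.:

    >>> import unicodedata
    >>> unicodedata.normalize('NFKC', 'Ｒｅｖｉｅｗ ①ﬁ')
    'Review 1fi'

Fullwidth letters, enclosed digits and ligatures are folded, while already-canonical
text such as 'ä' passes through unchanged.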
--- youtube_dl/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index fea38ed32..23a65a81c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -33,6 +33,7 @@ import sys import tempfile import time import traceback +import unicodedata import xml.etree.ElementTree import zlib @@ -2118,6 +2119,9 @@ def sanitize_filename(s, restricted=False, is_id=False): return '_' return char + # Replace look-alike Unicode glyphs + if restricted and not is_id: + s = unicodedata.normalize('NFKC', s) # Handle timestamps s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) result = ''.join(map(replace_insane, s)) From 11b284c81fe2988813c817918536fc3a5630870a Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 11 Oct 2022 12:36:44 +0000 Subject: [PATCH 41/78] [Common:JWPlayer] Fix x1000 scaling error See https://github.com/yt-dlp/yt-dlp/issues/5106#issuecomment-1264625161 --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 797c35fd5..1f33a1e06 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2844,7 +2844,7 @@ class InfoExtractor(object): 'url': source_url, 'width': int_or_none(source.get('width')), 'height': height, - 'tbr': int_or_none(source.get('bitrate')), + 'tbr': int_or_none(source.get('bitrate'), scale=1000), 'ext': ext, } if source_url.startswith('rtmp'): From c91cbf60729af93c4677864aa6c8b74b576146ca Mon Sep 17 00:00:00 2001 From: Xie Yanbo <xieyanbo@gmail.com> Date: Tue, 11 Oct 2022 20:55:09 +0800 Subject: [PATCH 42/78] [netease] Get netease music download url through player api (#31235) * remove unplayable song from test * compatible with python 2 * using standard User_Agent, fix imports * use hash instead of long description * fix lint * fix hash --- test/test_aes.py | 9 +- youtube_dl/aes.py | 37 +++++++- youtube_dl/extractor/neteasemusic.py | 123 +++++++++++++++++++-------- 3 files changed, 129 insertions(+), 40 deletions(-) diff --git a/test/test_aes.py b/test/test_aes.py index cc89fb6ab..0f181466b 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -8,7 +8,7 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text +from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text, aes_ecb_encrypt from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes import base64 @@ -58,6 +58,13 @@ class TestAES(unittest.TestCase): decrypted = (aes_decrypt_text(encrypted, password, 32)) self.assertEqual(decrypted, self.secret_msg) + def test_ecb_encrypt(self): + data = bytes_to_intlist(self.secret_msg) + encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key)) + self.assertEqual( + encrypted, + b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py index d0de2d93f..a94a41079 100644 --- a/youtube_dl/aes.py +++ b/youtube_dl/aes.py @@ -8,6 +8,18 @@ from .utils import bytes_to_intlist, intlist_to_bytes BLOCK_SIZE_BYTES = 16 +def pkcs7_padding(data): + """ + PKCS#7 padding + + @param {int[]} data cleartext + @returns {int[]} padding data + """ + + remaining_length = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES + 
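    # Illustration: PKCS#7 appends remaining_length copies of the byte value
    # remaining_length, e.g. a 14-byte tail gains [0x02, 0x02]; data that is already a
    # multiple of BLOCK_SIZE_BYTES gains a full extra block of sixteen 0x10 bytes, so
    # the receiver can always strip the padding unambiguously.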
return data + [remaining_length] * remaining_length + + def aes_ctr_decrypt(data, key, counter): """ Decrypt with aes in counter mode @@ -76,8 +88,7 @@ def aes_cbc_encrypt(data, key, iv): previous_cipher_block = iv for i in range(block_count): block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] - remaining_length = BLOCK_SIZE_BYTES - len(block) - block += [remaining_length] * remaining_length + block = pkcs7_padding(block) mixed_block = xor(block, previous_cipher_block) encrypted_block = aes_encrypt(mixed_block, expanded_key) @@ -88,6 +99,28 @@ def aes_cbc_encrypt(data, key, iv): return encrypted_data +def aes_ecb_encrypt(data, key): + """ + Encrypt with aes in ECB mode. Using PKCS#7 padding + + @param {int[]} data cleartext + @param {int[]} key 16/24/32-Byte cipher key + @returns {int[]} encrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + encrypted_data = [] + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + block = pkcs7_padding(block) + + encrypted_block = aes_encrypt(block, expanded_key) + encrypted_data += encrypted_block + + return encrypted_data + + def key_expansion(data): """ Generate key schedule diff --git a/youtube_dl/extractor/neteasemusic.py b/youtube_dl/extractor/neteasemusic.py index 978a05841..fad22a2cd 100644 --- a/youtube_dl/extractor/neteasemusic.py +++ b/youtube_dl/extractor/neteasemusic.py @@ -1,20 +1,31 @@ # coding: utf-8 from __future__ import unicode_literals -from hashlib import md5 from base64 import b64encode +from binascii import hexlify from datetime import datetime +from hashlib import md5 +from random import randint +import json import re +import time from .common import InfoExtractor +from ..aes import aes_ecb_encrypt, pkcs7_padding from ..compat import ( compat_urllib_parse_urlencode, compat_str, compat_itertools_count, ) from ..utils import ( - sanitized_Request, + ExtractorError, + bytes_to_intlist, float_or_none, + int_or_none, + intlist_to_bytes, + sanitized_Request, + std_headers, + try_get, ) @@ -35,32 +46,85 @@ class NetEaseMusicBaseIE(InfoExtractor): result = b64encode(m.digest()).decode('ascii') return result.replace('/', '_').replace('+', '-') + @classmethod + def make_player_api_request_data_and_headers(cls, song_id, bitrate): + KEY = b'e82ckenh8dichen8' + URL = '/api/song/enhance/player/url' + now = int(time.time() * 1000) + rand = randint(0, 1000) + cookie = { + 'osver': None, + 'deviceId': None, + 'appver': '8.0.0', + 'versioncode': '140', + 'mobilename': None, + 'buildver': '1623435496', + 'resolution': '1920x1080', + '__csrf': '', + 'os': 'pc', + 'channel': None, + 'requestId': '{0}_{1:04}'.format(now, rand), + } + request_text = json.dumps( + {'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie}, + separators=(',', ':')) + message = 'nobody{0}use{1}md5forencrypt'.format( + URL, request_text).encode('latin1') + msg_digest = md5(message).hexdigest() + + data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format( + URL, request_text, msg_digest) + data = pkcs7_padding(bytes_to_intlist(data)) + encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY))) + encrypted_params = hexlify(encrypted).decode('ascii').upper() + + cookie = '; '.join( + ['{0}={1}'.format(k, v if v is not None else 'undefined') + for [k, v] in cookie.items()]) + + headers = { + 'User-Agent': std_headers['User-Agent'], + 'Content-Type': 'application/x-www-form-urlencoded', + 'Referer': 'https://music.163.com', + 'Cookie': 
cookie, + } + return ('params={0}'.format(encrypted_params), headers) + + def _call_player_api(self, song_id, bitrate): + url = 'https://interface3.music.163.com/eapi/song/enhance/player/url' + data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate) + try: + return self._download_json( + url, song_id, data=data.encode('ascii'), headers=headers) + except ExtractorError as e: + if type(e.cause) in (ValueError, TypeError): + # JSON load failure + raise + except Exception: + pass + return {} + def extract_formats(self, info): formats = [] + song_id = info['id'] for song_format in self._FORMATS: details = info.get(song_format) if not details: continue - song_file_path = '/%s/%s.%s' % ( - self._encrypt(details['dfsId']), details['dfsId'], details['extension']) - - # 203.130.59.9, 124.40.233.182, 115.231.74.139, etc is a reverse proxy-like feature - # from NetEase's CDN provider that can be used if m5.music.126.net does not - # work, especially for users outside of Mainland China - # via: https://github.com/JixunMoe/unblock-163/issues/3#issuecomment-163115880 - for host in ('http://m5.music.126.net', 'http://115.231.74.139/m1.music.126.net', - 'http://124.40.233.182/m1.music.126.net', 'http://203.130.59.9/m1.music.126.net'): - song_url = host + song_file_path + + bitrate = int_or_none(details.get('bitrate')) or 999000 + data = self._call_player_api(song_id, bitrate) + for song in try_get(data, lambda x: x['data'], list) or []: + song_url = try_get(song, lambda x: x['url']) if self._is_valid_url(song_url, info['id'], 'song'): formats.append({ 'url': song_url, 'ext': details.get('extension'), - 'abr': float_or_none(details.get('bitrate'), scale=1000), + 'abr': float_or_none(song.get('br'), scale=1000), 'format_id': song_format, - 'filesize': details.get('size'), - 'asr': details.get('sr') + 'filesize': int_or_none(song.get('size')), + 'asr': int_or_none(details.get('sr')), }) - break return formats @classmethod @@ -79,30 +143,16 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): _VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://music.163.com/#/song?id=32102397', - 'md5': 'f2e97280e6345c74ba9d5677dd5dcb45', + 'md5': '3e909614ce09b1ccef4a3eb205441190', 'info_dict': { 'id': '32102397', 'ext': 'mp3', - 'title': 'Bad Blood (feat. 
Kendrick Lamar)', + 'title': 'Bad Blood', 'creator': 'Taylor Swift / Kendrick Lamar', - 'upload_date': '20150517', - 'timestamp': 1431878400, - 'description': 'md5:a10a54589c2860300d02e1de821eb2ef', + 'upload_date': '20150516', + 'timestamp': 1431792000, + 'description': 'md5:25fc5f27e47aad975aa6d36382c7833c', }, - 'skip': 'Blocked outside Mainland China', - }, { - 'note': 'No lyrics translation.', - 'url': 'http://music.163.com/#/song?id=29822014', - 'info_dict': { - 'id': '29822014', - 'ext': 'mp3', - 'title': '听见下雨的声音', - 'creator': '周杰伦', - 'upload_date': '20141225', - 'timestamp': 1419523200, - 'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c', - }, - 'skip': 'Blocked outside Mainland China', }, { 'note': 'No lyrics.', 'url': 'http://music.163.com/song?id=17241424', @@ -112,9 +162,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): 'title': 'Opus 28', 'creator': 'Dustin O\'Halloran', 'upload_date': '20080211', + 'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4', 'timestamp': 1202745600, }, - 'skip': 'Blocked outside Mainland China', }, { 'note': 'Has translated name.', 'url': 'http://music.163.com/#/song?id=22735043', @@ -128,7 +178,6 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): 'timestamp': 1264608000, 'alt_title': '说出愿望吧(Genie)', }, - 'skip': 'Blocked outside Mainland China', }] def _process_lyrics(self, lyrics_info): From 7bbd5b13d4c6cfc3e24f56413ff1a1eace8472b8 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Wed, 12 Oct 2022 01:09:55 +0100 Subject: [PATCH 43/78] [Motherless] Pull from yt-dlp, etc * use username field * loosen regexes * warn on page count 0 in group * avoid reloading group page 1 Closes #29626 --- youtube_dl/extractor/motherless.py | 33 +++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py index 35d2b46ed..d352cb180 100644 --- a/youtube_dl/extractor/motherless.py +++ b/youtube_dl/extractor/motherless.py @@ -126,9 +126,10 @@ class MotherlessIE(InfoExtractor): kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta} upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d') - comment_count = webpage.count('class="media-comment-contents"') + comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage)) uploader_id = self._html_search_regex( - r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)''', + (r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''', + r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''), webpage, 'uploader_id') categories = self._html_search_meta('keywords', webpage, default=None) @@ -171,6 +172,17 @@ class MotherlessGroupIE(InfoExtractor): 'any kind!' 
}, 'playlist_mincount': 0, + 'expected_warnings': [ + 'This group has no videos.', + ] + }, { + 'url': 'https://motherless.com/g/beautiful_cock', + 'info_dict': { + 'id': 'beautiful_cock', + 'title': 'Beautiful Cock', + 'description': 'Group for lovely cocks yours, mine, a friends anything human', + }, + 'playlist_mincount': 2500, }] @classmethod @@ -211,14 +223,21 @@ class MotherlessGroupIE(InfoExtractor): 'description', webpage, fatal=False) page_count = str_to_int(self._search_regex( r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b', - webpage, 'page_count', default='1')) + webpage, 'page_count', default=0)) + if not page_count: + message = self._search_regex( + r'''class\s*=\s*['"]error-page\b[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*''', + webpage, 'error_msg', default=None) or 'This group has no videos.' + self.report_warning(message, group_id) + page_count = 1 PAGE_SIZE = 80 def _get_page(idx): - webpage = self._download_webpage( - page_url, group_id, query={'page': idx + 1}, - note='Downloading page %d/%d' % (idx + 1, page_count) - ) + if idx > 0: + webpage = self._download_webpage( + page_url, group_id, query={'page': idx + 1}, + note='Downloading page %d/%d' % (idx + 1, page_count) + ) for entry in self._extract_entries(webpage, url): yield entry From 7135277fec497bd7649c31087aba52daa7897484 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Thu, 13 Oct 2022 01:59:01 +0000 Subject: [PATCH 44/78] [ManyVids] Support new single-page app structure See https://github.com/yt-dlp/yt-dlp/issues/5210#issuecomment-1276919962. --- youtube_dl/extractor/manyvids.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/manyvids.py b/youtube_dl/extractor/manyvids.py index 6805102ba..608a02a8d 100644 --- a/youtube_dl/extractor/manyvids.py +++ b/youtube_dl/extractor/manyvids.py @@ -47,7 +47,12 @@ class ManyVidsIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + real_url = 'https://www.manyvids.com/video/%s/gtm.js' % (video_id, ) + try: + webpage = self._download_webpage(real_url, video_id) + except: + # probably useless fallback + webpage = self._download_webpage(url, video_id) info = self._search_regex( r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''', @@ -98,7 +103,8 @@ class ManyVidsIE(InfoExtractor): # Sets some cookies self._download_webpage( 'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php', - video_id, fatal=False, data=urlencode_postdata({ + video_id, note='Setting format cookies', fatal=False, + data=urlencode_postdata({ 'mvtoken': mv_token, 'vid': video_id, }), headers={ From ee8560d01eec511587f8207c3d84219ec620a9a6 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Thu, 13 Oct 2022 02:42:49 +0000 Subject: [PATCH 45/78] [ManyVids] Support new single-page app structure --- youtube_dl/extractor/manyvids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/manyvids.py b/youtube_dl/extractor/manyvids.py index 608a02a8d..75978cfd6 100644 --- a/youtube_dl/extractor/manyvids.py +++ b/youtube_dl/extractor/manyvids.py @@ -50,7 +50,7 @@ class ManyVidsIE(InfoExtractor): real_url = 'https://www.manyvids.com/video/%s/gtm.js' % (video_id, ) try: webpage = self._download_webpage(real_url, video_id) - except: + except Exception: # probably useless fallback webpage = self._download_webpage(url, video_id) From 
447edc48e63f5f21797ea0d9ee84e37ed1547035 Mon Sep 17 00:00:00 2001 From: ache <ache@ache.one> Date: Tue, 18 Oct 2022 15:06:27 +0000 Subject: [PATCH 46/78] Fix ADN extractor (#31275) * Rename Anime Digital Network to Animation Digital Network, animationdigitalnetwork.fr * Update the test to an available video * Update the decoding key of subtitles * Keep the support of old URLs * Add a test to match the old URL * Reduce redundancy of the URL name * Fix md5 ^^" * Fix undefined _BASE * Process HTTP error text (eg geo-block) correctly and uniformly in Py3, Py2 * Skip test for CI since geo-blocked Signed-off-by: ache <ache@ache.one> Co-authored-by: dirkf <fieldhouse@gmx.net> --- youtube_dl/extractor/adn.py | 57 +++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py index a55ebbcbd..5ff419f19 100644 --- a/youtube_dl/extractor/adn.py +++ b/youtube_dl/extractor/adn.py @@ -31,30 +31,34 @@ from ..utils import ( class ADNIE(InfoExtractor): - IE_DESC = 'Anime Digital Network' - _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)' - _TEST = { - 'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', - 'md5': '0319c99885ff5547565cacb4f3f9348d', + IE_DESC = 'Animation Digital Network' + _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir', + 'md5': '1c9ef066ceb302c86f80c2b371615261', 'info_dict': { - 'id': '7778', + 'id': '9841', 'ext': 'mp4', - 'title': 'Blue Exorcist - Kyôto Saga - Episode 1', - 'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5', - 'series': 'Blue Exorcist - Kyôto Saga', - 'duration': 1467, - 'release_date': '20170106', + 'title': 'Fruits Basket - Episode 1', + 'description': 'md5:14be2f72c3c96809b0ca424b0097d336', + 'series': 'Fruits Basket', + 'duration': 1437, + 'release_date': '20190405', 'comment_count': int, 'average_rating': float, - 'season_number': 2, - 'episode': 'Début des hostilités', + 'season_number': 1, + 'episode': 'À ce soir !', 'episode_number': 1, - } - } + }, + 'skip': 'Only available in region (FR, ...)', + }, { + 'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', + 'only_matching': True, + }] - _NETRC_MACHINE = 'animedigitalnetwork' - _BASE_URL = 'http://animedigitalnetwork.fr' - _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/' + _NETRC_MACHINE = 'animationdigitalnetwork' + _BASE = 'animationdigitalnetwork.fr' + _API_BASE_URL = 'https://gw.api.' 
+ _BASE + '/' _PLAYER_BASE_URL = _API_BASE_URL + 'player/' _HEADERS = {} _LOGIN_ERR_MESSAGE = 'Unable to log in' @@ -82,14 +86,14 @@ class ADNIE(InfoExtractor): if subtitle_location: enc_subtitles = self._download_webpage( subtitle_location, video_id, 'Downloading subtitles data', - fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'}) + fatal=False, headers={'Origin': 'https://' + self._BASE}) if not enc_subtitles: return None - # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js + # http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js dec_subtitles = intlist_to_bytes(aes_cbc_decrypt( bytes_to_intlist(compat_b64decode(enc_subtitles[24:])), - bytes_to_intlist(binascii.unhexlify(self._K + 'ab9f52f5baae7c72')), + bytes_to_intlist(binascii.unhexlify(self._K + '7fac1178830cfe0c')), bytes_to_intlist(compat_b64decode(enc_subtitles[:24])) )) subtitles_json = self._parse_json( @@ -138,9 +142,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' if not username: return try: + url = self._API_BASE_URL + 'authentication/login' access_token = (self._download_json( - self._API_BASE_URL + 'authentication/login', None, - 'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False, + url, None, 'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False, data=urlencode_postdata({ 'password': password, 'rememberMe': False, @@ -153,7 +157,8 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' message = None if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: resp = self._parse_json( - e.cause.read().decode(), None, fatal=False) or {} + self._webpage_read_content(e.cause, url, username), + username, fatal=False) or {} message = resp.get('message') or resp.get('code') self.report_warning(message or self._LOGIN_ERR_MESSAGE) @@ -211,7 +216,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' # This usually goes away with a different random pkcs1pad, so retry continue - error = self._parse_json(e.cause.read(), video_id) + error = self._parse_json( + self._webpage_read_content(e.cause, links_url, video_id), + video_id, fatal=False) or {} message = error.get('message') if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country': self.raise_geo_restricted(msg=message) From 0faa45d6c08f518b73d20e341944ea7292f9f2b2 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Thu, 20 Oct 2022 11:06:44 +0000 Subject: [PATCH 47/78] [BongaCams] Support new .net domain Resolves #31262. 
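For reference, a quick check (using the test URLs from this patch; illustrative, not part of the test suite) that the widened pattern accepts both TLDs:

    import re

    _VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.(?:com|net))/(?P<id>[^/?&#]+)'
    for url in ('https://de.bongacams.com/azumi-8',
                'https://de.bongacams.net/claireashton'):
        m = re.match(_VALID_URL, url)
        print(m.group('host'), m.group('id'))
    # de.bongacams.com azumi-8
    # de.bongacams.net claireashton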
--- youtube_dl/extractor/bongacams.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bongacams.py b/youtube_dl/extractor/bongacams.py index 180542fbc..016999d55 100644 --- a/youtube_dl/extractor/bongacams.py +++ b/youtube_dl/extractor/bongacams.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import re @@ -12,13 +13,28 @@ from ..utils import ( class BongaCamsIE(InfoExtractor): - _VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.com)/(?P<id>[^/?&#]+)' + _VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.(?:com|net))/(?P<id>[^/?&#]+)' _TESTS = [{ 'url': 'https://de.bongacams.com/azumi-8', 'only_matching': True, }, { 'url': 'https://cn.bongacams.com/azumi-8', 'only_matching': True, + }, { + 'url': 'https://de.bongacams.net/claireashton', + 'info_dict': { + 'id': 'claireashton', + 'ext': 'mp4', + 'title': r're:ClaireAshton \d{4}-\d{2}-\d{2} \d{2}:\d{2}', + 'age_limit': 18, + 'uploader_id': 'ClaireAshton', + 'uploader': 'ClaireAshton', + 'like_count': int, + 'is_live': True, + }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): From 502cefa41f1d24057b6158748b2072dc911af682 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Thu, 27 Oct 2022 14:33:00 +0000 Subject: [PATCH 48/78] [Vimeo] Update variable name in hydration JSON pattern Fixes #31311 --- youtube_dl/extractor/vimeo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index a66912502..853b38402 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -663,7 +663,7 @@ class VimeoIE(VimeoBaseInfoExtractor): if '//player.vimeo.com/video/' in url: config = self._parse_json(self._search_regex( - r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id) + r'\b(?:playerC|c)onfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id) if config.get('view') == 4: config = self._verify_player_video_password( redirect_url, video_id, headers) From d25cf62086443d86a633b8176b5c7e79f4cc569e Mon Sep 17 00:00:00 2001 From: Xie Yanbo <xieyanbo@gmail.com> Date: Sun, 30 Oct 2022 19:46:46 +0800 Subject: [PATCH 49/78] [netease] Impove error handling (#31303) * add warnings for users outside of China * skip empty song urls Co-authored-by: dirkf <fieldhouse@gmx.net> --- youtube_dl/extractor/neteasemusic.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/neteasemusic.py b/youtube_dl/extractor/neteasemusic.py index fad22a2cd..2bbfc7858 100644 --- a/youtube_dl/extractor/neteasemusic.py +++ b/youtube_dl/extractor/neteasemusic.py @@ -20,6 +20,7 @@ from ..compat import ( from ..utils import ( ExtractorError, bytes_to_intlist, + error_to_compat_str, float_or_none, int_or_none, intlist_to_bytes, @@ -94,17 +95,23 @@ class NetEaseMusicBaseIE(InfoExtractor): url = 'https://interface3.music.163.com/eapi/song/enhance/player/url' data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate) try: - return self._download_json( + msg = 'empty result' + result = self._download_json( url, song_id, data=data.encode('ascii'), headers=headers) + if result: + return result except ExtractorError as e: if type(e.cause) in (ValueError, TypeError): # JSON load failure raise - except Exception: - pass + except Exception as e: + msg = error_to_compat_str(e) + self.report_warning('%s API call (%s) failed: %s' % ( + song_id, bitrate, msg)) return {} def 
extract_formats(self, info): + err = 0 formats = [] song_id = info['id'] for song_format in self._FORMATS: @@ -116,6 +123,8 @@ class NetEaseMusicBaseIE(InfoExtractor): data = self._call_player_api(song_id, bitrate) for song in try_get(data, lambda x: x['data'], list) or []: song_url = try_get(song, lambda x: x['url']) + if not song_url: + continue if self._is_valid_url(song_url, info['id'], 'song'): formats.append({ 'url': song_url, @@ -125,6 +134,19 @@ class NetEaseMusicBaseIE(InfoExtractor): 'filesize': int_or_none(song.get('size')), 'asr': int_or_none(details.get('sr')), }) + elif err == 0: + err = try_get(song, lambda x: x['code'], int) + + if not formats: + msg = 'No media links found' + if err != 0 and (err < 200 or err >= 400): + raise ExtractorError( + '%s (site code %d)' % (msg, err, ), expected=True) + else: + self.raise_geo_restricted( + msg + ': probably this video is not available from your location due to geo restriction.', + countries=['CN']) + return formats @classmethod From ce5d36486ea95b8961c639d118bad262c8d7a067 Mon Sep 17 00:00:00 2001 From: Xie Yanbo <xieyanbo@gmail.com> Date: Sun, 30 Oct 2022 19:48:44 +0800 Subject: [PATCH 50/78] [netease] Support urls shared from mobile app (#31304) Co-authored-by: dirkf <fieldhouse@gmx.net> --- youtube_dl/extractor/neteasemusic.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/neteasemusic.py b/youtube_dl/extractor/neteasemusic.py index 2bbfc7858..5e5c6271b 100644 --- a/youtube_dl/extractor/neteasemusic.py +++ b/youtube_dl/extractor/neteasemusic.py @@ -162,7 +162,7 @@ class NetEaseMusicBaseIE(InfoExtractor): class NetEaseMusicIE(NetEaseMusicBaseIE): IE_NAME = 'netease:song' IE_DESC = '网易云音乐' - _VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)' + _VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://music.163.com/#/song?id=32102397', 'md5': '3e909614ce09b1ccef4a3eb205441190', @@ -200,6 +200,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): 'timestamp': 1264608000, 'alt_title': '说出愿望吧(Genie)', }, + }, { + 'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846', + 'md5': '95826c73ea50b1c288b22180ec9e754d', + 'info_dict': { + 'id': '95670', + 'ext': 'mp3', + 'title': '国际歌', + 'creator': '马备', + 'upload_date': '19911130', + 'timestamp': 691516800, + 'description': 'md5:1ba2f911a2b0aa398479f595224f2141', + }, }] def _process_lyrics(self, lyrics_info): From a19855f0f50fe7a6eb05a1d8fee554897e4dbdda Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Mon, 31 Oct 2022 21:18:36 +0000 Subject: [PATCH 51/78] [compat] Add Python 2 Unicode casefold using a trivial wrapper around icu/CaseFolding.txt --- youtube_dl/casefold.py | 1643 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1643 insertions(+) create mode 100644 youtube_dl/casefold.py diff --git a/youtube_dl/casefold.py b/youtube_dl/casefold.py new file mode 100644 index 000000000..546269a3c --- /dev/null +++ b/youtube_dl/casefold.py @@ -0,0 +1,1643 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .compat import compat_str + +# CaseFolding-15.0.0.txt +# Date: 2022-02-02, 23:35:35 GMT +# © 2022 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
+# For terms of use, see https://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ +# +# Case Folding Properties +# +# This file is a supplement to the UnicodeData file. +# It provides a case folding mapping generated from the Unicode Character Database. +# If all characters are mapped according to the full mapping below, then +# case differences (according to UnicodeData.txt and SpecialCasing.txt) +# are eliminated. +# +# The data supports both implementations that require simple case foldings +# (where string lengths don't change), and implementations that allow full case folding +# (where string lengths may grow). Note that where they can be supported, the +# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match. +# +# All code points not listed in this file map to themselves. +# +# NOTE: case folding does not preserve normalization formats! +# +# For information on case folding, including how to have case folding +# preserve normalization formats, see Section 3.13 Default Case Algorithms in +# The Unicode Standard. +# +# ================================================================================ +# Format +# ================================================================================ +# The entries in this file are in the following machine-readable format: +# +# <code>; <status>; <mapping>; # <name> +# +# The status field is: +# C: common case folding, common mappings shared by both simple and full mappings. +# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces. +# S: simple case folding, mappings to single characters where different from F. +# T: special case for uppercase I and dotted uppercase I +# - For non-Turkic languages, this mapping is normally not used. +# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters. +# Note that the Turkic mappings do not maintain canonical equivalence without additional processing. +# See the discussions of case mapping in the Unicode Standard for more information. +# +# Usage: +# A. To do a simple case folding, use the mappings with status C + S. +# B. To do a full case folding, use the mappings with status C + F. +# +# The mappings with status T can be used or omitted depending on the desired case-folding +# behavior. (The default option is to exclude them.) +# +# ================================================================= + +# Property: Case_Folding + +# All code points not explicitly listed for Case_Folding +# have the value C for the status field, and the code point itself for the mapping field. 
+ +# ================================================================= +_map_str = ''' +0041; C; 0061; # LATIN CAPITAL LETTER A +0042; C; 0062; # LATIN CAPITAL LETTER B +0043; C; 0063; # LATIN CAPITAL LETTER C +0044; C; 0064; # LATIN CAPITAL LETTER D +0045; C; 0065; # LATIN CAPITAL LETTER E +0046; C; 0066; # LATIN CAPITAL LETTER F +0047; C; 0067; # LATIN CAPITAL LETTER G +0048; C; 0068; # LATIN CAPITAL LETTER H +0049; C; 0069; # LATIN CAPITAL LETTER I +0049; T; 0131; # LATIN CAPITAL LETTER I +004A; C; 006A; # LATIN CAPITAL LETTER J +004B; C; 006B; # LATIN CAPITAL LETTER K +004C; C; 006C; # LATIN CAPITAL LETTER L +004D; C; 006D; # LATIN CAPITAL LETTER M +004E; C; 006E; # LATIN CAPITAL LETTER N +004F; C; 006F; # LATIN CAPITAL LETTER O +0050; C; 0070; # LATIN CAPITAL LETTER P +0051; C; 0071; # LATIN CAPITAL LETTER Q +0052; C; 0072; # LATIN CAPITAL LETTER R +0053; C; 0073; # LATIN CAPITAL LETTER S +0054; C; 0074; # LATIN CAPITAL LETTER T +0055; C; 0075; # LATIN CAPITAL LETTER U +0056; C; 0076; # LATIN CAPITAL LETTER V +0057; C; 0077; # LATIN CAPITAL LETTER W +0058; C; 0078; # LATIN CAPITAL LETTER X +0059; C; 0079; # LATIN CAPITAL LETTER Y +005A; C; 007A; # LATIN CAPITAL LETTER Z +00B5; C; 03BC; # MICRO SIGN +00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE +00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE +00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE +00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS +00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE +00C6; C; 00E6; # LATIN CAPITAL LETTER AE +00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA +00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE +00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE +00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS +00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE +00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE +00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS +00D0; C; 00F0; # LATIN CAPITAL LETTER ETH +00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE +00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE +00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE +00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE +00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS +00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE +00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE +00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE +00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS +00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE +00DE; C; 00FE; # LATIN CAPITAL LETTER THORN +00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S +0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON +0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE +0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK +0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE +0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX +010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE +010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON +010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON +0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE +0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON +0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE +0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE +0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK 
+011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON +011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX +011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE +0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE +0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA +0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX +0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE +0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE +012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON +012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE +012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK +0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE +0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE +0132; C; 0133; # LATIN CAPITAL LIGATURE IJ +0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX +0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA +0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE +013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA +013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON +013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT +0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE +0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE +0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA +0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON +0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +014A; C; 014B; # LATIN CAPITAL LETTER ENG +014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON +014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE +0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0152; C; 0153; # LATIN CAPITAL LIGATURE OE +0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE +0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA +0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON +015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE +015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX +015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA +0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON +0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA +0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON +0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE +0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE +016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON +016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE +016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE +0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK +0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX +0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS +0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE +017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE +017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON +017F; C; 0073; # LATIN SMALL LETTER LONG S +0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK +0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR +0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX +0186; C; 0254; # LATIN CAPITAL LETTER OPEN O +0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK +0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D +018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK +018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR +018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E +018F; C; 0259; # LATIN CAPITAL LETTER SCHWA +0190; C; 025B; # LATIN CAPITAL LETTER OPEN E +0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK +0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK +0194; C; 0263; # LATIN CAPITAL LETTER GAMMA +0196; C; 0269; # LATIN 
CAPITAL LETTER IOTA +0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE +0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK +019C; C; 026F; # LATIN CAPITAL LETTER TURNED M +019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK +019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE +01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN +01A2; C; 01A3; # LATIN CAPITAL LETTER OI +01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK +01A6; C; 0280; # LATIN LETTER YR +01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO +01A9; C; 0283; # LATIN CAPITAL LETTER ESH +01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK +01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK +01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN +01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON +01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK +01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK +01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE +01B7; C; 0292; # LATIN CAPITAL LETTER EZH +01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED +01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE +01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON +01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON +01C7; C; 01C9; # LATIN CAPITAL LETTER LJ +01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J +01CA; C; 01CC; # LATIN CAPITAL LETTER NJ +01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J +01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON +01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON +01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON +01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON +01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON +01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE +01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON +01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE +01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON +01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON +01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON +01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE +01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON +01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON +01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK +01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON +01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON +01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON +01F1; C; 01F3; # LATIN CAPITAL LETTER DZ +01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z +01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE +01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR +01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN +01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE +01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE +01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE +01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE +0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE +0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE +0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE +0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE +0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE +020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE +020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE +020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE +0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE +0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE +0214; C; 0215; # LATIN CAPITAL 
LETTER U WITH DOUBLE GRAVE +0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE +0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW +021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW +021C; C; 021D; # LATIN CAPITAL LETTER YOGH +021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON +0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG +0222; C; 0223; # LATIN CAPITAL LETTER OU +0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK +0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE +0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA +022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON +022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON +022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE +0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON +0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON +023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE +023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE +023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR +023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE +0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP +0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE +0244; C; 0289; # LATIN CAPITAL LETTER U BAR +0245; C; 028C; # LATIN CAPITAL LETTER TURNED V +0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE +0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE +024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL +024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE +024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE +0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI +0370; C; 0371; # GREEK CAPITAL LETTER HETA +0372; C; 0373; # GREEK CAPITAL LETTER ARCHAIC SAMPI +0376; C; 0377; # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA +037F; C; 03F3; # GREEK CAPITAL LETTER YOT +0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS +0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS +0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS +038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS +038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS +038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS +038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS +0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA +0392; C; 03B2; # GREEK CAPITAL LETTER BETA +0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA +0394; C; 03B4; # GREEK CAPITAL LETTER DELTA +0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON +0396; C; 03B6; # GREEK CAPITAL LETTER ZETA +0397; C; 03B7; # GREEK CAPITAL LETTER ETA +0398; C; 03B8; # GREEK CAPITAL LETTER THETA +0399; C; 03B9; # GREEK CAPITAL LETTER IOTA +039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA +039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA +039C; C; 03BC; # GREEK CAPITAL LETTER MU +039D; C; 03BD; # GREEK CAPITAL LETTER NU +039E; C; 03BE; # GREEK CAPITAL LETTER XI +039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON +03A0; C; 03C0; # GREEK CAPITAL LETTER PI +03A1; C; 03C1; # GREEK CAPITAL LETTER RHO +03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA +03A4; C; 03C4; # GREEK CAPITAL LETTER TAU +03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON +03A6; C; 03C6; # GREEK CAPITAL LETTER PHI +03A7; C; 03C7; # GREEK CAPITAL LETTER CHI +03A8; C; 03C8; # GREEK CAPITAL LETTER PSI +03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA +03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA +03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS +03C2; C; 03C3; 
# GREEK SMALL LETTER FINAL SIGMA +03CF; C; 03D7; # GREEK CAPITAL KAI SYMBOL +03D0; C; 03B2; # GREEK BETA SYMBOL +03D1; C; 03B8; # GREEK THETA SYMBOL +03D5; C; 03C6; # GREEK PHI SYMBOL +03D6; C; 03C0; # GREEK PI SYMBOL +03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA +03DA; C; 03DB; # GREEK LETTER STIGMA +03DC; C; 03DD; # GREEK LETTER DIGAMMA +03DE; C; 03DF; # GREEK LETTER KOPPA +03E0; C; 03E1; # GREEK LETTER SAMPI +03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI +03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI +03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI +03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI +03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA +03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA +03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI +03F0; C; 03BA; # GREEK KAPPA SYMBOL +03F1; C; 03C1; # GREEK RHO SYMBOL +03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL +03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL +03F7; C; 03F8; # GREEK CAPITAL LETTER SHO +03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL +03FA; C; 03FB; # GREEK CAPITAL LETTER SAN +03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL +03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL +03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL +0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE +0401; C; 0451; # CYRILLIC CAPITAL LETTER IO +0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE +0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE +0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE +0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE +0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +0407; C; 0457; # CYRILLIC CAPITAL LETTER YI +0408; C; 0458; # CYRILLIC CAPITAL LETTER JE +0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE +040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE +040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE +040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE +040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE +040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U +040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE +0410; C; 0430; # CYRILLIC CAPITAL LETTER A +0411; C; 0431; # CYRILLIC CAPITAL LETTER BE +0412; C; 0432; # CYRILLIC CAPITAL LETTER VE +0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE +0414; C; 0434; # CYRILLIC CAPITAL LETTER DE +0415; C; 0435; # CYRILLIC CAPITAL LETTER IE +0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE +0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE +0418; C; 0438; # CYRILLIC CAPITAL LETTER I +0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I +041A; C; 043A; # CYRILLIC CAPITAL LETTER KA +041B; C; 043B; # CYRILLIC CAPITAL LETTER EL +041C; C; 043C; # CYRILLIC CAPITAL LETTER EM +041D; C; 043D; # CYRILLIC CAPITAL LETTER EN +041E; C; 043E; # CYRILLIC CAPITAL LETTER O +041F; C; 043F; # CYRILLIC CAPITAL LETTER PE +0420; C; 0440; # CYRILLIC CAPITAL LETTER ER +0421; C; 0441; # CYRILLIC CAPITAL LETTER ES +0422; C; 0442; # CYRILLIC CAPITAL LETTER TE +0423; C; 0443; # CYRILLIC CAPITAL LETTER U +0424; C; 0444; # CYRILLIC CAPITAL LETTER EF +0425; C; 0445; # CYRILLIC CAPITAL LETTER HA +0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE +0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE +0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA +0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA +042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN +042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU +042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN +042D; C; 044D; # CYRILLIC CAPITAL LETTER E +042E; C; 044E; # CYRILLIC CAPITAL LETTER YU +042F; C; 044F; # CYRILLIC CAPITAL LETTER YA +0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA +0462; C; 0463; # CYRILLIC CAPITAL 
LETTER YAT +0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E +0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS +0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS +046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS +046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS +046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI +0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI +0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA +0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA +0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0478; C; 0479; # CYRILLIC CAPITAL LETTER UK +047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA +047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO +047E; C; 047F; # CYRILLIC CAPITAL LETTER OT +0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA +048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL +048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN +048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK +0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE +0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK +0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER +0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER +049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER +049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE +049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE +04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA +04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE +04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK +04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA +04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER +04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER +04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U +04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE +04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER +04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE +04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER +04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE +04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA +04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE +04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER +04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA +04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE +04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK +04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL +04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK +04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL +04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE +04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL +04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE +04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS +04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE +04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE +04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA +04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS +04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS +04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS +04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE +04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON +04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS +04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS +04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O 
+04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS +04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS +04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON +04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS +04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE +04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS +04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER +04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS +04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK +04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK +04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE +0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE +0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE +0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE +0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE +0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE +050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE +050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE +050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE +0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE +0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK +0514; C; 0515; # CYRILLIC CAPITAL LETTER LHA +0516; C; 0517; # CYRILLIC CAPITAL LETTER RHA +0518; C; 0519; # CYRILLIC CAPITAL LETTER YAE +051A; C; 051B; # CYRILLIC CAPITAL LETTER QA +051C; C; 051D; # CYRILLIC CAPITAL LETTER WE +051E; C; 051F; # CYRILLIC CAPITAL LETTER ALEUT KA +0520; C; 0521; # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK +0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK +0524; C; 0525; # CYRILLIC CAPITAL LETTER PE WITH DESCENDER +0526; C; 0527; # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER +0528; C; 0529; # CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK +052A; C; 052B; # CYRILLIC CAPITAL LETTER DZZHE +052C; C; 052D; # CYRILLIC CAPITAL LETTER DCHE +052E; C; 052F; # CYRILLIC CAPITAL LETTER EL WITH DESCENDER +0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB +0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN +0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM +0534; C; 0564; # ARMENIAN CAPITAL LETTER DA +0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH +0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA +0537; C; 0567; # ARMENIAN CAPITAL LETTER EH +0538; C; 0568; # ARMENIAN CAPITAL LETTER ET +0539; C; 0569; # ARMENIAN CAPITAL LETTER TO +053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE +053B; C; 056B; # ARMENIAN CAPITAL LETTER INI +053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN +053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH +053E; C; 056E; # ARMENIAN CAPITAL LETTER CA +053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN +0540; C; 0570; # ARMENIAN CAPITAL LETTER HO +0541; C; 0571; # ARMENIAN CAPITAL LETTER JA +0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD +0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH +0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN +0545; C; 0575; # ARMENIAN CAPITAL LETTER YI +0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW +0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA +0548; C; 0578; # ARMENIAN CAPITAL LETTER VO +0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA +054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH +054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH +054C; C; 057C; # ARMENIAN CAPITAL LETTER RA +054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH +054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW +054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN +0550; C; 0580; # ARMENIAN CAPITAL LETTER REH +0551; C; 0581; # ARMENIAN CAPITAL LETTER CO +0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN +0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR +0554; C; 0584; # ARMENIAN 
CAPITAL LETTER KEH +0555; C; 0585; # ARMENIAN CAPITAL LETTER OH +0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH +0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN +10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN +10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN +10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN +10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON +10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN +10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN +10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN +10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN +10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN +10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN +10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS +10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN +10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR +10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON +10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR +10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR +10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE +10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN +10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR +10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN +10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR +10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR +10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN +10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR +10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN +10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN +10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN +10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL +10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL +10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR +10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN +10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN +10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE +10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE +10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE +10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE +10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR +10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE +10C7; C; 2D27; # GEORGIAN CAPITAL LETTER YN +10CD; C; 2D2D; # GEORGIAN CAPITAL LETTER AEN +13F8; C; 13F0; # CHEROKEE SMALL LETTER YE +13F9; C; 13F1; # CHEROKEE SMALL LETTER YI +13FA; C; 13F2; # CHEROKEE SMALL LETTER YO +13FB; C; 13F3; # CHEROKEE SMALL LETTER YU +13FC; C; 13F4; # CHEROKEE SMALL LETTER YV +13FD; C; 13F5; # CHEROKEE SMALL LETTER MV +1C80; C; 0432; # CYRILLIC SMALL LETTER ROUNDED VE +1C81; C; 0434; # CYRILLIC SMALL LETTER LONG-LEGGED DE +1C82; C; 043E; # CYRILLIC SMALL LETTER NARROW O +1C83; C; 0441; # CYRILLIC SMALL LETTER WIDE ES +1C84; C; 0442; # CYRILLIC SMALL LETTER TALL TE +1C85; C; 0442; # CYRILLIC SMALL LETTER THREE-LEGGED TE +1C86; C; 044A; # CYRILLIC SMALL LETTER TALL HARD SIGN +1C87; C; 0463; # CYRILLIC SMALL LETTER TALL YAT +1C88; C; A64B; # CYRILLIC SMALL LETTER UNBLENDED UK +1C90; C; 10D0; # GEORGIAN MTAVRULI CAPITAL LETTER AN +1C91; C; 10D1; # GEORGIAN MTAVRULI CAPITAL LETTER BAN +1C92; C; 10D2; # GEORGIAN MTAVRULI CAPITAL LETTER GAN +1C93; C; 10D3; # GEORGIAN MTAVRULI CAPITAL LETTER DON +1C94; C; 10D4; # GEORGIAN MTAVRULI CAPITAL LETTER EN +1C95; C; 10D5; # GEORGIAN MTAVRULI CAPITAL LETTER VIN +1C96; C; 10D6; # GEORGIAN MTAVRULI CAPITAL LETTER ZEN +1C97; C; 10D7; # GEORGIAN MTAVRULI CAPITAL LETTER TAN +1C98; C; 10D8; # GEORGIAN MTAVRULI CAPITAL LETTER IN +1C99; C; 10D9; # GEORGIAN MTAVRULI CAPITAL LETTER KAN +1C9A; C; 10DA; # GEORGIAN MTAVRULI CAPITAL LETTER LAS +1C9B; C; 10DB; # GEORGIAN MTAVRULI CAPITAL LETTER MAN +1C9C; C; 10DC; # GEORGIAN MTAVRULI CAPITAL LETTER NAR +1C9D; C; 10DD; # GEORGIAN MTAVRULI CAPITAL LETTER ON +1C9E; C; 10DE; # GEORGIAN MTAVRULI CAPITAL LETTER PAR 
+1C9F; C; 10DF; # GEORGIAN MTAVRULI CAPITAL LETTER ZHAR +1CA0; C; 10E0; # GEORGIAN MTAVRULI CAPITAL LETTER RAE +1CA1; C; 10E1; # GEORGIAN MTAVRULI CAPITAL LETTER SAN +1CA2; C; 10E2; # GEORGIAN MTAVRULI CAPITAL LETTER TAR +1CA3; C; 10E3; # GEORGIAN MTAVRULI CAPITAL LETTER UN +1CA4; C; 10E4; # GEORGIAN MTAVRULI CAPITAL LETTER PHAR +1CA5; C; 10E5; # GEORGIAN MTAVRULI CAPITAL LETTER KHAR +1CA6; C; 10E6; # GEORGIAN MTAVRULI CAPITAL LETTER GHAN +1CA7; C; 10E7; # GEORGIAN MTAVRULI CAPITAL LETTER QAR +1CA8; C; 10E8; # GEORGIAN MTAVRULI CAPITAL LETTER SHIN +1CA9; C; 10E9; # GEORGIAN MTAVRULI CAPITAL LETTER CHIN +1CAA; C; 10EA; # GEORGIAN MTAVRULI CAPITAL LETTER CAN +1CAB; C; 10EB; # GEORGIAN MTAVRULI CAPITAL LETTER JIL +1CAC; C; 10EC; # GEORGIAN MTAVRULI CAPITAL LETTER CIL +1CAD; C; 10ED; # GEORGIAN MTAVRULI CAPITAL LETTER CHAR +1CAE; C; 10EE; # GEORGIAN MTAVRULI CAPITAL LETTER XAN +1CAF; C; 10EF; # GEORGIAN MTAVRULI CAPITAL LETTER JHAN +1CB0; C; 10F0; # GEORGIAN MTAVRULI CAPITAL LETTER HAE +1CB1; C; 10F1; # GEORGIAN MTAVRULI CAPITAL LETTER HE +1CB2; C; 10F2; # GEORGIAN MTAVRULI CAPITAL LETTER HIE +1CB3; C; 10F3; # GEORGIAN MTAVRULI CAPITAL LETTER WE +1CB4; C; 10F4; # GEORGIAN MTAVRULI CAPITAL LETTER HAR +1CB5; C; 10F5; # GEORGIAN MTAVRULI CAPITAL LETTER HOE +1CB6; C; 10F6; # GEORGIAN MTAVRULI CAPITAL LETTER FI +1CB7; C; 10F7; # GEORGIAN MTAVRULI CAPITAL LETTER YN +1CB8; C; 10F8; # GEORGIAN MTAVRULI CAPITAL LETTER ELIFI +1CB9; C; 10F9; # GEORGIAN MTAVRULI CAPITAL LETTER TURNED GAN +1CBA; C; 10FA; # GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD; C; 10FD; # GEORGIAN MTAVRULI CAPITAL LETTER AEN +1CBE; C; 10FE; # GEORGIAN MTAVRULI CAPITAL LETTER HARD SIGN +1CBF; C; 10FF; # GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW +1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE +1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW +1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW +1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE +1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE +1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW +1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW +1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA +1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW +1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE +1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE +1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW +1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW +1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE +1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE +1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON +1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE +1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW +1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS +1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA +1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW +1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW +1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE +1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE +1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW +1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW +1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW +1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON +1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW +1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW +1E3E; C; 1E3F; 
# LATIN CAPITAL LETTER M WITH ACUTE +1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE +1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW +1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE +1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW +1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW +1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW +1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE +1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS +1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE +1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE +1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE +1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE +1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE +1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW +1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON +1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW +1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE +1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW +1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE +1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE +1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE +1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW +1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW +1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW +1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW +1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW +1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW +1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE +1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS +1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE +1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW +1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE +1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE +1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS +1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE +1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW +1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE +1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS +1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE +1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX +1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW +1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW +1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW +1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS +1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE +1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE +1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING +1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE +1E9E; F; 0073 0073; # LATIN CAPITAL LETTER SHARP S +1E9E; S; 00DF; # LATIN CAPITAL LETTER SHARP S +1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW +1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE +1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE +1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE +1EB0; C; 1EB1; # 
LATIN CAPITAL LETTER A WITH BREVE AND GRAVE +1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE +1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE +1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW +1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW +1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE +1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE +1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE +1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE +1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW +1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW +1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE +1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE +1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE +1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE +1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE +1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE +1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW +1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW +1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE +1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE +1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE +1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE +1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE +1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW +1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE +1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW +1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE +1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE +1EFA; C; 1EFB; # LATIN CAPITAL LETTER MIDDLE-WELSH LL +1EFC; C; 1EFD; # LATIN CAPITAL LETTER MIDDLE-WELSH V +1EFE; C; 1EFF; # LATIN CAPITAL LETTER Y WITH LOOP +1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI +1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA +1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA +1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA +1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA +1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA +1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI +1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI +1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA +1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA +1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA +1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA +1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI +1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA +1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA +1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA 
WITH DASIA AND VARIA +1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA +1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA +1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI +1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI +1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI +1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA +1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA +1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA +1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA +1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA +1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI +1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI +1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI +1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA +1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA +1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA +1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA +1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI +1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA +1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA +1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI +1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI +1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA +1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA +1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA +1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA +1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA +1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI +1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI +1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI +1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI +1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI +1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI +1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI +1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI +1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F8B; S; 1F83; # GREEK CAPITAL 
LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI +1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI +1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI +1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI +1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI +1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI +1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI +1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI +1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND 
PROSGEGRAMMENI +1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI +1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI +1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI +1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI +1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI +1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI +1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI +1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY +1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON +1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA +1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA +1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE; C; 03B9; # GREEK PROSGEGRAMMENI +1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI +1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI +1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI +1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI +1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA +1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA +1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA +1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA +1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA +1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI +1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI +1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY +1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON +1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA +1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA +1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA +1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA +1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI +1FE6; F; 03C5 
0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI +1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI +1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY +1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON +1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA +1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA +1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA +1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI +1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI +1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI +1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI +1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA +1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA +1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA +1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA +1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2126; C; 03C9; # OHM SIGN +212A; C; 006B; # KELVIN SIGN +212B; C; 00E5; # ANGSTROM SIGN +2132; C; 214E; # TURNED CAPITAL F +2160; C; 2170; # ROMAN NUMERAL ONE +2161; C; 2171; # ROMAN NUMERAL TWO +2162; C; 2172; # ROMAN NUMERAL THREE +2163; C; 2173; # ROMAN NUMERAL FOUR +2164; C; 2174; # ROMAN NUMERAL FIVE +2165; C; 2175; # ROMAN NUMERAL SIX +2166; C; 2176; # ROMAN NUMERAL SEVEN +2167; C; 2177; # ROMAN NUMERAL EIGHT +2168; C; 2178; # ROMAN NUMERAL NINE +2169; C; 2179; # ROMAN NUMERAL TEN +216A; C; 217A; # ROMAN NUMERAL ELEVEN +216B; C; 217B; # ROMAN NUMERAL TWELVE +216C; C; 217C; # ROMAN NUMERAL FIFTY +216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED +216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED +216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND +2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED +24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A +24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B +24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C +24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D +24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E +24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F +24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G +24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H +24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I +24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J +24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K +24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L +24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M +24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N +24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O +24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P +24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q +24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R +24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S +24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T +24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U +24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V +24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W +24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X +24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y +24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z +2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU +2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY +2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE +2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI +2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO +2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU +2C06; C; 2C36; # 
GLAGOLITIC CAPITAL LETTER ZHIVETE +2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO +2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA +2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE +2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE +2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I +2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI +2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO +2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE +2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE +2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI +2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU +2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI +2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI +2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO +2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO +2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU +2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU +2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU +2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU +2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE +2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA +2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI +2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI +2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA +2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU +2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI +2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI +2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA +2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU +2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS +2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL +2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO +2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS +2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS +2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS +2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA +2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA +2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC +2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A +2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE +2C2F; C; 2C5F; # GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI +2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR +2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE +2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE +2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL +2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER +2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER +2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER +2C6D; C; 0251; # LATIN CAPITAL LETTER ALPHA +2C6E; C; 0271; # LATIN CAPITAL LETTER M WITH HOOK +2C6F; C; 0250; # LATIN CAPITAL LETTER TURNED A +2C70; C; 0252; # LATIN CAPITAL LETTER TURNED ALPHA +2C72; C; 2C73; # LATIN CAPITAL LETTER W WITH HOOK +2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H +2C7E; C; 023F; # LATIN CAPITAL LETTER S WITH SWASH TAIL +2C7F; C; 0240; # LATIN CAPITAL LETTER Z WITH SWASH TAIL +2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA +2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA +2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA +2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA +2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE +2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU +2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA +2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE +2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE +2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA +2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA +2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA +2C98; C; 2C99; # COPTIC CAPITAL LETTER MI +2C9A; C; 2C9B; # COPTIC 
CAPITAL LETTER NI +2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI +2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O +2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI +2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO +2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA +2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU +2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA +2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI +2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI +2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI +2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU +2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF +2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN +2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE +2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA +2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI +2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI +2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU +2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI +2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI +2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI +2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH +2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI +2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI +2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI +2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA +2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA +2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI +2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT +2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA +2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA +2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA +2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA +2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI +2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI +2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU +2CEB; C; 2CEC; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI +2CED; C; 2CEE; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2; C; 2CF3; # COPTIC CAPITAL LETTER BOHAIRIC KHEI +A640; C; A641; # CYRILLIC CAPITAL LETTER ZEMLYA +A642; C; A643; # CYRILLIC CAPITAL LETTER DZELO +A644; C; A645; # CYRILLIC CAPITAL LETTER REVERSED DZE +A646; C; A647; # CYRILLIC CAPITAL LETTER IOTA +A648; C; A649; # CYRILLIC CAPITAL LETTER DJERV +A64A; C; A64B; # CYRILLIC CAPITAL LETTER MONOGRAPH UK +A64C; C; A64D; # CYRILLIC CAPITAL LETTER BROAD OMEGA +A64E; C; A64F; # CYRILLIC CAPITAL LETTER NEUTRAL YER +A650; C; A651; # CYRILLIC CAPITAL LETTER YERU WITH BACK YER +A652; C; A653; # CYRILLIC CAPITAL LETTER IOTIFIED YAT +A654; C; A655; # CYRILLIC CAPITAL LETTER REVERSED YU +A656; C; A657; # CYRILLIC CAPITAL LETTER IOTIFIED A +A658; C; A659; # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS +A65A; C; A65B; # CYRILLIC CAPITAL LETTER BLENDED YUS +A65C; C; A65D; # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS +A65E; C; A65F; # CYRILLIC CAPITAL LETTER YN +A660; C; A661; # CYRILLIC CAPITAL LETTER REVERSED TSE +A662; C; A663; # CYRILLIC CAPITAL LETTER SOFT DE +A664; C; A665; # CYRILLIC CAPITAL LETTER SOFT EL +A666; C; A667; # CYRILLIC CAPITAL LETTER SOFT EM +A668; C; A669; # CYRILLIC CAPITAL LETTER MONOCULAR O +A66A; C; A66B; # CYRILLIC CAPITAL LETTER BINOCULAR O +A66C; C; A66D; # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O +A680; C; A681; # CYRILLIC CAPITAL LETTER DWE +A682; C; A683; # CYRILLIC CAPITAL LETTER DZWE +A684; C; A685; # CYRILLIC CAPITAL LETTER ZHWE +A686; C; A687; # CYRILLIC CAPITAL LETTER CCHE +A688; C; A689; # CYRILLIC CAPITAL LETTER DZZE +A68A; C; A68B; # CYRILLIC CAPITAL LETTER 
TE WITH MIDDLE HOOK +A68C; C; A68D; # CYRILLIC CAPITAL LETTER TWE +A68E; C; A68F; # CYRILLIC CAPITAL LETTER TSWE +A690; C; A691; # CYRILLIC CAPITAL LETTER TSSE +A692; C; A693; # CYRILLIC CAPITAL LETTER TCHE +A694; C; A695; # CYRILLIC CAPITAL LETTER HWE +A696; C; A697; # CYRILLIC CAPITAL LETTER SHWE +A698; C; A699; # CYRILLIC CAPITAL LETTER DOUBLE O +A69A; C; A69B; # CYRILLIC CAPITAL LETTER CROSSED O +A722; C; A723; # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF +A724; C; A725; # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN +A726; C; A727; # LATIN CAPITAL LETTER HENG +A728; C; A729; # LATIN CAPITAL LETTER TZ +A72A; C; A72B; # LATIN CAPITAL LETTER TRESILLO +A72C; C; A72D; # LATIN CAPITAL LETTER CUATRILLO +A72E; C; A72F; # LATIN CAPITAL LETTER CUATRILLO WITH COMMA +A732; C; A733; # LATIN CAPITAL LETTER AA +A734; C; A735; # LATIN CAPITAL LETTER AO +A736; C; A737; # LATIN CAPITAL LETTER AU +A738; C; A739; # LATIN CAPITAL LETTER AV +A73A; C; A73B; # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR +A73C; C; A73D; # LATIN CAPITAL LETTER AY +A73E; C; A73F; # LATIN CAPITAL LETTER REVERSED C WITH DOT +A740; C; A741; # LATIN CAPITAL LETTER K WITH STROKE +A742; C; A743; # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE +A744; C; A745; # LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE +A746; C; A747; # LATIN CAPITAL LETTER BROKEN L +A748; C; A749; # LATIN CAPITAL LETTER L WITH HIGH STROKE +A74A; C; A74B; # LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY +A74C; C; A74D; # LATIN CAPITAL LETTER O WITH LOOP +A74E; C; A74F; # LATIN CAPITAL LETTER OO +A750; C; A751; # LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER +A752; C; A753; # LATIN CAPITAL LETTER P WITH FLOURISH +A754; C; A755; # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL +A756; C; A757; # LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER +A758; C; A759; # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE +A75A; C; A75B; # LATIN CAPITAL LETTER R ROTUNDA +A75C; C; A75D; # LATIN CAPITAL LETTER RUM ROTUNDA +A75E; C; A75F; # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE +A760; C; A761; # LATIN CAPITAL LETTER VY +A762; C; A763; # LATIN CAPITAL LETTER VISIGOTHIC Z +A764; C; A765; # LATIN CAPITAL LETTER THORN WITH STROKE +A766; C; A767; # LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER +A768; C; A769; # LATIN CAPITAL LETTER VEND +A76A; C; A76B; # LATIN CAPITAL LETTER ET +A76C; C; A76D; # LATIN CAPITAL LETTER IS +A76E; C; A76F; # LATIN CAPITAL LETTER CON +A779; C; A77A; # LATIN CAPITAL LETTER INSULAR D +A77B; C; A77C; # LATIN CAPITAL LETTER INSULAR F +A77D; C; 1D79; # LATIN CAPITAL LETTER INSULAR G +A77E; C; A77F; # LATIN CAPITAL LETTER TURNED INSULAR G +A780; C; A781; # LATIN CAPITAL LETTER TURNED L +A782; C; A783; # LATIN CAPITAL LETTER INSULAR R +A784; C; A785; # LATIN CAPITAL LETTER INSULAR S +A786; C; A787; # LATIN CAPITAL LETTER INSULAR T +A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO +A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H +A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER +A792; C; A793; # LATIN CAPITAL LETTER C WITH BAR +A796; C; A797; # LATIN CAPITAL LETTER B WITH FLOURISH +A798; C; A799; # LATIN CAPITAL LETTER F WITH STROKE +A79A; C; A79B; # LATIN CAPITAL LETTER VOLAPUK AE +A79C; C; A79D; # LATIN CAPITAL LETTER VOLAPUK OE +A79E; C; A79F; # LATIN CAPITAL LETTER VOLAPUK UE +A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE +A7A2; C; A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE +A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE +A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE +A7A8; C; A7A9; # LATIN 
CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK +A7AB; C; 025C; # LATIN CAPITAL LETTER REVERSED OPEN E +A7AC; C; 0261; # LATIN CAPITAL LETTER SCRIPT G +A7AD; C; 026C; # LATIN CAPITAL LETTER L WITH BELT +A7AE; C; 026A; # LATIN CAPITAL LETTER SMALL CAPITAL I +A7B0; C; 029E; # LATIN CAPITAL LETTER TURNED K +A7B1; C; 0287; # LATIN CAPITAL LETTER TURNED T +A7B2; C; 029D; # LATIN CAPITAL LETTER J WITH CROSSED-TAIL +A7B3; C; AB53; # LATIN CAPITAL LETTER CHI +A7B4; C; A7B5; # LATIN CAPITAL LETTER BETA +A7B6; C; A7B7; # LATIN CAPITAL LETTER OMEGA +A7B8; C; A7B9; # LATIN CAPITAL LETTER U WITH STROKE +A7BA; C; A7BB; # LATIN CAPITAL LETTER GLOTTAL A +A7BC; C; A7BD; # LATIN CAPITAL LETTER GLOTTAL I +A7BE; C; A7BF; # LATIN CAPITAL LETTER GLOTTAL U +A7C0; C; A7C1; # LATIN CAPITAL LETTER OLD POLISH O +A7C2; C; A7C3; # LATIN CAPITAL LETTER ANGLICANA W +A7C4; C; A794; # LATIN CAPITAL LETTER C WITH PALATAL HOOK +A7C5; C; 0282; # LATIN CAPITAL LETTER S WITH HOOK +A7C6; C; 1D8E; # LATIN CAPITAL LETTER Z WITH PALATAL HOOK +A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY +A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY +A7D0; C; A7D1; # LATIN CAPITAL LETTER CLOSED INSULAR G +A7D6; C; A7D7; # LATIN CAPITAL LETTER MIDDLE SCOTS S +A7D8; C; A7D9; # LATIN CAPITAL LETTER SIGMOID S +A7F5; C; A7F6; # LATIN CAPITAL LETTER REVERSED HALF H +AB70; C; 13A0; # CHEROKEE SMALL LETTER A +AB71; C; 13A1; # CHEROKEE SMALL LETTER E +AB72; C; 13A2; # CHEROKEE SMALL LETTER I +AB73; C; 13A3; # CHEROKEE SMALL LETTER O +AB74; C; 13A4; # CHEROKEE SMALL LETTER U +AB75; C; 13A5; # CHEROKEE SMALL LETTER V +AB76; C; 13A6; # CHEROKEE SMALL LETTER GA +AB77; C; 13A7; # CHEROKEE SMALL LETTER KA +AB78; C; 13A8; # CHEROKEE SMALL LETTER GE +AB79; C; 13A9; # CHEROKEE SMALL LETTER GI +AB7A; C; 13AA; # CHEROKEE SMALL LETTER GO +AB7B; C; 13AB; # CHEROKEE SMALL LETTER GU +AB7C; C; 13AC; # CHEROKEE SMALL LETTER GV +AB7D; C; 13AD; # CHEROKEE SMALL LETTER HA +AB7E; C; 13AE; # CHEROKEE SMALL LETTER HE +AB7F; C; 13AF; # CHEROKEE SMALL LETTER HI +AB80; C; 13B0; # CHEROKEE SMALL LETTER HO +AB81; C; 13B1; # CHEROKEE SMALL LETTER HU +AB82; C; 13B2; # CHEROKEE SMALL LETTER HV +AB83; C; 13B3; # CHEROKEE SMALL LETTER LA +AB84; C; 13B4; # CHEROKEE SMALL LETTER LE +AB85; C; 13B5; # CHEROKEE SMALL LETTER LI +AB86; C; 13B6; # CHEROKEE SMALL LETTER LO +AB87; C; 13B7; # CHEROKEE SMALL LETTER LU +AB88; C; 13B8; # CHEROKEE SMALL LETTER LV +AB89; C; 13B9; # CHEROKEE SMALL LETTER MA +AB8A; C; 13BA; # CHEROKEE SMALL LETTER ME +AB8B; C; 13BB; # CHEROKEE SMALL LETTER MI +AB8C; C; 13BC; # CHEROKEE SMALL LETTER MO +AB8D; C; 13BD; # CHEROKEE SMALL LETTER MU +AB8E; C; 13BE; # CHEROKEE SMALL LETTER NA +AB8F; C; 13BF; # CHEROKEE SMALL LETTER HNA +AB90; C; 13C0; # CHEROKEE SMALL LETTER NAH +AB91; C; 13C1; # CHEROKEE SMALL LETTER NE +AB92; C; 13C2; # CHEROKEE SMALL LETTER NI +AB93; C; 13C3; # CHEROKEE SMALL LETTER NO +AB94; C; 13C4; # CHEROKEE SMALL LETTER NU +AB95; C; 13C5; # CHEROKEE SMALL LETTER NV +AB96; C; 13C6; # CHEROKEE SMALL LETTER QUA +AB97; C; 13C7; # CHEROKEE SMALL LETTER QUE +AB98; C; 13C8; # CHEROKEE SMALL LETTER QUI +AB99; C; 13C9; # CHEROKEE SMALL LETTER QUO +AB9A; C; 13CA; # CHEROKEE SMALL LETTER QUU +AB9B; C; 13CB; # CHEROKEE SMALL LETTER QUV +AB9C; C; 13CC; # CHEROKEE SMALL LETTER SA +AB9D; C; 13CD; # CHEROKEE SMALL LETTER S +AB9E; C; 13CE; # CHEROKEE SMALL LETTER SE +AB9F; C; 13CF; # CHEROKEE SMALL LETTER SI +ABA0; C; 13D0; # CHEROKEE SMALL LETTER SO +ABA1; C; 13D1; # CHEROKEE SMALL LETTER SU 
+ABA2; C; 13D2; # CHEROKEE SMALL LETTER SV +ABA3; C; 13D3; # CHEROKEE SMALL LETTER DA +ABA4; C; 13D4; # CHEROKEE SMALL LETTER TA +ABA5; C; 13D5; # CHEROKEE SMALL LETTER DE +ABA6; C; 13D6; # CHEROKEE SMALL LETTER TE +ABA7; C; 13D7; # CHEROKEE SMALL LETTER DI +ABA8; C; 13D8; # CHEROKEE SMALL LETTER TI +ABA9; C; 13D9; # CHEROKEE SMALL LETTER DO +ABAA; C; 13DA; # CHEROKEE SMALL LETTER DU +ABAB; C; 13DB; # CHEROKEE SMALL LETTER DV +ABAC; C; 13DC; # CHEROKEE SMALL LETTER DLA +ABAD; C; 13DD; # CHEROKEE SMALL LETTER TLA +ABAE; C; 13DE; # CHEROKEE SMALL LETTER TLE +ABAF; C; 13DF; # CHEROKEE SMALL LETTER TLI +ABB0; C; 13E0; # CHEROKEE SMALL LETTER TLO +ABB1; C; 13E1; # CHEROKEE SMALL LETTER TLU +ABB2; C; 13E2; # CHEROKEE SMALL LETTER TLV +ABB3; C; 13E3; # CHEROKEE SMALL LETTER TSA +ABB4; C; 13E4; # CHEROKEE SMALL LETTER TSE +ABB5; C; 13E5; # CHEROKEE SMALL LETTER TSI +ABB6; C; 13E6; # CHEROKEE SMALL LETTER TSO +ABB7; C; 13E7; # CHEROKEE SMALL LETTER TSU +ABB8; C; 13E8; # CHEROKEE SMALL LETTER TSV +ABB9; C; 13E9; # CHEROKEE SMALL LETTER WA +ABBA; C; 13EA; # CHEROKEE SMALL LETTER WE +ABBB; C; 13EB; # CHEROKEE SMALL LETTER WI +ABBC; C; 13EC; # CHEROKEE SMALL LETTER WO +ABBD; C; 13ED; # CHEROKEE SMALL LETTER WU +ABBE; C; 13EE; # CHEROKEE SMALL LETTER WV +ABBF; C; 13EF; # CHEROKEE SMALL LETTER YA +FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF +FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI +FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL +FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI +FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL +FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T +FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST +FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW +FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH +FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI +FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW +FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH +FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A +FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B +FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C +FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D +FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E +FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F +FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G +FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H +FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I +FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J +FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K +FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L +FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M +FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N +FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O +FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P +FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q +FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R +FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S +FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T +FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U +FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V +FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W +FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X +FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y +FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z +10400; C; 10428; # DESERET CAPITAL LETTER LONG I +10401; C; 10429; # DESERET CAPITAL LETTER LONG E +10402; C; 1042A; # DESERET CAPITAL LETTER LONG A +10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH +10404; C; 1042C; # DESERET CAPITAL LETTER LONG O +10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO 
+10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I +10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E +10408; C; 10430; # DESERET CAPITAL LETTER SHORT A +10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH +1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O +1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO +1040C; C; 10434; # DESERET CAPITAL LETTER AY +1040D; C; 10435; # DESERET CAPITAL LETTER OW +1040E; C; 10436; # DESERET CAPITAL LETTER WU +1040F; C; 10437; # DESERET CAPITAL LETTER YEE +10410; C; 10438; # DESERET CAPITAL LETTER H +10411; C; 10439; # DESERET CAPITAL LETTER PEE +10412; C; 1043A; # DESERET CAPITAL LETTER BEE +10413; C; 1043B; # DESERET CAPITAL LETTER TEE +10414; C; 1043C; # DESERET CAPITAL LETTER DEE +10415; C; 1043D; # DESERET CAPITAL LETTER CHEE +10416; C; 1043E; # DESERET CAPITAL LETTER JEE +10417; C; 1043F; # DESERET CAPITAL LETTER KAY +10418; C; 10440; # DESERET CAPITAL LETTER GAY +10419; C; 10441; # DESERET CAPITAL LETTER EF +1041A; C; 10442; # DESERET CAPITAL LETTER VEE +1041B; C; 10443; # DESERET CAPITAL LETTER ETH +1041C; C; 10444; # DESERET CAPITAL LETTER THEE +1041D; C; 10445; # DESERET CAPITAL LETTER ES +1041E; C; 10446; # DESERET CAPITAL LETTER ZEE +1041F; C; 10447; # DESERET CAPITAL LETTER ESH +10420; C; 10448; # DESERET CAPITAL LETTER ZHEE +10421; C; 10449; # DESERET CAPITAL LETTER ER +10422; C; 1044A; # DESERET CAPITAL LETTER EL +10423; C; 1044B; # DESERET CAPITAL LETTER EM +10424; C; 1044C; # DESERET CAPITAL LETTER EN +10425; C; 1044D; # DESERET CAPITAL LETTER ENG +10426; C; 1044E; # DESERET CAPITAL LETTER OI +10427; C; 1044F; # DESERET CAPITAL LETTER EW +104B0; C; 104D8; # OSAGE CAPITAL LETTER A +104B1; C; 104D9; # OSAGE CAPITAL LETTER AI +104B2; C; 104DA; # OSAGE CAPITAL LETTER AIN +104B3; C; 104DB; # OSAGE CAPITAL LETTER AH +104B4; C; 104DC; # OSAGE CAPITAL LETTER BRA +104B5; C; 104DD; # OSAGE CAPITAL LETTER CHA +104B6; C; 104DE; # OSAGE CAPITAL LETTER EHCHA +104B7; C; 104DF; # OSAGE CAPITAL LETTER E +104B8; C; 104E0; # OSAGE CAPITAL LETTER EIN +104B9; C; 104E1; # OSAGE CAPITAL LETTER HA +104BA; C; 104E2; # OSAGE CAPITAL LETTER HYA +104BB; C; 104E3; # OSAGE CAPITAL LETTER I +104BC; C; 104E4; # OSAGE CAPITAL LETTER KA +104BD; C; 104E5; # OSAGE CAPITAL LETTER EHKA +104BE; C; 104E6; # OSAGE CAPITAL LETTER KYA +104BF; C; 104E7; # OSAGE CAPITAL LETTER LA +104C0; C; 104E8; # OSAGE CAPITAL LETTER MA +104C1; C; 104E9; # OSAGE CAPITAL LETTER NA +104C2; C; 104EA; # OSAGE CAPITAL LETTER O +104C3; C; 104EB; # OSAGE CAPITAL LETTER OIN +104C4; C; 104EC; # OSAGE CAPITAL LETTER PA +104C5; C; 104ED; # OSAGE CAPITAL LETTER EHPA +104C6; C; 104EE; # OSAGE CAPITAL LETTER SA +104C7; C; 104EF; # OSAGE CAPITAL LETTER SHA +104C8; C; 104F0; # OSAGE CAPITAL LETTER TA +104C9; C; 104F1; # OSAGE CAPITAL LETTER EHTA +104CA; C; 104F2; # OSAGE CAPITAL LETTER TSA +104CB; C; 104F3; # OSAGE CAPITAL LETTER EHTSA +104CC; C; 104F4; # OSAGE CAPITAL LETTER TSHA +104CD; C; 104F5; # OSAGE CAPITAL LETTER DHA +104CE; C; 104F6; # OSAGE CAPITAL LETTER U +104CF; C; 104F7; # OSAGE CAPITAL LETTER WA +104D0; C; 104F8; # OSAGE CAPITAL LETTER KHA +104D1; C; 104F9; # OSAGE CAPITAL LETTER GHA +104D2; C; 104FA; # OSAGE CAPITAL LETTER ZA +104D3; C; 104FB; # OSAGE CAPITAL LETTER ZHA +10570; C; 10597; # VITHKUQI CAPITAL LETTER A +10571; C; 10598; # VITHKUQI CAPITAL LETTER BBE +10572; C; 10599; # VITHKUQI CAPITAL LETTER BE +10573; C; 1059A; # VITHKUQI CAPITAL LETTER CE +10574; C; 1059B; # VITHKUQI CAPITAL LETTER CHE +10575; C; 1059C; # VITHKUQI CAPITAL LETTER DE +10576; C; 1059D; # VITHKUQI CAPITAL LETTER DHE 
+10577; C; 1059E; # VITHKUQI CAPITAL LETTER EI +10578; C; 1059F; # VITHKUQI CAPITAL LETTER E +10579; C; 105A0; # VITHKUQI CAPITAL LETTER FE +1057A; C; 105A1; # VITHKUQI CAPITAL LETTER GA +1057C; C; 105A3; # VITHKUQI CAPITAL LETTER HA +1057D; C; 105A4; # VITHKUQI CAPITAL LETTER HHA +1057E; C; 105A5; # VITHKUQI CAPITAL LETTER I +1057F; C; 105A6; # VITHKUQI CAPITAL LETTER IJE +10580; C; 105A7; # VITHKUQI CAPITAL LETTER JE +10581; C; 105A8; # VITHKUQI CAPITAL LETTER KA +10582; C; 105A9; # VITHKUQI CAPITAL LETTER LA +10583; C; 105AA; # VITHKUQI CAPITAL LETTER LLA +10584; C; 105AB; # VITHKUQI CAPITAL LETTER ME +10585; C; 105AC; # VITHKUQI CAPITAL LETTER NE +10586; C; 105AD; # VITHKUQI CAPITAL LETTER NJE +10587; C; 105AE; # VITHKUQI CAPITAL LETTER O +10588; C; 105AF; # VITHKUQI CAPITAL LETTER PE +10589; C; 105B0; # VITHKUQI CAPITAL LETTER QA +1058A; C; 105B1; # VITHKUQI CAPITAL LETTER RE +1058C; C; 105B3; # VITHKUQI CAPITAL LETTER SE +1058D; C; 105B4; # VITHKUQI CAPITAL LETTER SHE +1058E; C; 105B5; # VITHKUQI CAPITAL LETTER TE +1058F; C; 105B6; # VITHKUQI CAPITAL LETTER THE +10590; C; 105B7; # VITHKUQI CAPITAL LETTER U +10591; C; 105B8; # VITHKUQI CAPITAL LETTER VE +10592; C; 105B9; # VITHKUQI CAPITAL LETTER XE +10594; C; 105BB; # VITHKUQI CAPITAL LETTER Y +10595; C; 105BC; # VITHKUQI CAPITAL LETTER ZE +10C80; C; 10CC0; # OLD HUNGARIAN CAPITAL LETTER A +10C81; C; 10CC1; # OLD HUNGARIAN CAPITAL LETTER AA +10C82; C; 10CC2; # OLD HUNGARIAN CAPITAL LETTER EB +10C83; C; 10CC3; # OLD HUNGARIAN CAPITAL LETTER AMB +10C84; C; 10CC4; # OLD HUNGARIAN CAPITAL LETTER EC +10C85; C; 10CC5; # OLD HUNGARIAN CAPITAL LETTER ENC +10C86; C; 10CC6; # OLD HUNGARIAN CAPITAL LETTER ECS +10C87; C; 10CC7; # OLD HUNGARIAN CAPITAL LETTER ED +10C88; C; 10CC8; # OLD HUNGARIAN CAPITAL LETTER AND +10C89; C; 10CC9; # OLD HUNGARIAN CAPITAL LETTER E +10C8A; C; 10CCA; # OLD HUNGARIAN CAPITAL LETTER CLOSE E +10C8B; C; 10CCB; # OLD HUNGARIAN CAPITAL LETTER EE +10C8C; C; 10CCC; # OLD HUNGARIAN CAPITAL LETTER EF +10C8D; C; 10CCD; # OLD HUNGARIAN CAPITAL LETTER EG +10C8E; C; 10CCE; # OLD HUNGARIAN CAPITAL LETTER EGY +10C8F; C; 10CCF; # OLD HUNGARIAN CAPITAL LETTER EH +10C90; C; 10CD0; # OLD HUNGARIAN CAPITAL LETTER I +10C91; C; 10CD1; # OLD HUNGARIAN CAPITAL LETTER II +10C92; C; 10CD2; # OLD HUNGARIAN CAPITAL LETTER EJ +10C93; C; 10CD3; # OLD HUNGARIAN CAPITAL LETTER EK +10C94; C; 10CD4; # OLD HUNGARIAN CAPITAL LETTER AK +10C95; C; 10CD5; # OLD HUNGARIAN CAPITAL LETTER UNK +10C96; C; 10CD6; # OLD HUNGARIAN CAPITAL LETTER EL +10C97; C; 10CD7; # OLD HUNGARIAN CAPITAL LETTER ELY +10C98; C; 10CD8; # OLD HUNGARIAN CAPITAL LETTER EM +10C99; C; 10CD9; # OLD HUNGARIAN CAPITAL LETTER EN +10C9A; C; 10CDA; # OLD HUNGARIAN CAPITAL LETTER ENY +10C9B; C; 10CDB; # OLD HUNGARIAN CAPITAL LETTER O +10C9C; C; 10CDC; # OLD HUNGARIAN CAPITAL LETTER OO +10C9D; C; 10CDD; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG OE +10C9E; C; 10CDE; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA OE +10C9F; C; 10CDF; # OLD HUNGARIAN CAPITAL LETTER OEE +10CA0; C; 10CE0; # OLD HUNGARIAN CAPITAL LETTER EP +10CA1; C; 10CE1; # OLD HUNGARIAN CAPITAL LETTER EMP +10CA2; C; 10CE2; # OLD HUNGARIAN CAPITAL LETTER ER +10CA3; C; 10CE3; # OLD HUNGARIAN CAPITAL LETTER SHORT ER +10CA4; C; 10CE4; # OLD HUNGARIAN CAPITAL LETTER ES +10CA5; C; 10CE5; # OLD HUNGARIAN CAPITAL LETTER ESZ +10CA6; C; 10CE6; # OLD HUNGARIAN CAPITAL LETTER ET +10CA7; C; 10CE7; # OLD HUNGARIAN CAPITAL LETTER ENT +10CA8; C; 10CE8; # OLD HUNGARIAN CAPITAL LETTER ETY +10CA9; C; 10CE9; # OLD HUNGARIAN CAPITAL LETTER ECH +10CAA; C; 
10CEA; # OLD HUNGARIAN CAPITAL LETTER U +10CAB; C; 10CEB; # OLD HUNGARIAN CAPITAL LETTER UU +10CAC; C; 10CEC; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG UE +10CAD; C; 10CED; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA UE +10CAE; C; 10CEE; # OLD HUNGARIAN CAPITAL LETTER EV +10CAF; C; 10CEF; # OLD HUNGARIAN CAPITAL LETTER EZ +10CB0; C; 10CF0; # OLD HUNGARIAN CAPITAL LETTER EZS +10CB1; C; 10CF1; # OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN +10CB2; C; 10CF2; # OLD HUNGARIAN CAPITAL LETTER US +118A0; C; 118C0; # WARANG CITI CAPITAL LETTER NGAA +118A1; C; 118C1; # WARANG CITI CAPITAL LETTER A +118A2; C; 118C2; # WARANG CITI CAPITAL LETTER WI +118A3; C; 118C3; # WARANG CITI CAPITAL LETTER YU +118A4; C; 118C4; # WARANG CITI CAPITAL LETTER YA +118A5; C; 118C5; # WARANG CITI CAPITAL LETTER YO +118A6; C; 118C6; # WARANG CITI CAPITAL LETTER II +118A7; C; 118C7; # WARANG CITI CAPITAL LETTER UU +118A8; C; 118C8; # WARANG CITI CAPITAL LETTER E +118A9; C; 118C9; # WARANG CITI CAPITAL LETTER O +118AA; C; 118CA; # WARANG CITI CAPITAL LETTER ANG +118AB; C; 118CB; # WARANG CITI CAPITAL LETTER GA +118AC; C; 118CC; # WARANG CITI CAPITAL LETTER KO +118AD; C; 118CD; # WARANG CITI CAPITAL LETTER ENY +118AE; C; 118CE; # WARANG CITI CAPITAL LETTER YUJ +118AF; C; 118CF; # WARANG CITI CAPITAL LETTER UC +118B0; C; 118D0; # WARANG CITI CAPITAL LETTER ENN +118B1; C; 118D1; # WARANG CITI CAPITAL LETTER ODD +118B2; C; 118D2; # WARANG CITI CAPITAL LETTER TTE +118B3; C; 118D3; # WARANG CITI CAPITAL LETTER NUNG +118B4; C; 118D4; # WARANG CITI CAPITAL LETTER DA +118B5; C; 118D5; # WARANG CITI CAPITAL LETTER AT +118B6; C; 118D6; # WARANG CITI CAPITAL LETTER AM +118B7; C; 118D7; # WARANG CITI CAPITAL LETTER BU +118B8; C; 118D8; # WARANG CITI CAPITAL LETTER PU +118B9; C; 118D9; # WARANG CITI CAPITAL LETTER HIYO +118BA; C; 118DA; # WARANG CITI CAPITAL LETTER HOLO +118BB; C; 118DB; # WARANG CITI CAPITAL LETTER HORR +118BC; C; 118DC; # WARANG CITI CAPITAL LETTER HAR +118BD; C; 118DD; # WARANG CITI CAPITAL LETTER SSUU +118BE; C; 118DE; # WARANG CITI CAPITAL LETTER SII +118BF; C; 118DF; # WARANG CITI CAPITAL LETTER VIYO +16E40; C; 16E60; # MEDEFAIDRIN CAPITAL LETTER M +16E41; C; 16E61; # MEDEFAIDRIN CAPITAL LETTER S +16E42; C; 16E62; # MEDEFAIDRIN CAPITAL LETTER V +16E43; C; 16E63; # MEDEFAIDRIN CAPITAL LETTER W +16E44; C; 16E64; # MEDEFAIDRIN CAPITAL LETTER ATIU +16E45; C; 16E65; # MEDEFAIDRIN CAPITAL LETTER Z +16E46; C; 16E66; # MEDEFAIDRIN CAPITAL LETTER KP +16E47; C; 16E67; # MEDEFAIDRIN CAPITAL LETTER P +16E48; C; 16E68; # MEDEFAIDRIN CAPITAL LETTER T +16E49; C; 16E69; # MEDEFAIDRIN CAPITAL LETTER G +16E4A; C; 16E6A; # MEDEFAIDRIN CAPITAL LETTER F +16E4B; C; 16E6B; # MEDEFAIDRIN CAPITAL LETTER I +16E4C; C; 16E6C; # MEDEFAIDRIN CAPITAL LETTER K +16E4D; C; 16E6D; # MEDEFAIDRIN CAPITAL LETTER A +16E4E; C; 16E6E; # MEDEFAIDRIN CAPITAL LETTER J +16E4F; C; 16E6F; # MEDEFAIDRIN CAPITAL LETTER E +16E50; C; 16E70; # MEDEFAIDRIN CAPITAL LETTER B +16E51; C; 16E71; # MEDEFAIDRIN CAPITAL LETTER C +16E52; C; 16E72; # MEDEFAIDRIN CAPITAL LETTER U +16E53; C; 16E73; # MEDEFAIDRIN CAPITAL LETTER YU +16E54; C; 16E74; # MEDEFAIDRIN CAPITAL LETTER L +16E55; C; 16E75; # MEDEFAIDRIN CAPITAL LETTER Q +16E56; C; 16E76; # MEDEFAIDRIN CAPITAL LETTER HP +16E57; C; 16E77; # MEDEFAIDRIN CAPITAL LETTER NY +16E58; C; 16E78; # MEDEFAIDRIN CAPITAL LETTER X +16E59; C; 16E79; # MEDEFAIDRIN CAPITAL LETTER D +16E5A; C; 16E7A; # MEDEFAIDRIN CAPITAL LETTER OE +16E5B; C; 16E7B; # MEDEFAIDRIN CAPITAL LETTER N +16E5C; C; 16E7C; # MEDEFAIDRIN CAPITAL LETTER R +16E5D; C; 
16E7D; # MEDEFAIDRIN CAPITAL LETTER O +16E5E; C; 16E7E; # MEDEFAIDRIN CAPITAL LETTER AI +16E5F; C; 16E7F; # MEDEFAIDRIN CAPITAL LETTER Y +1E900; C; 1E922; # ADLAM CAPITAL LETTER ALIF +1E901; C; 1E923; # ADLAM CAPITAL LETTER DAALI +1E902; C; 1E924; # ADLAM CAPITAL LETTER LAAM +1E903; C; 1E925; # ADLAM CAPITAL LETTER MIIM +1E904; C; 1E926; # ADLAM CAPITAL LETTER BA +1E905; C; 1E927; # ADLAM CAPITAL LETTER SINNYIIYHE +1E906; C; 1E928; # ADLAM CAPITAL LETTER PE +1E907; C; 1E929; # ADLAM CAPITAL LETTER BHE +1E908; C; 1E92A; # ADLAM CAPITAL LETTER RA +1E909; C; 1E92B; # ADLAM CAPITAL LETTER E +1E90A; C; 1E92C; # ADLAM CAPITAL LETTER FA +1E90B; C; 1E92D; # ADLAM CAPITAL LETTER I +1E90C; C; 1E92E; # ADLAM CAPITAL LETTER O +1E90D; C; 1E92F; # ADLAM CAPITAL LETTER DHA +1E90E; C; 1E930; # ADLAM CAPITAL LETTER YHE +1E90F; C; 1E931; # ADLAM CAPITAL LETTER WAW +1E910; C; 1E932; # ADLAM CAPITAL LETTER NUN +1E911; C; 1E933; # ADLAM CAPITAL LETTER KAF +1E912; C; 1E934; # ADLAM CAPITAL LETTER YA +1E913; C; 1E935; # ADLAM CAPITAL LETTER U +1E914; C; 1E936; # ADLAM CAPITAL LETTER JIIM +1E915; C; 1E937; # ADLAM CAPITAL LETTER CHI +1E916; C; 1E938; # ADLAM CAPITAL LETTER HA +1E917; C; 1E939; # ADLAM CAPITAL LETTER QAAF +1E918; C; 1E93A; # ADLAM CAPITAL LETTER GA +1E919; C; 1E93B; # ADLAM CAPITAL LETTER NYA +1E91A; C; 1E93C; # ADLAM CAPITAL LETTER TU +1E91B; C; 1E93D; # ADLAM CAPITAL LETTER NHA +1E91C; C; 1E93E; # ADLAM CAPITAL LETTER VA +1E91D; C; 1E93F; # ADLAM CAPITAL LETTER KHA +1E91E; C; 1E940; # ADLAM CAPITAL LETTER GBE +1E91F; C; 1E941; # ADLAM CAPITAL LETTER ZAL +1E920; C; 1E942; # ADLAM CAPITAL LETTER KPO +1E921; C; 1E943; # ADLAM CAPITAL LETTER SHA +''' +_map = dict( + (unichr(int(from_, 16)), ''.join((unichr(int(v, 16)) for v in to_.split(' ')))) + for from_, type_, to_, _ in ( + l.split('; ', 3) for l in _map_str.splitlines() if l) + if type_ in ('C', 'F')) +del _map_str + +def casefold(s): + assert isinstance(s, compat_str) + return ''.join((_map.get(c, c) for c in s)) + +__all__ = [ + casefold +] From f102e3dc4efe27e6c8697d6d117f05d1bb6d1b91 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Mon, 31 Oct 2022 21:27:14 +0000 Subject: [PATCH 52/78] [compat] Add compat_casefold and compat_re_Match, for traverse_obj() port --- youtube_dl/compat.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index eca6d63de..4b5e1f6ed 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2375,8 +2375,10 @@ except ImportError: try: compat_str = unicode # Python 2 + from .casefold import casefold as compat_casefold except NameError: compat_str = str + compat_casefold = lambda s: s.casefold() try: from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes @@ -3066,6 +3068,9 @@ except ImportError: # Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?) 
compat_re_Pattern = type(re.compile('')) +# and on the type of a match +compat_re_Match = type(re.match('a', 'a')) + if sys.version_info < (3, 3): def compat_b64decode(s, *args, **kwargs): @@ -3101,6 +3106,7 @@ __all__ = [ 'compat_Struct', 'compat_b64decode', 'compat_basestring', + 'compat_casefold', 'compat_chr', 'compat_collections_abc', 'compat_collections_chain_map', @@ -3132,6 +3138,7 @@ __all__ = [ 'compat_os_name', 'compat_parse_qs', 'compat_print', + 'compat_re_Match', 'compat_re_Pattern', 'compat_realpath', 'compat_setenv', From b7c25959f0f76aad4ee24e254f82e6c5cca2c1ff Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 1 Nov 2022 12:40:23 +0000 Subject: [PATCH 53/78] [compat] Unify unicode/str compat and move up --- youtube_dl/compat.py | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 4b5e1f6ed..28942a8c1 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -21,6 +21,19 @@ import subprocess import sys import xml.etree.ElementTree +# deal with critical unicode/str things first +try: + # Python 2 + compat_str, compat_basestring, compat_chr = ( + unicode, basestring, unichr + ) + from .casefold import casefold as compat_casefold +except NameError: + compat_str, compat_basestring, compat_chr = ( + str, str, chr + ) + compat_casefold = lambda s: s.casefold() + try: import collections.abc as compat_collections_abc except ImportError: @@ -2373,13 +2386,6 @@ try: except ImportError: import BaseHTTPServer as compat_http_server -try: - compat_str = unicode # Python 2 - from .casefold import casefold as compat_casefold -except NameError: - compat_str = str - compat_casefold = lambda s: s.casefold() - try: from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes from urllib.parse import unquote as compat_urllib_parse_unquote @@ -2510,22 +2516,11 @@ except ImportError: # Python < 3.4 return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url) -try: - compat_basestring = basestring # Python 2 -except NameError: - compat_basestring = str - -try: - compat_chr = unichr # Python 2 -except NameError: - compat_chr = chr - try: from xml.etree.ElementTree import ParseError as compat_xml_parse_error except ImportError: # Python 2.6 from xml.parsers.expat import ExpatError as compat_xml_parse_error - etree = xml.etree.ElementTree From a874871801b8b05d06e8ffe52bed94fdfc26611e Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 1 Nov 2022 19:25:59 +0000 Subject: [PATCH 54/78] [compat] Reformat casefold.py for easier updating --- youtube_dl/casefold.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/youtube_dl/casefold.py b/youtube_dl/casefold.py index 546269a3c..7e91c3811 100644 --- a/youtube_dl/casefold.py +++ b/youtube_dl/casefold.py @@ -1,8 +1,20 @@ # coding: utf-8 from __future__ import unicode_literals -from .compat import compat_str +from .compat import ( + compat_str, + compat_chr, +) +# Below is included the text of icu/CaseFolding.txt retrieved from +# https://github.com/unicode-org/icu/blob/main/icu4c/source/data/unidata/CaseFolding.txt +# In case newly foldable Unicode characters are defined, paste the new version +# of the text inside the ''' marks. 
+# The text is expected to have only blank lines andlines with 1st character #, +# all ignored, and fold definitions like this: +# `from_hex_code; space_separated_to_hex_code_list; comment` + +_map_str = ''' # CaseFolding-15.0.0.txt # Date: 2022-02-02, 23:35:35 GMT # © 2022 Unicode®, Inc. @@ -65,7 +77,6 @@ from .compat import compat_str # have the value C for the status field, and the code point itself for the mapping field. # ================================================================= -_map_str = ''' 0041; C; 0061; # LATIN CAPITAL LETTER A 0042; C; 0062; # LATIN CAPITAL LETTER B 0043; C; 0063; # LATIN CAPITAL LETTER C @@ -1627,17 +1638,22 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z 1E920; C; 1E942; # ADLAM CAPITAL LETTER KPO 1E921; C; 1E943; # ADLAM CAPITAL LETTER SHA ''' + +_parse_unichr = lambda s: compat_chr(int(s, 16)) + _map = dict( - (unichr(int(from_, 16)), ''.join((unichr(int(v, 16)) for v in to_.split(' ')))) + (_parse_unichr(from_), ''.join(map(_parse_unichr, to_.split(' ')))) for from_, type_, to_, _ in ( - l.split('; ', 3) for l in _map_str.splitlines() if l) + l.split('; ', 3) for l in _map_str.splitlines() if l and not l[0] == '#') if type_ in ('C', 'F')) del _map_str + def casefold(s): assert isinstance(s, compat_str) return ''.join((_map.get(c, c) for c in s)) + __all__ = [ casefold ] From 65ccb0dd4eb52cced7d0e11af021c09dbe2aed4a Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 1 Nov 2022 21:33:39 +0000 Subject: [PATCH 55/78] [compat] Add test for compat_casefold() --- test/test_compat.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/test/test_compat.py b/test/test_compat.py index 86ff389fd..05995372a 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -118,9 +118,21 @@ class TestCompat(unittest.TestCase): <smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>''' compat_etree_fromstring(xml) - def test_struct_unpack(self): + def test_compat_struct_unpack(self): self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,)) + def test_compat_casefold(self): + if hasattr(compat_str, 'casefold'): + # don't bother to test str.casefold() (again) + return + # thanks https://bugs.python.org/file24232/casefolding.patch + self.assertEqual(compat_casefold('hello'), 'hello') + self.assertEqual(compat_casefold('hELlo'), 'hello') + self.assertEqual(compat_casefold('ß'), 'ss') + self.assertEqual(compat_casefold('fi'), 'fi') + self.assertEqual(compat_casefold('\u03a3'), '\u03c3') + self.assertEqual(compat_casefold('A\u0345\u03a3'), 'a\u03b9\u03c3') + if __name__ == '__main__': unittest.main() From 087ddc237132103859cc00183d8d70bd75c0e44e Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 1 Nov 2022 22:47:02 +0000 Subject: [PATCH 56/78] [compat] Add test for compat_casefold() --- test/test_compat.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_compat.py b/test/test_compat.py index 05995372a..0986cff37 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -11,6 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl.compat import ( + compat_casefold, compat_getenv, compat_setenv, compat_etree_Element, From c4b19a88169fa76c5eb665d274e7270a0fe452c4 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Wed, 2 Nov 2022 11:56:26 +0000 Subject: [PATCH 57/78] [compat] Work around in case folding for narrow Python build Resolves #31324. 
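On a "narrow" Python 2 build (sys.maxunicode == 0xFFFF), unichr() rejects supplementary-plane code points such as the Adlam and Deseret letters in the table above, so building the fold map fails with "unichr() arg not in range(0x10000)". The sketch below is not part of the patch; it only restates the idea behind the workaround. The unicode-escape branch is reachable only on Python 2, since chr() on Python 3 already accepts the full code-point range:

    def parse_unichr(hex_code):
        # stand-alone restatement of the patch's _parse_unichr() helper
        cp = int(hex_code, 16)
        try:
            return chr(cp)  # unichr() on Python 2
        except ValueError:
            # Narrow Python 2 builds reject code points above U+FFFF; decoding
            # a \U escape instead yields the equivalent UTF-16 surrogate pair.
            return ('\\U%08x' % cp).decode('unicode-escape')

    print(parse_unichr('0041'))   # 'A'
    print(parse_unichr('1E900'))  # ADLAM CAPITAL LETTER ALIF (U+1E900)
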
--- youtube_dl/casefold.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/youtube_dl/casefold.py b/youtube_dl/casefold.py index 7e91c3811..748c2d491 100644 --- a/youtube_dl/casefold.py +++ b/youtube_dl/casefold.py @@ -1639,7 +1639,15 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z 1E921; C; 1E943; # ADLAM CAPITAL LETTER SHA ''' -_parse_unichr = lambda s: compat_chr(int(s, 16)) + +def _parse_unichr(s): + s = int(s, 16) + try: + return compat_chr(s) + except ValueError: + # work around "unichr() arg not in range(0x10000) (narrow Python build)" + return ('\\U%08x' % s).decode('unicode-escape') + _map = dict( (_parse_unichr(from_), ''.join(map(_parse_unichr, to_.split(' ')))) From 27ed77aabba8c9eb08d66f34092b1bfcc22c482e Mon Sep 17 00:00:00 2001 From: Andrei Lebedev <lebdron@gmail.com> Date: Thu, 3 Nov 2022 11:09:37 +0100 Subject: [PATCH 58/78] [utils] Backport traverse_obj (etc) from yt-dlp (#31156) * Backport traverse_obj and closely related function from yt-dlp (code by pukkandan) * Backport LazyList, variadic(), try_call (code by pukkandan) * Recast using yt-dlp's newer traverse_obj() implementation and tests (code by grub4k) * Add tests for Unicode case folding support matching Py3.5+ (requires f102e3d) * Improve/add tests for variadic, try_call, join_nonempty Co-authored-by: dirkf <fieldhouse@gmx.net> --- test/test_utils.py | 323 +++++++++++++++++++++++++++++++++++++++++ youtube_dl/utils.py | 339 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 662 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index f1a748dde..9d364c863 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -12,7 +12,9 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # Various small unit tests import io +import itertools import json +import re import xml.etree.ElementTree from youtube_dl.utils import ( @@ -40,11 +42,14 @@ from youtube_dl.utils import ( get_element_by_attribute, get_elements_by_class, get_elements_by_attribute, + get_first, InAdvancePagedList, int_or_none, intlist_to_bytes, is_html, + join_nonempty, js_to_json, + LazyList, limit_length, merge_dicts, mimetype2ext, @@ -79,6 +84,8 @@ from youtube_dl.utils import ( strip_or_none, subtitles_filename, timeconvert, + traverse_obj, + try_call, unescapeHTML, unified_strdate, unified_timestamp, @@ -92,6 +99,7 @@ from youtube_dl.utils import ( urlencode_postdata, urshift, update_url_query, + variadic, version_tuple, xpath_with_ns, xpath_element, @@ -112,12 +120,18 @@ from youtube_dl.compat import ( compat_getenv, compat_os_name, compat_setenv, + compat_str, compat_urlparse, compat_parse_qs, ) class TestUtil(unittest.TestCase): + + # yt-dlp shim + def assertCountEqual(self, expected, got, msg='count should be the same'): + return self.assertEqual(len(tuple(expected)), len(tuple(got)), msg=msg) + def test_timeconvert(self): self.assertTrue(timeconvert('') is None) self.assertTrue(timeconvert('bougrg') is None) @@ -1478,6 +1492,315 @@ Line 1 self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3') self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 
'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3') + def test_LazyList(self): + it = list(range(10)) + + self.assertEqual(list(LazyList(it)), it) + self.assertEqual(LazyList(it).exhaust(), it) + self.assertEqual(LazyList(it)[5], it[5]) + + self.assertEqual(LazyList(it)[5:], it[5:]) + self.assertEqual(LazyList(it)[:5], it[:5]) + self.assertEqual(LazyList(it)[::2], it[::2]) + self.assertEqual(LazyList(it)[1::2], it[1::2]) + self.assertEqual(LazyList(it)[5::-1], it[5::-1]) + self.assertEqual(LazyList(it)[6:2:-2], it[6:2:-2]) + self.assertEqual(LazyList(it)[::-1], it[::-1]) + + self.assertTrue(LazyList(it)) + self.assertFalse(LazyList(range(0))) + self.assertEqual(len(LazyList(it)), len(it)) + self.assertEqual(repr(LazyList(it)), repr(it)) + self.assertEqual(compat_str(LazyList(it)), compat_str(it)) + + self.assertEqual(list(LazyList(it, reverse=True)), it[::-1]) + self.assertEqual(list(reversed(LazyList(it))[::-1]), it) + self.assertEqual(list(reversed(LazyList(it))[1:3:7]), it[::-1][1:3:7]) + + def test_LazyList_laziness(self): + + def test(ll, idx, val, cache): + self.assertEqual(ll[idx], val) + self.assertEqual(ll._cache, list(cache)) + + ll = LazyList(range(10)) + test(ll, 0, 0, range(1)) + test(ll, 5, 5, range(6)) + test(ll, -3, 7, range(10)) + + ll = LazyList(range(10), reverse=True) + test(ll, -1, 0, range(1)) + test(ll, 3, 6, range(10)) + + ll = LazyList(itertools.count()) + test(ll, 10, 10, range(11)) + ll = reversed(ll) + test(ll, -15, 14, range(15)) + + def test_try_call(self): + def total(*x, **kwargs): + return sum(x) + sum(kwargs.values()) + + self.assertEqual(try_call(None), None, + msg='not a fn should give None') + self.assertEqual(try_call(lambda: 1), 1, + msg='int fn with no expected_type should give int') + self.assertEqual(try_call(lambda: 1, expected_type=int), 1, + msg='int fn with expected_type int should give int') + self.assertEqual(try_call(lambda: 1, expected_type=dict), None, + msg='int fn with wrong expected_type should give None') + self.assertEqual(try_call(total, args=(0, 1, 0, ), expected_type=int), 1, + msg='fn should accept arglist') + self.assertEqual(try_call(total, kwargs={'a': 0, 'b': 1, 'c': 0}, expected_type=int), 1, + msg='fn should accept kwargs') + self.assertEqual(try_call(lambda: 1, expected_type=dict), None, + msg='int fn with no expected_type should give None') + self.assertEqual(try_call(lambda x: {}, total, args=(42, ), expected_type=int), 42, + msg='expect first int result with expected_type int') + + def test_variadic(self): + self.assertEqual(variadic(None), (None, )) + self.assertEqual(variadic('spam'), ('spam', )) + self.assertEqual(variadic('spam', allowed_types=dict), 'spam') + + def test_traverse_obj(self): + _TEST_DATA = { + 100: 100, + 1.2: 1.2, + 'str': 'str', + 'None': None, + '...': Ellipsis, + 'urls': [ + {'index': 0, 'url': 'https://www.example.com/0'}, + {'index': 1, 'url': 'https://www.example.com/1'}, + ], + 'data': ( + {'index': 2}, + {'index': 3}, + ), + 'dict': {}, + } + + # Test base functionality + self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str', + msg='allow tuple path') + self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str', + msg='allow list path') + self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str', + msg='allow iterable path') + self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str', + msg='single items should be treated as a path') + 
self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA) + self.assertEqual(traverse_obj(_TEST_DATA, 100), 100) + self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2) + + # Test Ellipsis behavior + self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis), + (item for item in _TEST_DATA.values() if item is not None), + msg='`...` should give all values except `None`') + self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(), + msg='`...` selection for dicts should select all values') + self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')), + ['https://www.example.com/0', 'https://www.example.com/1'], + msg='nested `...` queries should work') + self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), range(4), + msg='`...` query result should be flattened') + + # Test function as key + self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)), + [_TEST_DATA['urls']], + msg='function as query key should perform a filter based on (key, value)') + self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], compat_str)), {'str'}, + msg='exceptions in the query function should be caught') + + # Test alternative paths + self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str', + msg='multiple `paths` should be treated as alternative paths') + self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str', + msg='alternatives should exit early') + self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None, + msg='alternatives should return `default` if exhausted') + self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, 'fail'), 100), 100, + msg='alternatives should track their own branching return') + self.assertEqual(traverse_obj(_TEST_DATA, ('dict', Ellipsis), ('data', Ellipsis)), list(_TEST_DATA['data']), + msg='alternatives on empty objects should search further') + + # Test branch and path nesting + self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'], + msg='tuple as key should be treated as branches') + self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'], + msg='list as key should be treated as branches') + self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'], + msg='double nesting in path should be treated as paths') + self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1], + msg='do not fail early on branching') + self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))), + ['https://www.example.com/0', 'https://www.example.com/1'], + msg='triple nesting in path should be treated as branches') + self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (Ellipsis, 'url')))), + ['https://www.example.com/0', 'https://www.example.com/1'], + msg='ellipsis as branch path start gets flattened') + + # Test dictionary as key + self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2}, + msg='dict key should result in a dict with the same keys') + self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}), + {0: 'https://www.example.com/0'}, + msg='dict key should allow paths') + self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}), + {0: ['https://www.example.com/0']}, + msg='tuple in dict path should be treated as branches') + self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}), + 
{0: ['https://www.example.com/0']}, + msg='double nesting in dict path should be treated as paths') + self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}), + {0: ['https://www.example.com/1', 'https://www.example.com/0']}, + msg='triple nesting in dict path should be treated as branches') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {}, + msg='remove `None` values when dict key') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=Ellipsis), {0: Ellipsis}, + msg='do not remove `None` values if `default`') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {0: {}}, + msg='do not remove empty values when dict key') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: {}}, + msg='do not remove empty values when dict key and a default') + self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {0: []}, + msg='if branch in dict key not successful, return `[]`') + + # Testing default parameter behavior + _DEFAULT_DATA = {'None': None, 'int': 0, 'list': []} + self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None, + msg='default value should be `None`') + self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=Ellipsis), Ellipsis, + msg='chained fails should result in default') + self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0, + msg='should not short cirquit on `None`') + self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1, + msg='invalid dict key should result in `default`') + self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1, + msg='`None` is a deliberate sentinel and should become `default`') + self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None, + msg='`IndexError` should result in `default`') + self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=1), 1, + msg='if branched but not successful return `default` if defined, not `[]`') + self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=None), None, + msg='if branched but not successful return `default` even if `default` is `None`') + self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail')), [], + msg='if branched but not successful return `[]`, not `default`') + self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', Ellipsis)), [], + msg='if branched but object is empty return `[]`, not `default`') + + # Testing expected_type behavior + _EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0} + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=compat_str), 'str', + msg='accept matching `expected_type` type') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int), None, + msg='reject non matching `expected_type` type') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: compat_str(x)), '0', + msg='transform type using type function') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', + expected_type=lambda _: 1 / 0), None, + msg='wrap expected_type function in try_call') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=compat_str), ['str'], + msg='eliminate items that expected_type fails on') + + # Test get_all behavior + _GET_ALL_DATA = {'key': [0, 1, 2]} + self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', Ellipsis), get_all=False), 0, + msg='if not `get_all`, return only first matching value') + self.assertEqual(traverse_obj(_GET_ALL_DATA, Ellipsis, get_all=False), [0, 1, 2], + msg='do not overflatten if 
not `get_all`') + + # Test casesense behavior + _CASESENSE_DATA = { + 'KeY': 'value0', + 0: { + 'KeY': 'value1', + 0: {'KeY': 'value2'}, + }, + # FULLWIDTH LATIN CAPITAL LETTER K + '\uff2bey': 'value3', + } + self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None, + msg='dict keys should be case sensitive unless `casesense`') + self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY', + casesense=False), 'value0', + msg='allow non matching key case if `casesense`') + self.assertEqual(traverse_obj(_CASESENSE_DATA, '\uff4bey', # FULLWIDTH LATIN SMALL LETTER K + casesense=False), 'value3', + msg='allow non matching Unicode key case if `casesense`') + self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)), + casesense=False), ['value1'], + msg='allow non matching key case in branch if `casesense`') + self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)), + casesense=False), ['value2'], + msg='allow non matching key case in branch path if `casesense`') + + # Test traverse_string behavior + _TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2} + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None, + msg='do not traverse into string if not `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0), + _traverse_string=True), 's', + msg='traverse into string if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1), + _traverse_string=True), '.', + msg='traverse into converted data if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', Ellipsis), + _traverse_string=True), list('str'), + msg='`...` branching into string should result in list') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)), + _traverse_string=True), ['s', 'r'], + msg='branching into string should result in list') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda _, x: x), + _traverse_string=True), list('str'), + msg='function branching into string should result in list') + + # Test is_user_input behavior + _IS_USER_INPUT_DATA = {'range8': list(range(8))} + self.assertEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3'), + _is_user_input=True), 3, + msg='allow for string indexing if `is_user_input`') + self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3:'), + _is_user_input=True), tuple(range(8))[3:], + msg='allow for string slice if `is_user_input`') + self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':4:2'), + _is_user_input=True), tuple(range(8))[:4:2], + msg='allow step in string slice if `is_user_input`') + self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':'), + _is_user_input=True), range(8), + msg='`:` should be treated as `...` if `is_user_input`') + with self.assertRaises(TypeError, msg='too many params should result in error'): + traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':::'), _is_user_input=True) + + # Test re.Match as input obj + mobj = re.match(r'^0(12)(?P<group>3)(4)?$', '0123') + self.assertEqual(traverse_obj(mobj, Ellipsis), [x for x in mobj.groups() if x is not None], + msg='`...` on a `re.Match` should give its `groups()`') + self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'], + msg='function on a `re.Match` should give groupno, value starting at 0') + self.assertEqual(traverse_obj(mobj, 'group'), '3', + msg='str key on a `re.Match` should give group with that name') + self.assertEqual(traverse_obj(mobj, 2), '3', + msg='int key on a `re.Match` should give group with 
that name') + self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3', + msg='str key on a `re.Match` should respect casesense') + self.assertEqual(traverse_obj(mobj, 'fail'), None, + msg='failing str key on a `re.Match` should return `default`') + self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None, + msg='failing str key on a `re.Match` should return `default`') + self.assertEqual(traverse_obj(mobj, 8), None, + msg='failing int key on a `re.Match` should return `default`') + + def test_get_first(self): + self.assertEqual(get_first([{'a': None}, {'a': 'spam'}], 'a'), 'spam') + + def test_join_nonempty(self): + self.assertEqual(join_nonempty('a', 'b'), 'a-b') + self.assertEqual(join_nonempty( + 'a', 'b', 'c', 'd', + from_dict={'a': 'c', 'c': [], 'b': 'd', 'd': None}), 'c-d') + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 23a65a81c..e3c3ccff9 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -43,6 +43,7 @@ from .compat import ( compat_HTTPError, compat_basestring, compat_chr, + compat_collections_abc, compat_cookiejar, compat_ctypes_WINFUNCTYPE, compat_etree_fromstring, @@ -1685,6 +1686,7 @@ USER_AGENTS = { NO_DEFAULT = object() +IDENTITY = lambda x: x ENGLISH_MONTH_NAMES = [ 'January', 'February', 'March', 'April', 'May', 'June', @@ -3867,6 +3869,105 @@ def detect_exe_version(output, version_re=None, unrecognized='present'): return unrecognized +class LazyList(compat_collections_abc.Sequence): + """Lazy immutable list from an iterable + Note that slices of a LazyList are lists and not LazyList""" + + class IndexError(IndexError): + def __init__(self, cause=None): + if cause: + # reproduce `raise from` + self.__cause__ = cause + super(IndexError, self).__init__() + + def __init__(self, iterable, **kwargs): + # kwarg-only + reverse = kwargs.get('reverse', False) + _cache = kwargs.get('_cache') + + self._iterable = iter(iterable) + self._cache = [] if _cache is None else _cache + self._reversed = reverse + + def __iter__(self): + if self._reversed: + # We need to consume the entire iterable to iterate in reverse + for item in self.exhaust(): + yield item + return + for item in self._cache: + yield item + for item in self._iterable: + self._cache.append(item) + yield item + + def _exhaust(self): + self._cache.extend(self._iterable) + self._iterable = [] # Discard the emptied iterable to make it pickle-able + return self._cache + + def exhaust(self): + """Evaluate the entire iterable""" + return self._exhaust()[::-1 if self._reversed else 1] + + @staticmethod + def _reverse_index(x): + return None if x is None else ~x + + def __getitem__(self, idx): + if isinstance(idx, slice): + if self._reversed: + idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1)) + start, stop, step = idx.start, idx.stop, idx.step or 1 + elif isinstance(idx, int): + if self._reversed: + idx = self._reverse_index(idx) + start, stop, step = idx, idx, 0 + else: + raise TypeError('indices must be integers or slices') + if ((start or 0) < 0 or (stop or 0) < 0 + or (start is None and step < 0) + or (stop is None and step > 0)): + # We need to consume the entire iterable to be able to slice from the end + # Obviously, never use this with infinite iterables + self._exhaust() + try: + return self._cache[idx] + except IndexError as e: + raise self.IndexError(e) + n = max(start or 0, stop or 0) - len(self._cache) + 1 + if n > 0: + self._cache.extend(itertools.islice(self._iterable, n)) + try: + 
return self._cache[idx] + except IndexError as e: + raise self.IndexError(e) + + def __bool__(self): + try: + self[-1] if self._reversed else self[0] + except self.IndexError: + return False + return True + + def __len__(self): + self._exhaust() + return len(self._cache) + + def __reversed__(self): + return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache) + + def __copy__(self): + return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache) + + def __repr__(self): + # repr and str should mimic a list. So we exhaust the iterable + return repr(self.exhaust()) + + def __str__(self): + return repr(self.exhaust()) + + class PagedList(object): def __len__(self): # This is only useful for tests @@ -4092,6 +4193,10 @@ def multipart_encode(data, boundary=None): return out, content_type +def variadic(x, allowed_types=(compat_str, bytes, dict)): + return x if isinstance(x, compat_collections_abc.Iterable) and not isinstance(x, allowed_types) else (x,) + + def dict_get(d, key_or_keys, default=None, skip_false_values=True): if isinstance(key_or_keys, (list, tuple)): for key in key_or_keys: @@ -4102,6 +4207,23 @@ def dict_get(d, key_or_keys, default=None, skip_false_values=True): return d.get(key_or_keys, default) +def try_call(*funcs, **kwargs): + + # parameter defaults + expected_type = kwargs.get('expected_type') + fargs = kwargs.get('args', []) + fkwargs = kwargs.get('kwargs', {}) + + for f in funcs: + try: + val = f(*fargs, **fkwargs) + except (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError): + pass + else: + if expected_type is None or isinstance(val, expected_type): + return val + + def try_get(src, getter, expected_type=None): if not isinstance(getter, (list, tuple)): getter = [getter] @@ -5835,3 +5957,220 @@ def clean_podcast_url(url): st\.fm # https://podsights.com/docs/ )/e )/''', '', url) + + +def traverse_obj(obj, *paths, **kwargs): + """ + Safely traverse nested `dict`s and `Sequence`s + + >>> obj = [{}, {"key": "value"}] + >>> traverse_obj(obj, (1, "key")) + "value" + + Each of the provided `paths` is tested and the first producing a valid result will be returned. + The next path will also be tested if the path branched but no results could be found. + Supported values for traversal are `Mapping`, `Sequence` and `re.Match`. + A value of None is treated as the absence of a value. + + The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`. + + The keys in the path can be one of: + - `None`: Return the current object. + - `str`/`int`: Return `obj[key]`. For `re.Match, return `obj.group(key)`. + - `slice`: Branch out and return all values in `obj[key]`. + - `Ellipsis`: Branch out and return a list of all values. + - `tuple`/`list`: Branch out and return a list of all matching values. + Read as: `[traverse_obj(obj, branch) for branch in branches]`. + - `function`: Branch out and return values filtered by the function. + Read as: `[value for key, value in obj if function(key, value)]`. + For `Sequence`s, `key` is the index of the value. + - `dict` Transform the current object and return a matching dict. + Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`. + + `tuple`, `list`, and `dict` all support nested paths and branches. + + @params paths Paths which to traverse by. + Keyword arguments: + @param default Value to return if the paths do not match. + @param expected_type If a `type`, only accept final values of this type. 
+ If any other callable, try to call the function on each result. + @param get_all If `False`, return the first matching result, otherwise all matching ones. + @param casesense If `False`, consider string dictionary keys as case insensitive. + + The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API + + @param _is_user_input Whether the keys are generated from user input. + If `True` strings get converted to `int`/`slice` if needed. + @param _traverse_string Whether to traverse into objects as strings. + If `True`, any non-compatible object will first be + converted into a string and then traversed into. + + + @returns The result of the object traversal. + If successful, `get_all=True`, and the path branches at least once, + then a list of results is returned instead. + A list is always returned if the last path branches and no `default` is given. + """ + + # parameter defaults + default = kwargs.get('default', NO_DEFAULT) + expected_type = kwargs.get('expected_type') + get_all = kwargs.get('get_all', True) + casesense = kwargs.get('casesense', True) + _is_user_input = kwargs.get('_is_user_input', False) + _traverse_string = kwargs.get('_traverse_string', False) + + # instant compat + str = compat_str + + is_sequence = lambda x: isinstance(x, compat_collections_abc.Sequence) and not isinstance(x, (str, bytes)) + # stand-in until compat_re_Match is added + compat_re_Match = type(re.match('a', 'a')) + # stand-in until casefold.py is added + try: + ''.casefold() + compat_casefold = lambda s: s.casefold() + except AttributeError: + compat_casefold = lambda s: s.lower() + casefold = lambda k: compat_casefold(k) if isinstance(k, str) else k + + if isinstance(expected_type, type): + type_test = lambda val: val if isinstance(val, expected_type) else None + else: + type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,)) + + def from_iterable(iterables): + # chain.from_iterable(['ABC', 'DEF']) --> A B C D E F + for it in iterables: + for item in it: + yield item + + def apply_key(key, obj): + if obj is None: + return + + elif key is None: + yield obj + + elif isinstance(key, (list, tuple)): + for branch in key: + _, result = apply_path(obj, branch) + for item in result: + yield item + + elif key is Ellipsis: + result = [] + if isinstance(obj, compat_collections_abc.Mapping): + result = obj.values() + elif is_sequence(obj): + result = obj + elif isinstance(obj, compat_re_Match): + result = obj.groups() + elif _traverse_string: + result = str(obj) + for item in result: + yield item + + elif callable(key): + if is_sequence(obj): + iter_obj = enumerate(obj) + elif isinstance(obj, compat_collections_abc.Mapping): + iter_obj = obj.items() + elif isinstance(obj, compat_re_Match): + iter_obj = enumerate(itertools.chain([obj.group()], obj.groups())) + elif _traverse_string: + iter_obj = enumerate(str(obj)) + else: + return + for item in (v for k, v in iter_obj if try_call(key, args=(k, v))): + yield item + + elif isinstance(key, dict): + iter_obj = ((k, _traverse_obj(obj, v)) for k, v in key.items()) + yield dict((k, v if v is not None else default) for k, v in iter_obj + if v is not None or default is not NO_DEFAULT) + + elif isinstance(obj, compat_collections_abc.Mapping): + yield (obj.get(key) if casesense or (key in obj) + else next((v for k, v in obj.items() if casefold(k) == key), None)) + + elif isinstance(obj, compat_re_Match): + if isinstance(key, int) or casesense: + try: + yield obj.group(key) + return + except IndexError: + pass + if not 
isinstance(key, str): + return + + yield next((v for k, v in obj.groupdict().items() if casefold(k) == key), None) + + else: + if _is_user_input: + key = (int_or_none(key) if ':' not in key + else slice(*map(int_or_none, key.split(':')))) + + if not isinstance(key, (int, slice)): + return + + if not is_sequence(obj): + if not _traverse_string: + return + obj = str(obj) + + try: + yield obj[key] + except IndexError: + pass + + def apply_path(start_obj, path): + objs = (start_obj,) + has_branched = False + + for key in variadic(path): + if _is_user_input and key == ':': + key = Ellipsis + + if not casesense and isinstance(key, str): + key = compat_casefold(key) + + if key is Ellipsis or isinstance(key, (list, tuple)) or callable(key): + has_branched = True + + key_func = functools.partial(apply_key, key) + objs = from_iterable(map(key_func, objs)) + + return has_branched, objs + + def _traverse_obj(obj, path, use_list=True): + has_branched, results = apply_path(obj, path) + results = LazyList(x for x in map(type_test, results) if x is not None) + + if get_all and has_branched: + return results.exhaust() if results or use_list else None + + return results[0] if results else None + + for index, path in enumerate(paths, 1): + use_list = default is NO_DEFAULT and index == len(paths) + result = _traverse_obj(obj, path, use_list) + if result is not None: + return result + + return None if default is NO_DEFAULT else default + + +def get_first(obj, keys, **kwargs): + return traverse_obj(obj, (Ellipsis,) + tuple(variadic(keys)), get_all=False, **kwargs) + + +def join_nonempty(*values, **kwargs): + + # parameter defaults + delim = kwargs.get('delim', '-') + from_dict = kwargs.get('from_dict') + + if from_dict is not None: + values = (traverse_obj(from_dict, variadic(v)) for v in values) + return delim.join(map(compat_str, filter(None, values))) From de39d1281cea499cb1adfce5ff7e0a56f1bad5fe Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Fri, 4 Nov 2022 10:13:07 +0000 Subject: [PATCH 59/78] [extractor/ceskatelevize] Back-port extractor from yt-dlp, etc (#30713) * back-port extractor, removing CeskaTelevizePoradyIE * follow redirect URL * support liveBroadcast and videobonusDetail in __NEXT__ data * return single video for singleton playlist * fix/add tests --- youtube_dl/extractor/ceskatelevize.py | 170 ++++++++++++++------------ youtube_dl/extractor/extractors.py | 5 +- 2 files changed, 92 insertions(+), 83 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 7cb4efb74..fe677d8e8 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -12,70 +12,136 @@ from ..utils import ( ExtractorError, float_or_none, sanitized_Request, - unescapeHTML, - update_url_query, + str_or_none, + traverse_obj, urlencode_postdata, USER_AGENTS, ) class CeskaTelevizeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' + _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' _TESTS = [{ - 'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', + 'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', 'info_dict': { - 'id': '61924494877246241', + 'id': '61924494877028507', 'ext': 'mp4', - 'title': 'Hyde Park Civilizace: Život v Grónsku', - 'description': 
'md5:3fec8f6bb497be5cdb0c9e8781076626', + 'title': 'Bonus 01 - En - Hyde Park Civilizace', + 'description': 'English Subtittles', 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 3350, + 'duration': 81.3, }, 'params': { # m3u8 download 'skip_download': True, }, }, { - 'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', + # live stream + 'url': 'http://www.ceskatelevize.cz/zive/ct1/', 'info_dict': { - 'id': '61924494877028507', + 'id': '102', 'ext': 'mp4', - 'title': 'Hyde Park Civilizace: Bonus 01 - En', - 'description': 'English Subtittles', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 81.3, + 'title': r'ČT1 - živé vysílání online', + 'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.', + 'is_live': True, }, 'params': { # m3u8 download 'skip_download': True, }, }, { - # live stream + # another 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', + 'only_matching': True, 'info_dict': { 'id': 402, 'ext': 'mp4', 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'is_live': True, }, + # 'skip': 'Georestricted to Czech Republic', + }, { + 'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25', + 'only_matching': True, + }, { + # video with 18+ caution trailer + 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', + 'info_dict': { + 'id': '215562210900007-bogotart', + 'title': 'Bogotart - Queer', + 'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti', + }, + 'playlist': [{ + 'info_dict': { + 'id': '61924494877311053', + 'ext': 'mp4', + 'title': 'Bogotart - Queer (Varování 18+)', + 'duration': 11.9, + }, + }, { + 'info_dict': { + 'id': '61924494877068022', + 'ext': 'mp4', + 'title': 'Bogotart - Queer (Queer)', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 1558.3, + }, + }], 'params': { # m3u8 download 'skip_download': True, }, - 'skip': 'Georestricted to Czech Republic', }, { - 'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25', + # iframe embed + 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/', 'only_matching': True, }] + def _search_nextjs_data(self, webpage, video_id, **kw): + return self._parse_json( + self._search_regex( + r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>', + webpage, 'next.js data', **kw), + video_id, **kw) + def _real_extract(self, url): playlist_id = self._match_id(url) - - webpage = self._download_webpage(url, playlist_id) + webpage, urlh = self._download_webpage_handle(url, playlist_id) + parsed_url = compat_urllib_parse_urlparse(urlh.geturl()) + site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize') + playlist_title = self._og_search_title(webpage, default=None) + if site_name and playlist_title: + playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0] + playlist_description = self._og_search_description(webpage, default=None) + if playlist_description: + playlist_description = playlist_description.replace('\xa0', ' ') + + type_ = 'IDEC' + if re.search(r'(^/porady|/zive)/', parsed_url.path): + next_data = self._search_nextjs_data(webpage, 
playlist_id) + if '/zive/' in parsed_url.path: + idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False) + else: + idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False) + if not idec: + idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False) + if idec: + type_ = 'bonus' + if not idec: + raise ExtractorError('Failed to find IDEC id') + iframe_hash = self._download_webpage( + 'https://www.ceskatelevize.cz/v-api/iframe-hash/', + playlist_id, note='Getting IFRAME hash') + query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, } + webpage = self._download_webpage( + 'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', + playlist_id, note='Downloading player', query=query) NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' if '%s</p>' % NOT_AVAILABLE_STRING in webpage: - raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) + self.raise_geo_restricted(NOT_AVAILABLE_STRING) + if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )): + raise ExtractorError('no video with IDEC available', video_id=idec, expected=True) type_ = None episode_id = None @@ -100,7 +166,7 @@ class CeskaTelevizeIE(InfoExtractor): data = { 'playlist[0][type]': type_, 'playlist[0][id]': episode_id, - 'requestUrl': compat_urllib_parse_urlparse(url).path, + 'requestUrl': parsed_url.path, 'requestSource': 'iVysilani', } @@ -108,7 +174,7 @@ class CeskaTelevizeIE(InfoExtractor): for user_agent in (None, USER_AGENTS['Safari']): req = sanitized_Request( - 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', + 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/', data=urlencode_postdata(data)) req.add_header('Content-type', 'application/x-www-form-urlencoded') @@ -130,9 +196,6 @@ class CeskaTelevizeIE(InfoExtractor): req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) req.add_header('Referer', url) - playlist_title = self._og_search_title(webpage, default=None) - playlist_description = self._og_search_description(webpage, default=None) - playlist = self._download_json(req, playlist_id, fatal=False) if not playlist: continue @@ -167,7 +230,7 @@ class CeskaTelevizeIE(InfoExtractor): entries[num]['formats'].extend(formats) continue - item_id = item.get('id') or item['assetId'] + item_id = str_or_none(item.get('id') or item['assetId']) title = item['title'] duration = float_or_none(item.get('duration')) @@ -181,8 +244,6 @@ class CeskaTelevizeIE(InfoExtractor): if playlist_len == 1: final_title = playlist_title or title - if is_live: - final_title = self._live_title(final_title) else: final_title = '%s (%s)' % (playlist_title, title) @@ -200,6 +261,8 @@ class CeskaTelevizeIE(InfoExtractor): for e in entries: self._sort_formats(e['formats']) + if len(entries) == 1: + return entries[0] return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) def _get_subtitles(self, episode_id, subs): @@ -236,54 +299,3 @@ class CeskaTelevizeIE(InfoExtractor): yield line return '\r\n'.join(_fix_subtitle(subtitles)) - - -class CeskaTelevizePoradyIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' - _TESTS = [{ - # video with 18+ caution trailer - 'url': 
'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', - 'info_dict': { - 'id': '215562210900007-bogotart', - 'title': 'Queer: Bogotart', - 'description': 'Alternativní průvodce současným queer světem', - }, - 'playlist': [{ - 'info_dict': { - 'id': '61924494876844842', - 'ext': 'mp4', - 'title': 'Queer: Bogotart (Varování 18+)', - 'duration': 10.2, - }, - }, { - 'info_dict': { - 'id': '61924494877068022', - 'ext': 'mp4', - 'title': 'Queer: Bogotart (Queer)', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 1558.3, - }, - }], - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - # iframe embed - 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - data_url = update_url_query(unescapeHTML(self._search_regex( - (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1', - r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'), - webpage, 'iframe player url', group='url')), query={ - 'autoStart': 'true', - }) - - return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key()) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 751fc38b6..e36f86be4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -208,10 +208,7 @@ from .ccc import ( from .ccma import CCMAIE from .cctv import CCTVIE from .cda import CDAIE -from .ceskatelevize import ( - CeskaTelevizeIE, - CeskaTelevizePoradyIE, -) +from .ceskatelevize import CeskaTelevizeIE from .channel9 import Channel9IE from .charlierose import CharlieRoseIE from .chaturbate import ChaturbateIE From 47e70fff8ba3de769a31fab0b3572162094733f7 Mon Sep 17 00:00:00 2001 From: Moises Lima <mozlima@users.noreply.github.com> Date: Wed, 9 Nov 2022 17:26:30 -0300 Subject: [PATCH 60/78] [PeekVids, PlayVids] Add new extractor (#29765) * Merge back-port from yt-dlp * Merge features from PR #29798 * Improve metadata extraction Co-authored-by: dirkf <fieldhouse@gmx.net> Co-authored by: AXDOOMER --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/peekvids.py | 193 +++++++++++++++++++++++++++++ 2 files changed, 197 insertions(+) create mode 100644 youtube_dl/extractor/peekvids.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e36f86be4..4d9f37424 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -909,6 +909,10 @@ from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE from .pbs import PBSIE from .pearvideo import PearVideoIE +from .peekvids import ( + PeekVidsIE, + PlayVidsIE, +) from .peertube import PeerTubeIE from .people import PeopleIE from .performgroup import PerformGroupIE diff --git a/youtube_dl/extractor/peekvids.py b/youtube_dl/extractor/peekvids.py new file mode 100644 index 000000000..c8aad564b --- /dev/null +++ b/youtube_dl/extractor/peekvids.py @@ -0,0 +1,193 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + get_element_by_class, + int_or_none, + merge_dicts, + url_or_none, +) + + +class PeekVidsIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?://(?:www\.)?peekvids\.com/ + (?:(?:[^/?#]+/){2}|embed/?\?(?:[^#]*&)?v=) + (?P<id>[^/?&#]*) + ''' + _TESTS = [{ + 'url': 
'https://peekvids.com/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp/BSyLMbN0YCd', + 'md5': '2ff6a357a9717dc9dc9894b51307e9a2', + 'info_dict': { + 'id': '1262717', + 'display_id': 'BSyLMbN0YCd', + 'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp', + 'ext': 'mp4', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': 'md5:0a61df3620de26c0af8963b1a730cd69', + 'timestamp': 1642579329, + 'upload_date': '20220119', + 'duration': 416, + 'view_count': int, + 'age_limit': 18, + 'uploader': 'SEXYhub.com', + 'categories': list, + 'tags': list, + }, + }] + _DOMAIN = 'www.peekvids.com' + + def _get_detail(self, html): + return get_element_by_class('detail-video-block', html) + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id, expected_status=429) + if '>Rate Limit Exceeded' in webpage: + raise ExtractorError( + '[%s] %s: %s' % (self.IE_NAME, video_id, 'You are suspected as a bot. Wait, or pass the captcha test on the site and provide --cookies.'), + expected=True) + + title = self._html_search_regex(r'(?s)<h1\b[^>]*>(.+?)</h1>', webpage, 'title') + + display_id = video_id + video_id = self._search_regex(r'(?s)<video\b[^>]+\bdata-id\s*=\s*["\']?([\w-]+)', webpage, 'short video ID') + srcs = self._download_json( + 'https://%s/v-alt/%s' % (self._DOMAIN, video_id), video_id, + note='Downloading list of source files') + formats = [{ + 'url': f_url, + 'format_id': f_id, + 'height': int_or_none(f_id), + } for f_url, f_id in ( + (url_or_none(f_v), f_match.group(1)) + for f_v, f_match in ( + (v, re.match(r'^data-src(\d{3,})$', k)) + for k, v in srcs.items() if v) if f_match) + if f_url + ] + if not formats: + formats = [{'url': url} for url in srcs.values()] + self._sort_formats(formats) + + info = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={}) + info.pop('url', None) + # may not have found the thumbnail if it was in a list in the ld+json + info.setdefault('thumbnail', self._og_search_thumbnail(webpage)) + detail = self._get_detail(webpage) or '' + info['description'] = self._html_search_regex( + r'(?s)(.+?)(?:%s\s*<|<ul\b)' % (re.escape(info.get('description', '')), ), + detail, 'description', default=None) or None + info['title'] = re.sub(r'\s*[,-][^,-]+$', '', info.get('title') or title) or self._generic_title(url) + + def cat_tags(name, html): + l = self._html_search_regex( + r'(?s)<span\b[^>]*>\s*%s\s*:\s*</span>(.+?)</li>' % (re.escape(name), ), + html, name, default='') + return [x for x in re.split(r'\s+', l) if x] + + return merge_dicts({ + 'id': video_id, + 'display_id': display_id, + 'age_limit': 18, + 'formats': formats, + 'categories': cat_tags('Categories', detail), + 'tags': cat_tags('Tags', detail), + 'uploader': self._html_search_regex(r'[Uu]ploaded\s+by\s(.+?)"', webpage, 'uploader', default=None), + }, info) + + +class PlayVidsIE(PeekVidsIE): + _VALID_URL = r'https?://(?:www\.)?playvids\.com/(?:embed/|\w\w?/)?(?P<id>[^/?#]*)' + _TESTS = [{ + 'url': 'https://www.playvids.com/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp', + 'md5': '2f12e50213dd65f142175da633c4564c', + 'info_dict': { + 'id': '1978030', + 'display_id': 'U3pBrYhsjXM', + 'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp', + 'ext': 'mp4', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': 'md5:0a61df3620de26c0af8963b1a730cd69', + 'timestamp': 1640435839, + 'upload_date': '20211225', + 'duration': 416, + 'view_count': int, + 'age_limit': 18, + 
'uploader': 'SEXYhub.com', + 'categories': list, + 'tags': list, + }, + }, { + 'url': 'https://www.playvids.com/es/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp', + 'only_matching': True, + }, { + 'url': 'https://www.playvids.com/embed/U3pBrYhsjXM', + 'only_matching': True, + }, { + 'url': 'https://www.playvids.com/bKmGLe3IwjZ/sv/brazzers-800-phone-sex-madison-ivy-always-on-the-line', + 'md5': 'e783986e596cafbf46411a174ab42ba6', + 'info_dict': { + 'id': '762385', + 'display_id': 'bKmGLe3IwjZ', + 'ext': 'mp4', + 'title': 'Brazzers - 1 800 Phone Sex: Madison Ivy Always On The Line 6', + 'description': 'md5:bdcd2db2b8ad85831a491d7c8605dcef', + 'timestamp': 1516958544, + 'upload_date': '20180126', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 480, + 'uploader': 'Brazzers', + 'age_limit': 18, + 'view_count': int, + 'age_limit': 18, + 'categories': list, + 'tags': list, + }, + }, { + 'url': 'https://www.playvids.com/v/47iUho33toY', + 'md5': 'b056b5049d34b648c1e86497cf4febce', + 'info_dict': { + 'id': '700621', + 'display_id': '47iUho33toY', + 'ext': 'mp4', + 'title': 'KATEE OWEN STRIPTIASE IN SEXY RED LINGERIE', + 'description': None, + 'timestamp': 1507052209, + 'upload_date': '20171003', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 332, + 'uploader': 'Cacerenele', + 'age_limit': 18, + 'view_count': int, + 'categories': list, + 'tags': list, + } + }, { + 'url': 'https://www.playvids.com/z3_7iwWCmqt/sexy-teen-filipina-striptease-beautiful-pinay-bargirl-strips-and-dances', + 'md5': 'efa09be9f031314b7b7e3bc6510cd0df', + 'info_dict': { + 'id': '1523518', + 'display_id': 'z3_7iwWCmqt', + 'ext': 'mp4', + 'title': 'SEXY TEEN FILIPINA STRIPTEASE - Beautiful Pinay Bargirl Strips and Dances', + 'description': None, + 'timestamp': 1607470323, + 'upload_date': '20201208', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 593, + 'uploader': 'yorours', + 'age_limit': 18, + 'view_count': int, + 'categories': list, + 'tags': list, + }, + }] + _DOMAIN = 'www.playvids.com' + + def _get_detail(self, html): + return get_element_by_class('detail-block', html) From 604762a9f8fa21de3f7349bd612c4f34941a5d20 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Fri, 11 Nov 2022 00:49:13 +0000 Subject: [PATCH 61/78] [common:jwplayer] Improve jwplayer extraction and parsing (#31000) * don't crash parser if jwplayer_data is invalid (empty, or no formats) * use `label` in `sources[n]` as `format_id` * relax `jwplayer().setup(...)` RE (also rework PR #27274 enhancement) * detect more manifest formats in _parse_jwplayer_formats() (from PR #29596) * improve metadata extraction (from PR #25433) * remember URLs in a set * use parse_resolution() in format * extract filesize in format (from yt-dlp) Co-authored-by: kikuyan <kikuyan@users.noreply.github.com> Co-authored-by: martin54 <martin54@users.noreply.github.com> --- youtube_dl/extractor/common.py | 40 ++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1f33a1e06..a0a796d7b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -70,6 +70,7 @@ from ..utils import ( str_or_none, str_to_int, strip_or_none, + try_get, unescapeHTML, unified_strdate, unified_timestamp, @@ -2713,7 +2714,7 @@ class InfoExtractor(object): def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json): mobj = re.search( - r'(?s)jwplayer\((?P<quote>[\'"])[^\'" 
]+(?P=quote)\)(?!</script>).*?\.setup\s*\((?P<options>[^)]+)\)', + r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''', webpage) if mobj: try: @@ -2734,9 +2735,14 @@ class InfoExtractor(object): def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): + flat_pl = try_get(jwplayer_data, lambda x: x.get('playlist') or True) + if flat_pl is None: + # not even a dict + return [] + # JWPlayer backward compatibility: flattened playlists # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 - if 'playlist' not in jwplayer_data: + if flat_pl is True: jwplayer_data = {'playlist': [jwplayer_data]} entries = [] @@ -2784,6 +2790,13 @@ class InfoExtractor(object): 'timestamp': int_or_none(video_data.get('pubdate')), 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), 'subtitles': subtitles, + 'alt_title': clean_html(video_data.get('subtitle')), # attributes used e.g. by Tele5 ... + 'genre': clean_html(video_data.get('genre')), + 'channel': clean_html(dict_get(video_data, ('category', 'channel'))), + 'season_number': int_or_none(video_data.get('season')), + 'episode_number': int_or_none(video_data.get('episode')), + 'release_year': int_or_none(video_data.get('releasedate')), + 'age_limit': int_or_none(video_data.get('age_restriction')), } # https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32 if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']): @@ -2792,7 +2805,9 @@ class InfoExtractor(object): 'url': formats[0]['url'], }) else: - self._sort_formats(formats) + # avoid exception in case of only sttls + if formats: + self._sort_formats(formats) entry['formats'] = formats entries.append(entry) if len(entries) == 1: @@ -2802,7 +2817,7 @@ class InfoExtractor(object): def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): - urls = [] + urls = set() formats = [] for source in jwplayer_sources_data: if not isinstance(source, dict): @@ -2811,14 +2826,14 @@ class InfoExtractor(object): base_url, self._proto_relative_url(source.get('file'))) if not source_url or source_url in urls: continue - urls.append(source_url) + urls.add(source_url) source_type = source.get('type') or '' ext = mimetype2ext(source_type) or determine_ext(source_url) - if source_type == 'hls' or ext == 'm3u8': + if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url: formats.extend(self._extract_m3u8_formats( source_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=m3u8_id, fatal=False)) - elif source_type == 'dash' or ext == 'mpd': + elif source_type == 'dash' or ext == 'mpd' or 'format=mpd-time-csf' in source_url: formats.extend(self._extract_mpd_formats( source_url, video_id, mpd_id=mpd_id, fatal=False)) elif ext == 'smil': @@ -2833,20 +2848,23 @@ class InfoExtractor(object): 'ext': ext, }) else: + format_id = str_or_none(source.get('label')) height = int_or_none(source.get('height')) - if height is None: + if height is None and format_id: # Often no height is provided but there is a label in # format like "1080p", "720p SD", or 1080. 
- height = int_or_none(self._search_regex( - r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''), - 'height', default=None)) + height = parse_resolution(format_id).get('height') a_format = { 'url': source_url, 'width': int_or_none(source.get('width')), 'height': height, 'tbr': int_or_none(source.get('bitrate'), scale=1000), + 'filesize': int_or_none(source.get('filesize')), 'ext': ext, } + if format_id: + a_format['format_id'] = format_id + if source_url.startswith('rtmp'): a_format['ext'] = 'flv' # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as From c2f9be3e63a000cf20e9e4ad789a4f5453d00eb7 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Sat, 12 Nov 2022 11:55:05 +0000 Subject: [PATCH 62/78] [generic] Add KVS player extraction --- youtube_dl/extractor/generic.py | 183 ++++++++++++++++++++++++++++++++ 1 file changed, 183 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a9c064105..01e406750 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -28,6 +28,7 @@ from ..utils import ( mimetype2ext, orderedSet, parse_duration, + parse_resolution, sanitized_Request, smuggle_url, unescapeHTML, @@ -2227,6 +2228,97 @@ class GenericIE(InfoExtractor): # Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed) 'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html', 'only_matching': True, + }, { + # KVS Player + 'url': 'https://www.kvs-demo.com/videos/105/kelis-4th-of-july/', + 'info_dict': { + 'id': '105', + 'display_id': 'kelis-4th-of-july', + 'ext': 'mp4', + 'title': 'Kelis - 4th Of July', + 'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', + }, + }, { + # KVS Player + 'url': 'https://www.kvs-demo.com/embed/105/', + 'info_dict': { + 'id': '105', + 'display_id': 'kelis-4th-of-july', + 'ext': 'mp4', + 'title': 'Kelis - 4th Of July / Embed Player', + 'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', + }, + 'params': { + 'skip_download': True, + }, + }, { + # KVS Player + 'url': 'https://thisvid.com/videos/fruit-is-healthy/', + 'md5': 'f83e52f409b9139a7efee58ef926a72e', + 'info_dict': { + 'id': '7079579', + 'display_id': 'fruit-is-healthy', + 'ext': 'mp4', + 'title': 'Fruit is healthy - ThisVid.com', + 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/7079000/7079579/preview.jpg', + } + }, { + # KVS Player + 'url': 'https://thisvid.com/embed/7079579/', + 'info_dict': { + 'id': '7079579', + 'display_id': 'fruit-is-healthy', + 'ext': 'mp4', + 'title': 'Fruit is healthy - ThisVid.com', + 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/7079000/7079579/preview.jpg', + }, + 'params': { + 'skip_download': True, + }, + }, { + # KVS Player + 'url': 'https://youix.com/video/leningrad-zoj/', + 'md5': '94f96ba95706dc3880812b27b7d8a2b8', + 'info_dict': { + 'id': '18485', + 'display_id': 'leningrad-zoj', + 'ext': 'mp4', + 'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com', + 'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg', + }, + }, { + # KVS Player + 'url': 'https://youix.com/embed/18485', + 'md5': '94f96ba95706dc3880812b27b7d8a2b8', + 'info_dict': { + 'id': '18485', + 'display_id': 'leningrad-zoj', + 'ext': 'mp4', + 'title': 'Ленинград - ЗОЖ', + 'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg', + }, + }, { + 
# KVS Player + 'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/', + 'md5': '94166bdb26b4cb1fb9214319a629fc51', + 'info_dict': { + 'id': '21217', + 'display_id': '40-nochey-2016', + 'ext': 'mp4', + 'title': '40 ночей (2016) - BogMedia.org', + 'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg', + }, + }, { + # KVS Player (for sites that serve kt_player.js via non-https urls) + 'url': 'http://www.camhub.world/embed/389508', + 'md5': 'fbe89af4cfb59c8fd9f34a202bb03e32', + 'info_dict': { + 'id': '389508', + 'display_id': 'syren-de-mer-onlyfans-05-07-2020have-a-happy-safe-holiday5f014e68a220979bdb8cd-source', + 'ext': 'mp4', + 'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер', + 'thumbnail': r're:https?://www\.camhub\.world/contents/videos_screenshots/389000/389508/preview\.mp4\.jpg', + }, }, ] @@ -2332,6 +2424,87 @@ class GenericIE(InfoExtractor): 'title': title, } + def _extract_kvs(self, url, webpage, video_id): + + def getlicensetoken(license): + modlicense = license.replace('$', '').replace('0', '1') + center = int(len(modlicense) / 2) + fronthalf = int(modlicense[:center + 1]) + backhalf = int(modlicense[center:]) + + modlicense = compat_str(4 * abs(fronthalf - backhalf)) + + def parts(): + for o in range(0, center + 1): + for i in range(1, 5): + yield compat_str((int(license[o + i]) + int(modlicense[o])) % 10) + + return ''.join(parts()) + + def getrealurl(video_url, license_code): + if not video_url.startswith('function/0/'): + return video_url # not obfuscated + + url_path, _, url_query = video_url.partition('?') + urlparts = url_path.split('/')[2:] + license = getlicensetoken(license_code) + newmagic = urlparts[5][:32] + + def spells(x, o): + l = (o + sum(int(n) for n in license[o:])) % 32 + for i in range(0, len(x)): + yield {l: x[o], o: x[l]}.get(i, x[i]) + + for o in range(len(newmagic) - 1, -1, -1): + newmagic = ''.join(spells(newmagic, o)) + + urlparts[5] = newmagic + urlparts[5][32:] + return '/'.join(urlparts) + '?' + url_query + + flashvars = self._search_regex( + r'(?s)<script\b[^>]*>.*?var\s+flashvars\s*=\s*(\{.+?\});.*?</script>', + webpage, 'flashvars') + flashvars = self._parse_json(flashvars, video_id, transform_source=js_to_json) + + # extract the part after the last / as the display_id from the + # canonical URL. + display_id = self._search_regex( + r'(?:<link href="https?://[^"]+/(.+?)/?" 
rel="canonical"\s*/?>' + r'|<link rel="canonical" href="https?://[^"]+/(.+?)/?"\s*/?>)', + webpage, 'display_id', fatal=False + ) + title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)</(?:h1|title)>', webpage, 'title') + + thumbnail = flashvars['preview_url'] + if thumbnail.startswith('//'): + protocol, _, _ = url.partition('/') + thumbnail = protocol + thumbnail + + url_keys = list(filter(re.compile(r'^video_(?:url|alt_url\d*)$').match, flashvars.keys())) + formats = [] + for key in url_keys: + if '/get_file/' not in flashvars[key]: + continue + format_id = flashvars.get(key + '_text', key) + formats.append(merge_dicts( + parse_resolution(format_id) or parse_resolution(flashvars[key]), { + 'url': getrealurl(flashvars[key], flashvars['license_code']), + 'format_id': format_id, + 'ext': 'mp4', + })) + if not formats[-1].get('height'): + formats[-1]['quality'] = 1 + + self._sort_formats(formats) + + return { + 'id': flashvars['video_id'], + 'display_id': display_id, + 'title': title, + 'thumbnail': thumbnail, + 'formats': formats, + } + def _real_extract(self, url): if url.startswith('//'): return self.url_result(self.http_scheme() + url) @@ -3389,6 +3562,16 @@ class GenericIE(InfoExtractor): info_dict['formats'] = formats return info_dict + # Look for generic KVS player (before ld+json for tests) + found = re.search( + r'<script\b[^>]+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P<ver>(?P<maj_ver>\d+)(\.\d+)+)\1[^>]*>', + webpage) + if found: + self.report_extraction('KVS Player') + if found.group('maj_ver') not in ('4', '5', '6'): + self.report_warning('Untested major version (%s) in player engine - download may fail.' % (found.group('ver'), )) + return self._extract_kvs(url, webpage, video_id) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') From 1a4fbe8462f5e531a891aeac7db6c0bde49c5536 Mon Sep 17 00:00:00 2001 From: FraFraFra-LongD <85188920+FraFraFra-LongD@users.noreply.github.com> Date: Sun, 13 Nov 2022 14:22:04 +0100 Subject: [PATCH 63/78] Added ThisVid.com support (#29187) * add ThisVidIE, ThisVidMemberIE, ThisVidPlaylistIE * redirect embed to main page for more metadata * use KVS extraction newly added to GenericIE and remove duplicate tests * also add MrDeepFake etc compat to GenericIE (closes #22390) Co-authored-by: dirkf <fieldhouse@gmx.net> --- youtube_dl/extractor/extractors.py | 5 + youtube_dl/extractor/generic.py | 54 ++++--- youtube_dl/extractor/thisvid.py | 218 +++++++++++++++++++++++++++++ 3 files changed, 249 insertions(+), 28 deletions(-) create mode 100644 youtube_dl/extractor/thisvid.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4d9f37424..947cbe8fd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1265,6 +1265,11 @@ from .theweatherchannel import TheWeatherChannelIE from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE from .thisoldhouse import ThisOldHouseIE +from .thisvid import ( + ThisVidIE, + ThisVidMemberIE, + ThisVidPlaylistIE, +) from .threeqsdn import ThreeQSDNIE from .tiktok import ( TikTokIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 01e406750..597611157 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2252,31 +2252,7 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, { - # KVS Player - 'url': 
'https://thisvid.com/videos/fruit-is-healthy/', - 'md5': 'f83e52f409b9139a7efee58ef926a72e', - 'info_dict': { - 'id': '7079579', - 'display_id': 'fruit-is-healthy', - 'ext': 'mp4', - 'title': 'Fruit is healthy - ThisVid.com', - 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/7079000/7079579/preview.jpg', - } - }, { - # KVS Player - 'url': 'https://thisvid.com/embed/7079579/', - 'info_dict': { - 'id': '7079579', - 'display_id': 'fruit-is-healthy', - 'ext': 'mp4', - 'title': 'Fruit is healthy - ThisVid.com', - 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/7079000/7079579/preview.jpg', - }, - 'params': { - 'skip_download': True, - }, - }, { - # KVS Player + # KVS Player (tested also in thisvid.py) 'url': 'https://youix.com/video/leningrad-zoj/', 'md5': '94f96ba95706dc3880812b27b7d8a2b8', 'info_dict': { @@ -2306,6 +2282,7 @@ class GenericIE(InfoExtractor): 'display_id': '40-nochey-2016', 'ext': 'mp4', 'title': '40 ночей (2016) - BogMedia.org', + 'description': 'md5:4e6d7d622636eb7948275432eb256dc3', 'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg', }, }, { @@ -2319,6 +2296,18 @@ class GenericIE(InfoExtractor): 'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер', 'thumbnail': r're:https?://www\.camhub\.world/contents/videos_screenshots/389000/389508/preview\.mp4\.jpg', }, + }, { + 'url': 'https://mrdeepfakes.com/video/5/selena-gomez-pov-deep-fakes', + 'md5': 'fec4ad5ec150f655e0c74c696a4a2ff4', + 'info_dict': { + 'id': '5', + 'display_id': 'selena-gomez-pov-deep-fakes', + 'ext': 'mp4', + 'title': 'Selena Gomez POV (Deep Fakes) DeepFake Porn - MrDeepFakes', + 'description': 'md5:17d1f84b578c9c26875ac5ef9a932354', + 'height': 720, + 'age_limit': 18, + }, }, ] @@ -2491,6 +2480,7 @@ class GenericIE(InfoExtractor): 'url': getrealurl(flashvars[key], flashvars['license_code']), 'format_id': format_id, 'ext': 'mp4', + 'http_headers': {'Referer': url}, })) if not formats[-1].get('height'): formats[-1]['quality'] = 1 @@ -2713,9 +2703,15 @@ class GenericIE(InfoExtractor): # but actually don't. AGE_LIMIT_MARKERS = [ r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>', + r'>[^<]*you acknowledge you are at least (\d+) years old', ] - if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS): - age_limit = 18 + for marker in AGE_LIMIT_MARKERS: + m = re.search(marker, webpage) + if not m: + continue + age_limit = max( + age_limit or 0, + int_or_none(m.groups() and m.group(1), default=18)) # video uploader is domain name video_uploader = self._search_regex( @@ -3570,7 +3566,9 @@ class GenericIE(InfoExtractor): self.report_extraction('KVS Player') if found.group('maj_ver') not in ('4', '5', '6'): self.report_warning('Untested major version (%s) in player engine - download may fail.' 
% (found.group('ver'), )) - return self._extract_kvs(url, webpage, video_id) + return merge_dicts( + self._extract_kvs(url, webpage, video_id), + info_dict) # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( diff --git a/youtube_dl/extractor/thisvid.py b/youtube_dl/extractor/thisvid.py new file mode 100644 index 000000000..bc4bcb2d1 --- /dev/null +++ b/youtube_dl/extractor/thisvid.py @@ -0,0 +1,218 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import itertools + +from .common import InfoExtractor +from ..compat import ( + compat_urlparse, +) +from ..utils import ( + clean_html, + get_element_by_class, + int_or_none, + merge_dicts, + url_or_none, + urljoin, +) + + +class ThisVidIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?thisvid\.com/(?P<type>videos|embed)/(?P<id>[A-Za-z0-9-]+)' + _TESTS = [{ + 'url': 'https://thisvid.com/videos/sitting-on-ball-tight-jeans/', + 'md5': '839becb572995687e11a69dc4358a386', + 'info_dict': { + 'id': '3533241', + 'ext': 'mp4', + 'title': 'Sitting on ball tight jeans', + 'description': 'md5:372353bb995883d1b65fddf507489acd', + 'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg', + 'uploader_id': '150629', + 'uploader': 'jeanslevisjeans', + 'age_limit': 18, + } + }, { + 'url': 'https://thisvid.com/embed/3533241/', + 'md5': '839becb572995687e11a69dc4358a386', + 'info_dict': { + 'id': '3533241', + 'ext': 'mp4', + 'title': 'Sitting on ball tight jeans', + 'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg', + 'uploader_id': '150629', + 'uploader': 'jeanslevisjeans', + 'age_limit': 18, + } + }] + + def _real_extract(self, url): + main_id, type_ = re.match(self._VALID_URL, url).group('id', 'type') + webpage = self._download_webpage(url, main_id) + + title = self._html_search_regex( + r'<title\b[^>]*?>(?:Video:\s+)?(.+?)(?:\s+-\s+ThisVid(?:\.com| tube))?', + webpage, 'title') + + if type_ == 'embed': + # look for more metadata + video_alt_url = url_or_none(self._search_regex( + r'''video_alt_url\s*:\s+'(%s/)',''' % (self._VALID_URL, ), + webpage, 'video_alt_url', default=None)) + if video_alt_url and video_alt_url != url: + webpage = self._download_webpage( + video_alt_url, main_id, + note='Redirecting embed to main page', fatal=False) or webpage + + video_holder = get_element_by_class('video-holder', webpage) or '' + if '>This video is a private video' in video_holder: + self.raise_login_required( + (clean_html(video_holder) or 'Private video').split('\n', 1)[0]) + + uploader = self._html_search_regex( + r'''(?s)]*>Added by:\s*]+\bclass\s*=\s*["']author\b[^>]+\bhref\s*=\s*["']https://thisvid\.com/members/([0-9]+/.{3,}?)\s*''', + webpage, 'uploader', default='') + uploader = re.split(r'''/["'][^>]*>\s*''', uploader) + if len(uploader) == 2: + # id must be non-empty, uploader could be '' + uploader_id, uploader = uploader + uploader = uploader or None + else: + uploader_id = uploader = None + + return merge_dicts({ + '_type': 'url_transparent', + 'title': title, + 'age_limit': 18, + 'uploader': uploader, + 'uploader_id': uploader_id, + }, self.url_result(url, ie='Generic')) + + +class ThisVidMemberIE(InfoExtractor): + _VALID_URL = r'https?://thisvid\.com/members/(?P\d+)' + _TESTS = [{ + 'url': 'https://thisvid.com/members/2140501/', + 'info_dict': { + 'id': '2140501', + 'title': 'Rafflesia\'s Profile', + }, + 'playlist_mincount': 16, + }, { + 'url': 'https://thisvid.com/members/2140501/favourite_videos/', + 'info_dict': { + 'id': '2140501', + 'title': 
'Rafflesia\'s Favourite Videos', + }, + 'playlist_mincount': 15, + }, { + 'url': 'https://thisvid.com/members/636468/public_videos/', + 'info_dict': { + 'id': '636468', + 'title': 'Happymouth\'s Public Videos', + }, + 'playlist_mincount': 196, + }, + ] + + def _urls(self, html): + for m in re.finditer(r''']+\bhref\s*=\s*["'](?P%s\b)[^>]+>''' % (ThisVidIE._VALID_URL, ), html): + yield m.group('url') + + def _real_extract(self, url): + pl_id = self._match_id(url) + webpage = self._download_webpage(url, pl_id) + + title = re.split( + r'(?i)\s*\|\s*ThisVid\.com\s*$', + self._og_search_title(webpage, default=None) or self._html_search_regex(r'(?s)]*>(.+?)]+\bhref\s*=\s*("|')(?P(?!#)(?:(?!\1).)+)''', + next_page, 'next page link', group='url', default=None)) + # in case a member page should have pagination-next with empty link, not just `else:` + if next_page is None: + # playlist page + parsed_url = compat_urlparse.urlparse(page_url) + base_path, num = parsed_url.path.rsplit('/', 1) + num = int_or_none(num) + if num is None: + base_path, num = parsed_url.path.rstrip('/'), 1 + parsed_url = parsed_url._replace(path=base_path + ('/%d' % (num + 1, ))) + next_page = compat_urlparse.urlunparse(parsed_url) + if page_url == next_page: + next_page = None + if not next_page: + break + page_url, html = next_page, None + + return self.playlist_from_matches( + entries(url, webpage), playlist_id=pl_id, playlist_title=title, ie='ThisVid') + + +class ThisVidPlaylistIE(ThisVidMemberIE): + _VALID_URL = r'https?://thisvid\.com/playlist/(?P\d+)/video/(?P[A-Za-z0-9-]+)' + _TESTS = [{ + 'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/', + 'info_dict': { + 'id': '6615', + 'title': 'Underwear Stuff', + }, + 'playlist_mincount': 200, + }, { + 'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/', + 'info_dict': { + 'id': '1072387', + 'ext': 'mp4', + 'title': 'Big Italian Booty 28', + 'description': 'md5:1bccf7b13765e18fb27bf764dba7ede2', + 'uploader_id': '367912', + 'uploader': 'Jcmusclefun', + 'age_limit': 18, + }, + 'params': { + 'noplaylist': True, + }, + }] + + def _get_video_url(self, pl_url): + video_id = re.match(self._VALID_URL, pl_url).group('video_id') + return urljoin(pl_url, '/videos/%s/' % (video_id, )) + + def _urls(self, html): + for m in re.finditer(r''']+\bhref\s*=\s*["'](?P%s\b)[^>]+>''' % (self._VALID_URL, ), html): + yield self._get_video_url(m.group('url')) + + def _real_extract(self, url): + pl_id = self._match_id(url) + + if self._downloader.params.get('noplaylist'): + self.to_screen('Downloading just the featured video because of --no-playlist') + return self.url_result(self._get_video_url(url), 'ThisVid') + + self.to_screen( + 'Downloading playlist %s - add --no-playlist to download just the featured video' % (pl_id, )) + result = super(ThisVidPlaylistIE, self)._real_extract(url) + + # rework title returned as `the title - the title` + title = result['title'] + t_len = len(title) + if t_len > 5 and t_len % 2 != 0: + t_len = t_len // 2 + if title[t_len] == '-': + title = [t.strip() for t in (title[:t_len], title[t_len + 1:])] + if title[0] and title[0] == title[1]: + result['title'] = title[0] + return result From fc2beab0e701c497a003f11fef5c0df54fba1da3 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 13 Nov 2022 14:59:30 +0000 Subject: [PATCH 64/78] [generic] Improve KVS (etc) extraction * detect kt_player('kt_player', 'https://.../kt_player.swf?v=5... * detect age limit if 18 USC 2257 is mentioned * test with shooshtime.com Partially resolves #31332. 
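As a rough, standalone illustration (not part of the patch itself), the two kt_player signatures described above can be probed with a helper like the one below. The function name detect_kvs_version and the sample markup are invented for the example; the regular expressions mirror the ones this commit adds to generic.py.

import re

def detect_kvs_version(webpage):
    # Signature 1: <script src=".../kt_player.js?v=X.Y.Z">
    # Signature 2: inline kt_player('kt_player', '.../kt_player.swf?v=X.Y.Z', ...)
    patterns = (
        r'<script\b[^>]+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P<ver>\d+(?:\.\d+)+)\1[^>]*>',
        r'kt_player\s*\(\s*(["\'])(?:(?!\1)[\w\W])+\1\s*,\s*(["\'])https?://(?:\S+?/)+kt_player\.swf\?v=(?P<ver>\d+(?:\.\d+)+)\2\s*,',
    )
    for pattern in patterns:
        m = re.search(pattern, webpage)
        if m:
            return m.group('ver')
    return None

page = "<script src='https://www.kvs-demo.com/player/kt_player.js?v=5.5.1'></script>"
print(detect_kvs_version(page))  # -> 5.5.1

The major-version check is unchanged (warn on anything outside 4/5/6). The age-limit markers are generalised in the same commit: markers that capture an explicit age (e.g. "you acknowledge you are at least N years old") feed that value in, while bare markers such as the 18 USC 2257 notice fall back to 18.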
--- youtube_dl/extractor/generic.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 597611157..3e8281ed3 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -36,6 +36,7 @@ from ..utils import ( unsmuggle_url, UnsupportedError, url_or_none, + urljoin, xpath_attr, xpath_text, xpath_with_ns, @@ -2308,6 +2309,17 @@ class GenericIE(InfoExtractor): 'height': 720, 'age_limit': 18, }, + }, { + 'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/', + 'md5': 'e2f0a4c329f7986280b7328e24036d60', + 'info_dict': { + 'id': '284002', + 'display_id': 'just-out-of-the-shower-joi', + 'ext': 'mp4', + 'title': 'Just Out Of The Shower JOI - Shooshtime', + 'height': 720, + 'age_limit': 18, + }, }, ] @@ -2477,7 +2489,7 @@ class GenericIE(InfoExtractor): format_id = flashvars.get(key + '_text', key) formats.append(merge_dicts( parse_resolution(format_id) or parse_resolution(flashvars[key]), { - 'url': getrealurl(flashvars[key], flashvars['license_code']), + 'url': urljoin(url, getrealurl(flashvars[key], flashvars['license_code'])), 'format_id': format_id, 'ext': 'mp4', 'http_headers': {'Referer': url}, @@ -2704,6 +2716,7 @@ class GenericIE(InfoExtractor): AGE_LIMIT_MARKERS = [ r'Proudly Labeled RTA', r'>[^<]*you acknowledge you are at least (\d+) years old', + r'>\s*(?:18\s+U(?:\.S\.C\.|SC)\s+)?(?:§+\s*)?2257\b', ] for marker in AGE_LIMIT_MARKERS: m = re.search(marker, webpage) @@ -3559,13 +3572,15 @@ class GenericIE(InfoExtractor): return info_dict # Look for generic KVS player (before ld+json for tests) - found = re.search( - r']+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P(?P\d+)(\.\d+)+)\1[^>]*>', - webpage) + found = self._search_regex( + (r']+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P\d+(?:\.\d+)+)\1[^>]*>', + # kt_player('kt_player', 'https://i.shoosh.co/player/kt_player.swf?v=5.5.1', ... + r'kt_player\s*\(\s*(["\'])(?:(?!\1)[\w\W])+\1\s*,\s*(["\'])https?://(?:\S+?/)+kt_player\.swf\?v=(?P\d+(?:\.\d+)+)\2\s*,', + ), webpage, 'KVS player', group='ver', default=False) if found: - self.report_extraction('KVS Player') - if found.group('maj_ver') not in ('4', '5', '6'): - self.report_warning('Untested major version (%s) in player engine - download may fail.' % (found.group('ver'), )) + self.report_extraction('%s: KVS Player' % (video_id, )) + if found.split('.')[0] not in ('4', '5', '6'): + self.report_warning('Untested major version (%s) in player engine - download may fail.' % (found, )) return merge_dicts( self._extract_kvs(url, webpage, video_id), info_dict) From 195f22f679330549882a8234e7234942893a4902 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 13 Nov 2022 15:09:29 +0000 Subject: [PATCH 65/78] [generic] Improve KVS (etc) extraction --- youtube_dl/extractor/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3e8281ed3..0e473e952 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -3575,8 +3575,8 @@ class GenericIE(InfoExtractor): found = self._search_regex( (r']+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P\d+(?:\.\d+)+)\1[^>]*>', # kt_player('kt_player', 'https://i.shoosh.co/player/kt_player.swf?v=5.5.1', ... 
- r'kt_player\s*\(\s*(["\'])(?:(?!\1)[\w\W])+\1\s*,\s*(["\'])https?://(?:\S+?/)+kt_player\.swf\?v=(?P\d+(?:\.\d+)+)\2\s*,', - ), webpage, 'KVS player', group='ver', default=False) + r'kt_player\s*\(\s*(["\'])(?:(?!\1)[\w\W])+\1\s*,\s*(["\'])https?://(?:\S+?/)+kt_player\.swf\?v=(?P\d+(?:\.\d+)+)\2\s*,', + ), webpage, 'KVS player', group='ver', default=False) if found: self.report_extraction('%s: KVS Player' % (video_id, )) if found.split('.')[0] not in ('4', '5', '6'): From 14ef89a8dab4f6ba6185d6f5bf0317a705d7b842 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 1 Feb 2023 09:39:49 +0530 Subject: [PATCH 66/78] Support `if` statements Fix for yt-dlp/yt_dlp#6131 Closes #31509 --- test/test_jsinterp.py | 32 ++++++++++++++++++++++++++++++++ test/test_youtube_signature.py | 4 ++++ youtube_dl/jsinterp.py | 21 ++++++++++++++++++--- 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 5121c8cf8..c47def737 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -158,6 +158,38 @@ class TestJSInterpreter(unittest.TestCase): self.assertEqual(jsi.call_function('z'), 5) self.assertEqual(jsi.call_function('y'), 2) + def test_if(self): + jsi = JSInterpreter(''' + function x() { + let a = 9; + if (0==0) {a++} + return a + }''') + self.assertEqual(jsi.call_function('x'), 10) + + jsi = JSInterpreter(''' + function x() { + if (0==0) {return 10} + }''') + self.assertEqual(jsi.call_function('x'), 10) + + jsi = JSInterpreter(''' + function x() { + if (0!=0) {return 1} + else {return 10} + }''') + self.assertEqual(jsi.call_function('x'), 10) + + """ # Unsupported + jsi = JSInterpreter(''' + function x() { + if (0!=0) {return 1} + else if (1==0) {return 2} + else {return 10} + }''') + self.assertEqual(jsi.call_function('x'), 10) + """ + def test_for_loop(self): # function x() { a=0; for (i=0; i-10; i++) {a++} a } jsi = JSInterpreter(''' diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 4e678cae0..ac37ffa45 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -135,6 +135,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js', 'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ', ), + ( + 'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js', + 'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A', + ), ] diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 530a705b4..9a3b8d7f2 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -214,7 +214,7 @@ class JSInterpreter(object): def __init__(self, msg, *args, **kwargs): expr = kwargs.pop('expr', None) if expr is not None: - msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr) + msg = '{0} in: {1!r}'.format(msg.rstrip(), expr[:100]) super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs) @classmethod @@ -268,7 +268,7 @@ class JSInterpreter(object): elif in_quote == '/' and char in '[]': in_regex_char_group = char == '[' escaping = not escaping and in_quote and char == '\\' - after_op = not in_quote and (char in cls.OP_CHARS or char == '[' or (char.isspace() and after_op)) + after_op = not in_quote and (char in cls.OP_CHARS or (char.isspace() and after_op)) if char != delim[pos] or any(counters.values()) or in_quote: pos = skipping = 0 @@ -301,7 +301,7 @@ class JSInterpreter(object): separated = list(cls._separate(expr, delim, 1)) if len(separated) < 2: - raise cls.Exception('No terminating paren {delim} in {expr:.100}'.format(**locals())) + raise 
cls.Exception('No terminating paren {delim} in {expr}'.format(**locals())) return separated[0][1:].strip(), separated[1].strip() @staticmethod @@ -428,10 +428,25 @@ class JSInterpreter(object): m = re.match(r'''(?x) (?Ptry)\s*\{| + (?Pif)\s*\(| (?Pswitch)\s*\(| (?Pfor)\s*\( ''', expr) md = m.groupdict() if m else {} + if md.get('if'): + cndn, expr = self._separate_at_paren(expr[m.end() - 1:]) + if_expr, expr = self._separate_at_paren(expr.lstrip()) + # TODO: "else if" is not handled + else_expr = None + m = re.match(r'else\s*{', expr) + if m: + else_expr, expr = self._separate_at_paren(expr[m.end() - 1:]) + cndn = _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)) + ret, should_abort = self.interpret_statement( + if_expr if cndn else else_expr, local_vars, allow_recursion) + if should_abort: + return ret, True + if md.get('try'): try_expr, expr = self._separate_at_paren(expr[m.end() - 1:]) err = None From 295736c9cba714fb5de7d1c3dd31d86e50091cf8 Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 2 Feb 2023 14:28:32 +0000 Subject: [PATCH 67/78] [jsinterp] Improve parsing * support subset `... else if ...` * support `while` * add `RegExp` class * generalise `new` support * limited more debug strings * matching test changes --- test/test_jsinterp.py | 53 +++++++++++++- youtube_dl/jsinterp.py | 156 +++++++++++++++++++++++++++-------------- 2 files changed, 154 insertions(+), 55 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index c47def737..b5962356c 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -11,8 +11,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import math import re -from youtube_dl.compat import compat_re_Pattern - from youtube_dl.jsinterp import JS_Undefined, JSInterpreter @@ -140,15 +138,23 @@ class TestJSInterpreter(unittest.TestCase): ''') self.assertTrue(math.isnan(jsi.call_function('x'))) + def test_Date(self): jsi = JSInterpreter(''' function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; } ''') self.assertEqual(jsi.call_function('x'), 86000) + jsi = JSInterpreter(''' function x(dt) { return new Date(dt) - 0; } ''') self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000) + # date format m/d/y + jsi = JSInterpreter(''' + function x() { return new Date('12/31/1969 18:01:26 MDT') - 0; } + ''') + self.assertEqual(jsi.call_function('x'), 86000) + def test_call(self): jsi = JSInterpreter(''' function x() { return 2; } @@ -181,6 +187,15 @@ class TestJSInterpreter(unittest.TestCase): self.assertEqual(jsi.call_function('x'), 10) """ # Unsupported + jsi = JSInterpreter(''' + function x() { + if (0!=0) return 1; + else {return 10} + }''') + self.assertEqual(jsi.call_function('x'), 10) + """ + + def test_elseif(self): jsi = JSInterpreter(''' function x() { if (0!=0) {return 1} @@ -188,6 +203,16 @@ class TestJSInterpreter(unittest.TestCase): else {return 10} }''') self.assertEqual(jsi.call_function('x'), 10) + + """ # Unsupported + jsi = JSInterpreter(''' + function x() { + if (0!=0) return 1; + else if (1==0) {return 2} + else {return 10} + }''') + self.assertEqual(jsi.call_function('x'), 10) + # etc """ def test_for_loop(self): @@ -197,6 +222,13 @@ class TestJSInterpreter(unittest.TestCase): ''') self.assertEqual(jsi.call_function('x'), 10) + def test_while_loop(self): + # function x() { a=0; while (a<10) {a++} a } + jsi = JSInterpreter(''' + function x() { a=0; while (a<10) {a++} return a } + ''') + 
self.assertEqual(jsi.call_function('x'), 10) + def test_switch(self): jsi = JSInterpreter(''' function x(f) { switch(f){ @@ -415,13 +447,28 @@ class TestJSInterpreter(unittest.TestCase): jsi = JSInterpreter(''' function x() { let a=/,,[/,913,/](,)}/; return a; } ''') - self.assertIsInstance(jsi.call_function('x'), compat_re_Pattern) + attrs = set(('findall', 'finditer', 'flags', 'groupindex', + 'groups', 'match', 'pattern', 'scanner', + 'search', 'split', 'sub', 'subn')) + self.assertTrue(set(dir(jsi.call_function('x'))) > attrs) jsi = JSInterpreter(''' function x() { let a=/,,[/,913,/](,)}/i; return a; } ''') self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I) + jsi = JSInterpreter(r''' + function x() { let a=[/[)\\]/]; return a[0]; } + ''') + self.assertEqual(jsi.call_function('x').pattern, r'[)\\]') + + """ # fails + jsi = JSInterpreter(r''' + function x() { let a=100; a/=/[0-9]+/.exec('divide by 20 today')[0]; } + ''') + self.assertEqual(jsi.call_function('x'), 5) + """ + def test_char_code_at(self): jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}') self.assertEqual(jsi.call_function('x', 0), 116) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 9a3b8d7f2..1e7b342ac 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -187,19 +187,6 @@ class LocalNameSpace(ChainMap): class JSInterpreter(object): __named_object_counter = 0 - _RE_FLAGS = { - # special knowledge: Python's re flags are bitmask values, current max 128 - # invent new bitmask values well above that for literal parsing - # TODO: new pattern class to execute matches with these flags - 'd': 1024, # Generate indices for substring matches - 'g': 2048, # Global search - 'i': re.I, # Case-insensitive search - 'm': re.M, # Multi-line search - 's': re.S, # Allows . to match newline characters - 'u': re.U, # Treat a pattern as a sequence of unicode code points - 'y': 4096, # Perform a "sticky" search that matches starting at the current position in the target string - } - _OBJ_NAME = '__youtube_dl_jsinterp_obj' OP_CHARS = None @@ -217,9 +204,48 @@ class JSInterpreter(object): msg = '{0} in: {1!r}'.format(msg.rstrip(), expr[:100]) super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs) + class JS_RegExp(object): + _RE_FLAGS = { + # special knowledge: Python's re flags are bitmask values, current max 128 + # invent new bitmask values well above that for literal parsing + # TODO: new pattern class to execute matches with these flags + 'd': 1024, # Generate indices for substring matches + 'g': 2048, # Global search + 'i': re.I, # Case-insensitive search + 'm': re.M, # Multi-line search + 's': re.S, # Allows . to match newline characters + 'u': re.U, # Treat a pattern as a sequence of unicode code points + 'y': 4096, # Perform a "sticky" search that matches starting at the current position in the target string + } + + def __init__(self, pattern_txt, flags=''): + if isinstance(flags, compat_str): + flags, _ = self.regex_flags(flags) + # Thx: https://stackoverflow.com/questions/44773522/setattr-on-python2-sre-sre-pattern + # First, avoid https://github.com/python/cpython/issues/74534 + self.__self = re.compile(pattern_txt.replace('[[', r'[\['), flags) + for name in dir(self.__self): + # Only these? Obviously __class__, __init__. + # PyPy creates a __weakref__ attribute with value None + # that can't be setattr'd but also can't need to be copied. 
+ if name in ('__class__', '__init__', '__weakref__'): + continue + setattr(self, name, getattr(self.__self, name)) + + @classmethod + def regex_flags(cls, expr): + flags = 0 + if not expr: + return flags, expr + for idx, ch in enumerate(expr): + if ch not in cls._RE_FLAGS: + break + flags |= cls._RE_FLAGS[ch] + return flags, expr[idx + 1:] + @classmethod def __op_chars(cls): - op_chars = set(';,') + op_chars = set(';,[') for op in cls._all_operators(): for c in op[0]: op_chars.add(c) @@ -231,17 +257,6 @@ class JSInterpreter(object): namespace[name] = obj return name - @classmethod - def _regex_flags(cls, expr): - flags = 0 - if not expr: - return flags, expr - for idx, ch in enumerate(expr): - if ch not in cls._RE_FLAGS: - break - flags |= cls._RE_FLAGS[ch] - return flags, expr[idx + 1:] - @classmethod def _separate(cls, expr, delim=',', max_split=None, skip_delims=None): if not expr: @@ -328,7 +343,7 @@ class JSInterpreter(object): try: return opfunc(left_val, right_val) except Exception as e: - raise self.Exception('Failed to evaluate {left_val!r} {op} {right_val!r}'.format(**locals()), expr, cause=e) + raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e) def _index(self, obj, idx, allow_undefined=False): if idx == 'length': @@ -338,7 +353,7 @@ class JSInterpreter(object): except Exception as e: if allow_undefined: return JS_Undefined - raise self.Exception('Cannot get index {idx}'.format(**locals()), expr=repr(obj), cause=e) + raise self.Exception('Cannot get index {idx:.100}'.format(**locals()), expr=repr(obj), cause=e) def _dump(self, obj, namespace): try: @@ -352,6 +367,7 @@ class JSInterpreter(object): allow_recursion -= 1 should_return = False + # fails on (eg) if (...) stmt1; else stmt2; sub_statements = list(self._separate(stmt, ';')) or [''] expr = stmt = sub_statements.pop().strip() for sub_stmt in sub_statements: @@ -371,25 +387,30 @@ class JSInterpreter(object): if expr[0] in _QUOTES: inner, outer = self._separate(expr, expr[0], 1) if expr[0] == '/': - flags, outer = self._regex_flags(outer) - inner = re.compile(inner[1:], flags=flags) # , strict=True)) + flags, outer = self.JS_RegExp.regex_flags(outer) + inner = self.JS_RegExp(inner[1:], flags=flags) else: inner = json.loads(js_to_json(inner + expr[0])) # , strict=True)) if not outer: return inner, should_return expr = self._named_object(local_vars, inner) + outer - if expr.startswith('new '): - obj = expr[4:] - if obj.startswith('Date('): - left, right = self._separate_at_paren(obj[4:]) - expr = unified_timestamp( - self.interpret_expression(left, local_vars, allow_recursion), False) + new_kw, _, obj = expr.partition('new ') + if not new_kw: + for klass, konstr in (('Date', lambda x: int(unified_timestamp(x, False) * 1000)), + ('RegExp', self.JS_RegExp), + ('Error', self.Exception)): + if not obj.startswith(klass + '('): + continue + left, right = self._separate_at_paren(obj[len(klass):]) + argvals = self.interpret_iter(left, local_vars, allow_recursion) + expr = konstr(*argvals) if not expr: - raise self.Exception('Failed to parse date {left!r}'.format(**locals()), expr=expr) - expr = self._dump(int(expr * 1000), local_vars) + right + raise self.Exception('Failed to parse {klass} {left!r:.100}'.format(**locals()), expr=expr) + expr = self._dump(expr, local_vars) + right + break else: - raise self.Exception('Unsupported object {obj}'.format(**locals()), expr=expr) + raise self.Exception('Unsupported object {obj:.100}'.format(**locals()), expr=expr) if 
expr.startswith('void '): left = self.interpret_expression(expr[5:], local_vars, allow_recursion) @@ -430,24 +451,45 @@ class JSInterpreter(object): (?Ptry)\s*\{| (?Pif)\s*\(| (?Pswitch)\s*\(| - (?Pfor)\s*\( + (?Pfor)\s*\(| + (?Pwhile)\s*\( ''', expr) md = m.groupdict() if m else {} if md.get('if'): cndn, expr = self._separate_at_paren(expr[m.end() - 1:]) - if_expr, expr = self._separate_at_paren(expr.lstrip()) - # TODO: "else if" is not handled + if expr.startswith('{'): + if_expr, expr = self._separate_at_paren(expr) + else: + # may lose ... else ... because of ll.368-374 + if_expr, expr = self._separate_at_paren(expr, delim=';') else_expr = None - m = re.match(r'else\s*{', expr) + m = re.match(r'else\s*(?P\{)?', expr) if m: - else_expr, expr = self._separate_at_paren(expr[m.end() - 1:]) + if m.group('block'): + else_expr, expr = self._separate_at_paren(expr[m.end() - 1:]) + else: + # handle subset ... else if (...) {...} else ... + # TODO: make interpret_statement do this properly, if possible + exprs = list(self._separate(expr[m.end():], delim='}', max_split=2)) + if len(exprs) > 1: + if re.match(r'\s*if\s*\(', exprs[0]) and re.match(r'\s*else\b', exprs[1]): + else_expr = exprs[0] + '}' + exprs[1] + expr = (exprs[2] + '}') if len(exprs) == 3 else None + else: + else_expr = exprs[0] + exprs.append('') + expr = '}'.join(exprs[1:]) + else: + else_expr = exprs[0] + expr = None + else_expr = else_expr.lstrip() + '}' cndn = _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)) ret, should_abort = self.interpret_statement( if_expr if cndn else else_expr, local_vars, allow_recursion) if should_abort: return ret, True - if md.get('try'): + elif md.get('try'): try_expr, expr = self._separate_at_paren(expr[m.end() - 1:]) err = None try: @@ -484,8 +526,8 @@ class JSInterpreter(object): if err: raise err - elif md.get('for'): - constructor, remaining = self._separate_at_paren(expr[m.end() - 1:]) + elif md.get('for') or md.get('while'): + init_or_cond, remaining = self._separate_at_paren(expr[m.end() - 1:]) if remaining.startswith('{'): body, expr = self._separate_at_paren(remaining) else: @@ -496,11 +538,12 @@ class JSInterpreter(object): body = 'switch(%s){%s}' % (switch_val, body) else: body, expr = remaining, '' - start, cndn, increment = self._separate(constructor, ';') - self.interpret_expression(start, local_vars, allow_recursion) - while True: - if not _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)): - break + if md.get('for'): + start, cndn, increment = self._separate(init_or_cond, ';') + self.interpret_expression(start, local_vars, allow_recursion) + else: + cndn, increment = init_or_cond, None + while _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)): try: ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion) if should_abort: @@ -509,7 +552,8 @@ class JSInterpreter(object): break except JS_Continue: pass - self.interpret_expression(increment, local_vars, allow_recursion) + if increment: + self.interpret_expression(increment, local_vars, allow_recursion) elif md.get('switch'): switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:]) @@ -764,6 +808,10 @@ class JSInterpreter(object): if idx >= len(obj): return None return ord(obj[idx]) + elif member == 'replace': + assertion(isinstance(obj, compat_str), 'must be applied on a string') + assertion(len(argvals) == 2, 'takes exactly two arguments') + return re.sub(argvals[0], argvals[1], obj) idx = int(member) if isinstance(obj, list) else 
member return obj[idx](argvals, allow_recursion=allow_recursion) @@ -795,6 +843,10 @@ class JSInterpreter(object): raise self.Exception('Cannot return from an expression', expr) return ret + def interpret_iter(self, list_txt, local_vars, allow_recursion): + for v in self._separate(list_txt): + yield self.interpret_expression(v, local_vars, allow_recursion) + def extract_object(self, objname): _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')''' obj = {} From 37cbdfa0e7c9d00d450af32dc9cdaf93cbfc4576 Mon Sep 17 00:00:00 2001 From: Brian Marks Date: Thu, 2 Feb 2023 11:58:21 -0500 Subject: [PATCH 68/78] [americastestkitchen] Add support for downloading entire series (#31493) Also * support new sites and URL patterns * back-port from yt-dlp Co-authored-by: dirkf --- youtube_dl/extractor/americastestkitchen.py | 115 +++++++++++++++----- 1 file changed, 88 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/americastestkitchen.py b/youtube_dl/extractor/americastestkitchen.py index be960c0f9..08d3604e9 100644 --- a/youtube_dl/extractor/americastestkitchen.py +++ b/youtube_dl/extractor/americastestkitchen.py @@ -15,7 +15,7 @@ from ..utils import ( class AmericasTestKitchenIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?Pepisode|videos)/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?:cooks(?:country|illustrated)/)?(?Pepisode|videos)/(?P\d+)' _TESTS = [{ 'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers', 'md5': 'b861c3e365ac38ad319cfd509c30577f', @@ -23,15 +23,20 @@ class AmericasTestKitchenIE(InfoExtractor): 'id': '5b400b9ee338f922cb06450c', 'title': 'Japanese Suppers', 'ext': 'mp4', + 'display_id': 'weeknight-japanese-suppers', 'description': 'md5:64e606bfee910627efc4b5f050de92b3', - 'thumbnail': r're:^https?://', - 'timestamp': 1523318400, - 'upload_date': '20180410', - 'release_date': '20180410', + 'timestamp': 1523304000, + 'upload_date': '20180409', + 'release_date': '20180409', 'series': "America's Test Kitchen", + 'season': 'Season 18', 'season_number': 18, 'episode': 'Japanese Suppers', 'episode_number': 15, + 'duration': 1376, + 'thumbnail': r're:^https?://', + 'average_rating': 0, + 'view_count': int, }, 'params': { 'skip_download': True, @@ -44,15 +49,20 @@ class AmericasTestKitchenIE(InfoExtractor): 'id': '5fbe8c61bda2010001c6763b', 'title': 'Simple Chicken Dinner', 'ext': 'mp4', + 'display_id': 'atktv_2103_simple-chicken-dinner_full-episode_web-mp4', 'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7', - 'thumbnail': r're:^https?://', - 'timestamp': 1610755200, - 'upload_date': '20210116', - 'release_date': '20210116', + 'timestamp': 1610737200, + 'upload_date': '20210115', + 'release_date': '20210115', 'series': "America's Test Kitchen", + 'season': 'Season 21', 'season_number': 21, 'episode': 'Simple Chicken Dinner', 'episode_number': 3, + 'duration': 1397, + 'thumbnail': r're:^https?://', + 'view_count': int, + 'average_rating': 0, }, 'params': { 'skip_download': True, @@ -60,6 +70,12 @@ class AmericasTestKitchenIE(InfoExtractor): }, { 'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon', 'only_matching': True, + }, { + 'url': 'https://www.americastestkitchen.com/cookscountry/episode/564-when-only-chocolate-will-do', + 'only_matching': True, + }, { + 'url': 'https://www.americastestkitchen.com/cooksillustrated/videos/4478-beef-wellington', + 'only_matching': True, }, { 
'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do', 'only_matching': True, @@ -94,7 +110,7 @@ class AmericasTestKitchenIE(InfoExtractor): class AmericasTestKitchenSeasonIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?Pamericastestkitchen|cookscountry)\.com/episodes/browse/season_(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?(?Pamericastestkitchen|(?Pcooks(?:country|illustrated)))\.com(?:(?:/(?Pcooks(?:country|illustrated)))?(?:/?$|(?\d+)))' _TESTS = [{ # ATK Season 'url': 'https://www.americastestkitchen.com/episodes/browse/season_1', @@ -105,48 +121,93 @@ class AmericasTestKitchenSeasonIE(InfoExtractor): 'playlist_count': 13, }, { # Cooks Country Season - 'url': 'https://www.cookscountry.com/episodes/browse/season_12', + 'url': 'https://www.americastestkitchen.com/cookscountry/episodes/browse/season_12', 'info_dict': { 'id': 'season_12', 'title': 'Season 12', }, 'playlist_count': 13, + }, { + # America's Test Kitchen Series + 'url': 'https://www.americastestkitchen.com/', + 'info_dict': { + 'id': 'americastestkitchen', + 'title': 'America\'s Test Kitchen', + }, + 'playlist_count': 558, + }, { + # Cooks Country Series + 'url': 'https://www.americastestkitchen.com/cookscountry', + 'info_dict': { + 'id': 'cookscountry', + 'title': 'Cook\'s Country', + }, + 'playlist_count': 199, + }, { + 'url': 'https://www.americastestkitchen.com/cookscountry/', + 'only_matching': True, + }, { + 'url': 'https://www.cookscountry.com/episodes/browse/season_12', + 'only_matching': True, + }, { + 'url': 'https://www.cookscountry.com', + 'only_matching': True, + }, { + 'url': 'https://www.americastestkitchen.com/cooksillustrated/', + 'only_matching': True, + }, { + 'url': 'https://www.cooksillustrated.com', + 'only_matching': True, }] def _real_extract(self, url): - show_name, season_number = re.match(self._VALID_URL, url).groups() - season_number = int(season_number) + match = re.match(self._VALID_URL, url).groupdict() + show = match.get('show2') + show_path = ('/' + show) if show else '' + show = show or match['show'] + season_number = int_or_none(match.get('season')) + + slug, title = { + 'americastestkitchen': ('atk', 'America\'s Test Kitchen'), + 'cookscountry': ('cco', 'Cook\'s Country'), + 'cooksillustrated': ('cio', 'Cook\'s Illustrated'), + }[show] - slug = 'atk' if show_name == 'americastestkitchen' else 'cco' + facet_filters = [ + 'search_document_klass:episode', + 'search_show_slug:' + slug, + ] - season = 'Season %d' % season_number + if season_number: + playlist_id = 'season_%d' % season_number + playlist_title = 'Season %d' % season_number + facet_filters.append('search_season_list:' + playlist_title) + else: + playlist_id = show + playlist_title = title season_search = self._download_json( 'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug, - season, headers={ - 'Origin': 'https://www.%s.com' % show_name, + playlist_id, headers={ + 'Origin': 'https://www.americastestkitchen.com', 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805', 'X-Algolia-Application-Id': 'Y1FNZXUI30', }, query={ - 'facetFilters': json.dumps([ - 'search_season_list:' + season, - 'search_document_klass:episode', - 'search_show_slug:' + slug, - ]), - 'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug, + 'facetFilters': json.dumps(facet_filters), + 'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug, 
'attributesToHighlight': '', 'hitsPerPage': 1000, }) def entries(): for episode in (season_search.get('hits') or []): - search_url = episode.get('search_url') + search_url = episode.get('search_url') # always formatted like '/episode/123-title-of-episode' if not search_url: continue yield { '_type': 'url', - 'url': 'https://www.%s.com%s' % (show_name, search_url), - 'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]), + 'url': 'https://www.americastestkitchen.com%s%s' % (show_path, search_url), + 'id': try_get(episode, lambda e: e['objectID'].rsplit('_', 1)[-1]), 'title': episode.get('title'), 'description': episode.get('description'), 'timestamp': unified_timestamp(episode.get('search_document_date')), @@ -156,4 +217,4 @@ class AmericasTestKitchenSeasonIE(InfoExtractor): } return self.playlist_result( - entries(), 'season_%d' % season_number, season) + entries(), playlist_id, playlist_title) From 297fbff23b347612a5f6002b40adba9dfad85413 Mon Sep 17 00:00:00 2001 From: Rodrigo Dias Date: Thu, 2 Feb 2023 17:10:09 +0000 Subject: [PATCH 69/78] [doc] Fixed typo appearing to promise an example (#31489) Resolves #31425 Co-authored-by: dirkf --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cd888c731..6e07ddb1c 100644 --- a/README.md +++ b/README.md @@ -632,7 +632,7 @@ To use percent literals in an output template use `%%`. To output to stdout use The current default template is `%(title)s-%(id)s.%(ext)s`. -In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title: +In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title. 
#### Output template and Windows batch files From 807e593a32a1ace8fa0be8129fc5071d86516c99 Mon Sep 17 00:00:00 2001 From: JChris246 <43832407+JChris246@users.noreply.github.com> Date: Thu, 2 Feb 2023 13:12:36 -0400 Subject: [PATCH 70/78] [cammodels] fix and improve extractor (#31453) Co-authored-by: dirkf --- youtube_dl/extractor/cammodels.py | 34 +++++++++---------------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py index 1eb81b75e..d2e860b24 100644 --- a/youtube_dl/extractor/cammodels.py +++ b/youtube_dl/extractor/cammodels.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( - ExtractorError, int_or_none, url_or_none, ) @@ -20,32 +19,11 @@ class CamModelsIE(InfoExtractor): def _real_extract(self, url): user_id = self._match_id(url) - webpage = self._download_webpage( - url, user_id, headers=self.geo_verification_headers()) - - manifest_root = self._html_search_regex( - r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None) - - if not manifest_root: - ERRORS = ( - ("I'm offline, but let's stay connected", 'This user is currently offline'), - ('in a private show', 'This user is in a private show'), - ('is currently performing LIVE', 'This model is currently performing live'), - ) - for pattern, message in ERRORS: - if pattern in webpage: - error = message - expected = True - break - else: - error = 'Unable to find manifest URL root' - expected = False - raise ExtractorError(error, expected=expected) - manifest = self._download_json( - '%s%s.json' % (manifest_root, user_id), user_id) + 'https://manifest-server.naiadsystems.com/live/s:%s.json' % user_id, user_id) formats = [] + thumbnails = [] for format_id, format_dict in manifest['formats'].items(): if not isinstance(format_dict, dict): continue @@ -85,6 +63,13 @@ class CamModelsIE(InfoExtractor): 'preference': -1, }) else: + if format_id == 'jpeg': + thumbnails.append({ + 'url': f['url'], + 'width': f['width'], + 'height': f['height'], + 'format_id': f['format_id'], + }) continue formats.append(f) self._sort_formats(formats) @@ -92,6 +77,7 @@ class CamModelsIE(InfoExtractor): return { 'id': user_id, 'title': self._live_title(user_id), + 'thumbnails': thumbnails, 'is_live': True, 'formats': formats, 'age_limit': 18 From e9611a2a3603ee201d0c1ba99e8bfd8ec1e697cd Mon Sep 17 00:00:00 2001 From: Leon Etienne <40911701+Leonetienne@users.noreply.github.com> Date: Thu, 2 Feb 2023 18:13:39 +0100 Subject: [PATCH 71/78] [pr0gramm] implement InfoExtractor, Resolves #31433 (#31434) * [pr0gramm] implement infoextractor * [pr0gramm] remove misplaced comment, uncapture regex-group * [pr0gramm]: specify utf-8 coding * [pr0gramm]: add trailing comma to lists for maintainability * [pr0gramm]: ie only sets upload_date attribute * [pr0gramm]: add video_id to title * [pr0gramm]: more forgiving _valid_url regex * [pr0gramm]: add uploader to title, if set * Discriminate URL pattern --------- Co-authored-by: dirkf --- youtube_dl/extractor/extractors.py | 4 ++ youtube_dl/extractor/pr0gramm.py | 105 +++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 youtube_dl/extractor/pr0gramm.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 947cbe8fd..cf0388ed2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1667,3 +1667,7 @@ from .zingmp3 import ( ) from .zoom import ZoomIE from .zype import 
ZypeIE +from .pr0gramm import ( + Pr0grammIE, + Pr0grammStaticIE, +) diff --git a/youtube_dl/extractor/pr0gramm.py b/youtube_dl/extractor/pr0gramm.py new file mode 100644 index 000000000..b68224fd5 --- /dev/null +++ b/youtube_dl/extractor/pr0gramm.py @@ -0,0 +1,105 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +import re +from ..utils import ( + merge_dicts, +) + + +class Pr0grammStaticIE(InfoExtractor): + # Possible urls: + # https://pr0gramm.com/static/5466437 + _VALID_URL = r'https?://pr0gramm\.com/static/(?P[0-9]+)' + _TEST = { + 'url': 'https://pr0gramm.com/static/5466437', + 'md5': '52fa540d70d3edc286846f8ca85938aa', + 'info_dict': { + 'id': '5466437', + 'ext': 'mp4', + 'title': 'pr0gramm-5466437 by g11st', + 'uploader': 'g11st', + 'upload_date': '20221221', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + # Fetch media sources + entries = self._parse_html5_media_entries(url, webpage, video_id) + media_info = entries[0] + + # this raises if there are no formats + self._sort_formats(media_info.get('formats') or []) + + # Fetch author + uploader = self._html_search_regex(r'by\W+([\w-]+)\W+', webpage, 'uploader') + + # Fetch approx upload timestamp from filename + # Have None-defaults in case the extraction fails + uploadDay = None + uploadMon = None + uploadYear = None + uploadTimestr = None + # (//img.pr0gramm.com/2022/12/21/62ae8aa5e2da0ebf.mp4) + m = re.search(r'//img\.pr0gramm\.com/(?P[\d]+)/(?P[\d]+)/(?P[\d]+)/\w+\.\w{,4}', webpage) + + if (m): + # Up to a day of accuracy should suffice... + uploadDay = m.groupdict().get('day') + uploadMon = m.groupdict().get('mon') + uploadYear = m.groupdict().get('year') + uploadTimestr = uploadYear + uploadMon + uploadDay + + return merge_dicts({ + 'id': video_id, + 'title': 'pr0gramm-%s%s' % (video_id, (' by ' + uploader) if uploader else ''), + 'uploader': uploader, + 'upload_date': uploadTimestr + }, media_info) + + +# This extractor is for the primary url (used for sharing, and appears in the +# location bar) Since this page loads the DOM via JS, yt-dl can't find any +# video information here. So let's redirect to a compatibility version of +# the site, which does contain the