From 0cca4de1ae173b4f2623ce317acfb9a1a0c15ab0 Mon Sep 17 00:00:00 2001 From: anderstornkvist Date: Tue, 19 Jan 2021 10:41:04 +0100 Subject: [PATCH] [box] allow subdomain for box without file id in url --- youtube_dl/extractor/box.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/box.py b/youtube_dl/extractor/box.py index 40453f9a0..c7ee58eb5 100644 --- a/youtube_dl/extractor/box.py +++ b/youtube_dl/extractor/box.py @@ -14,7 +14,7 @@ from ..utils import ( class BoxIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/]+)/file/(?P<id>\d+)' + _VALID_URL = r'https?://(?P<prefix>(?:[^.]+\.)?)app\.box\.com/s/(?P<shared_name>[^/]+)(?:/file/(?P<id>\d+))?' _TEST = { 'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538', 'md5': '1f81b2fd3960f38a40a3b8823e5fcd43', @@ -30,20 +30,22 @@ class BoxIE(InfoExtractor): } def _real_extract(self, url): - shared_name, file_id = re.match(self._VALID_URL, url).groups() + prefix, shared_name, file_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage(url, file_id) + if not file_id: + file_id = re.search(r'"typedID":\s*"f_(\d+)"', webpage).group(1) request_token = self._parse_json(self._search_regex( r'Box\.config\s*=\s*({.+?});', webpage, 'Box config'), file_id)['requestToken'] access_token = self._download_json( - 'https://app.box.com/app-api/enduserapp/elements/tokens', file_id, + 'https://' + prefix + 'app.box.com/app-api/enduserapp/elements/tokens', file_id, 'Downloading token JSON metadata', data=json.dumps({'fileIDs': [file_id]}).encode(), headers={ 'Content-Type': 'application/json', 'X-Request-Token': request_token, 'X-Box-EndUser-API': 'sharedName=' + shared_name, })[file_id]['read'] - shared_link = 'https://app.box.com/s/' + shared_name + shared_link = 'https://' + prefix + 'app.box.com/s/' + shared_name f = self._download_json( 'https://api.box.com/2.0/files/' + file_id, file_id, 'Downloading file JSON metadata', headers={