From f318882955b90bead8206ee411641e65037b1011 Mon Sep 17 00:00:00 2001 From: cladmi Date: Wed, 16 Dec 2020 09:54:48 +0100 Subject: [PATCH] [motherless] Fix recent videos upload date extraction (closes #27661) Videos less than a week old use a '20h ago' or '1d ago' format. I kept the support for 'Ago' with an uppercase start as it was already in the code. --- youtube_dl/extractor/motherless.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py index b1615b4d8..6cc36b308 100644 --- a/youtube_dl/extractor/motherless.py +++ b/youtube_dl/extractor/motherless.py @@ -85,18 +85,27 @@ class MotherlessIE(InfoExtractor): or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id) age_limit = self._rta_search(webpage) view_count = str_to_int(self._html_search_regex( - (r'>(\d+)\s+Views<', r'Views\s+([^<]+)<'), + (r'>([\d,.]+)\s+Views<', # 1,234,567 Views + r'Views\s+([^<]+)<'), webpage, 'view count', fatal=False)) like_count = str_to_int(self._html_search_regex( - (r'>(\d+)\s+Favorites<', r'Favorited\s+([^<]+)<'), + (r'>([\d,.]+)\s+Favorites<', # 1,234 Favorites + r'Favorited\s+([^<]+)<'), webpage, 'like count', fatal=False)) upload_date = self._html_search_regex( (r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<', + r'class=["\']count[^>]+>(\d+[hd])\s+[aA]go<', # 20h/1d ago r'Uploaded\s+([^<]+)<'), webpage, 'upload date') - if 'Ago' in upload_date: - days = int(re.search(r'([0-9]+)', upload_date).group(1)) - upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d') + relative = re.match(r'(\d+)([hd])$', upload_date) + if relative: + delta = int(relative.group(1)) + unit = relative.group(2) + if unit == 'h': + delta_t = datetime.timedelta(hours=delta) + else: # unit == 'd' + delta_t = datetime.timedelta(days=delta) + upload_date = (datetime.datetime.now() - delta_t).strftime('%Y%m%d') else: upload_date = 
unified_strdate(upload_date)