From 27e5c22293c1e8d165cb23502788db6347195f88 Mon Sep 17 00:00:00 2001 From: palewire Date: Mon, 8 Aug 2022 19:42:25 -0700 Subject: [PATCH 1/9] Shorten title attribute for #29912 --- youtube_dl/extractor/twitter.py | 57 ++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index cfa7a7326..710fbc091 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -193,11 +193,27 @@ class TwitterIE(TwitterBaseIE): _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)' _TESTS = [{ + 'url': 'https://twitter.com/TulsiGabbard/status/1555878318469091330', + 'md5': '8c45b02c4f324e0b9054ceb888091283', + 'info_dict': { + 'id': '1555878318469091330', + 'ext': 'mp4', + 'title': '@TulsiGabbard - 1659785272', + 'thumbnail': r're:^https?://.*\.jpg', + 'description': """Puberty-blocking procedures promoted by the Biden/Harris Admin are child abuse. The FDA has recently confirmed these hormones/drugs have extremely dangerous side effects, like brain swelling and vision loss. https://t.co/pcc70Uyt76""", + 'uploader': 'Tulsi Gabbard 🌺', + 'uploader_id': 'TulsiGabbard', + 'duration': 44.466, + 'timestamp': 1659785272, + 'upload_date': '20220806', + 'age_limit': 0, + }, + },{ 'url': 'https://twitter.com/freethenipple/status/643211948184596480', 'info_dict': { 'id': '643211948184596480', 'ext': 'mp4', - 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!', + 'title': '@freethenipple - 1442188653', 'thumbnail': r're:^https?://.*\.jpg', 'description': 'FTN supporters on Hollywood Blvd today! 
http://t.co/c7jHH749xJ', 'uploader': 'FREE THE NIPPLE', @@ -213,7 +229,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '657991469417025536', 'ext': 'mp4', - 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai', + 'title': '@giphz', 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"', 'thumbnail': r're:^https?://.*\.png', 'uploader': 'Gifs', @@ -226,10 +242,10 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '665052190608723968', 'ext': 'mp4', - 'title': 'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.', + 'title': '@starwars - 1447395772', 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ', 'uploader_id': 'starwars', - 'uploader': 'Star Wars', + 'uploader': 'Star Wars | Andor Premieres Sept 21 on Disney+', 'timestamp': 1447395772, 'upload_date': '20151113', }, @@ -238,7 +254,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '705235433198714880', 'ext': 'mp4', - 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.", + 'title': '@BTNBrentYarina - 1456976204', 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. 
https://t.co/OrxcJ28Bns", 'uploader_id': 'BTNBrentYarina', 'uploader': 'Brent Yarina', @@ -255,11 +271,11 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '700207533655363584', 'ext': 'mp4', - 'title': 'simon vertugo - BEAT PROD: @suhmeduh #Damndaniel', + 'title': '@jaydingeer - 1455777459', 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ', 'thumbnail': r're:^https?://.*\.jpg', - 'uploader': 'simon vertugo', - 'uploader_id': 'simonvertugo', + 'uploader': 'jaydin donte geer', + 'uploader_id': 'jaydingeer', 'duration': 30.0, 'timestamp': 1455777459, 'upload_date': '20160218', @@ -282,7 +298,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '719944021058060289', 'ext': 'mp4', - 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.', + 'title': '@CaptainAmerica - 1460483005', 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI', 'uploader_id': 'CaptainAmerica', 'uploader': 'Captain America', @@ -322,7 +338,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '910031516746514432', 'ext': 'mp4', - 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.', + 'title': '@Prefet971 - 1505803395', 'thumbnail': r're:^https?://.*\.jpg', 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. 
https://t.co/mwx01Rs4lo', 'uploader': 'Préfet de Guadeloupe', @@ -340,7 +356,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '1001551623938805763', 'ext': 'mp4', - 'title': 're:.*?Shep is on a roll today.*?', + 'title': '@LisPower1 - 1527623489', 'thumbnail': r're:^https?://.*\.jpg', 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09', 'uploader': 'Lis Power', @@ -357,7 +373,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '1087791357756956680', 'ext': 'mp4', - 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!', + 'title': '@Twitter - 1548184644', 'thumbnail': r're:^https?://.*\.jpg', 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976', 'uploader': 'Twitter', @@ -383,7 +399,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '1349794411333394432', 'ext': 'mp4', - 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba', + 'title': '@BrooklynNets - 1610651040', 'thumbnail': r're:^https?://.*\.jpg', 'description': 'md5:71ead15ec44cee55071547d6447c6a3e', 'uploader': 'Brooklyn Nets', @@ -440,14 +456,17 @@ class TwitterIE(TwitterBaseIE): 'tweet_mode': 'extended', }) - title = description = status['full_text'].replace('\n', ' ') - # strip 'https -_t.co_BJYgOjSeGA' junk from filenames - title = re.sub(r'\s+(https?://[^ ]+)', '', title) + # Get metadata + description = status['full_text'].replace('\n', ' ') user = status.get('user') or {} uploader = user.get('name') - if uploader: - title = '%s - %s' % (uploader, title) uploader_id = user.get('screen_name') + timestamp = unified_timestamp(status.get('created_at')) + + # Keep the file name short so it doesn't exceed filesystem limits + title = '%s' % (timestamp,) + if uploader_id: + title = '@%s - %s' % (uploader_id, timestamp) tags = [] for hashtag in (try_get(status, lambda x: x['entities']['hashtags'], list) or []): @@ -461,7 +480,7 @@ class 
TwitterIE(TwitterBaseIE): 'title': title, 'description': description, 'uploader': uploader, - 'timestamp': unified_timestamp(status.get('created_at')), + 'timestamp': timestamp, 'uploader_id': uploader_id, 'uploader_url': 'https://twitter.com/' + uploader_id if uploader_id else None, 'like_count': int_or_none(status.get('favorite_count')), From 05aa33b0b3c9bc575f5d5ad522007c59c4b60a1b Mon Sep 17 00:00:00 2001 From: palewire Date: Mon, 8 Aug 2022 19:43:42 -0700 Subject: [PATCH 2/9] Flake8 --- youtube_dl/extractor/twitter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 710fbc091..a4632a502 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -208,7 +208,7 @@ class TwitterIE(TwitterBaseIE): 'upload_date': '20220806', 'age_limit': 0, }, - },{ + }, { 'url': 'https://twitter.com/freethenipple/status/643211948184596480', 'info_dict': { 'id': '643211948184596480', From abc3a6af48447f0f65fc8ce471d2ef993069e4cd Mon Sep 17 00:00:00 2001 From: palewire Date: Tue, 9 Aug 2022 03:33:41 -0700 Subject: [PATCH 3/9] use_generic --- youtube_dl/extractor/twitter.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index a4632a502..b6ce3a915 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -198,7 +198,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '1555878318469091330', 'ext': 'mp4', - 'title': '@TulsiGabbard - 1659785272', + 'title': '@TulsiGabbard-1555878318469091330', 'thumbnail': r're:^https?://.*\.jpg', 'description': """Puberty-blocking procedures promoted by the Biden/Harris Admin are child abuse. The FDA has recently confirmed these hormones/drugs have extremely dangerous side effects, like brain swelling and vision loss. 
https://t.co/pcc70Uyt76""", 'uploader': 'Tulsi Gabbard 🌺', @@ -213,7 +213,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '643211948184596480', 'ext': 'mp4', - 'title': '@freethenipple - 1442188653', + 'title': '@freethenipple-643211948184596480', 'thumbnail': r're:^https?://.*\.jpg', 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ', 'uploader': 'FREE THE NIPPLE', @@ -229,7 +229,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '657991469417025536', 'ext': 'mp4', - 'title': '@giphz', + 'title': '@giphz-657991469417025536', 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"', 'thumbnail': r're:^https?://.*\.png', 'uploader': 'Gifs', @@ -242,7 +242,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '665052190608723968', 'ext': 'mp4', - 'title': '@starwars - 1447395772', + 'title': '@starwars-665052190608723968', 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ', 'uploader_id': 'starwars', 'uploader': 'Star Wars | Andor Premieres Sept 21 on Disney+', @@ -271,7 +271,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '700207533655363584', 'ext': 'mp4', - 'title': '@jaydingeer - 1455777459', + 'title': '@jaydingeer-700207533655363584', 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ', 'thumbnail': r're:^https?://.*\.jpg', 'uploader': 'jaydin donte geer', @@ -298,7 +298,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '719944021058060289', 'ext': 'mp4', - 'title': '@CaptainAmerica - 1460483005', + 'title': '@CaptainAmerica-719944021058060289', 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. 
https://t.co/GpgYi9xMJI', 'uploader_id': 'CaptainAmerica', 'uploader': 'Captain America', @@ -338,7 +338,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '910031516746514432', 'ext': 'mp4', - 'title': '@Prefet971 - 1505803395', + 'title': '@Prefet971-910031516746514432', 'thumbnail': r're:^https?://.*\.jpg', 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo', 'uploader': 'Préfet de Guadeloupe', @@ -356,7 +356,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '1001551623938805763', 'ext': 'mp4', - 'title': '@LisPower1 - 1527623489', + 'title': '@LisPower1-1001551623938805763', 'thumbnail': r're:^https?://.*\.jpg', 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09', 'uploader': 'Lis Power', @@ -373,7 +373,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '1087791357756956680', 'ext': 'mp4', - 'title': '@Twitter - 1548184644', + 'title': '@Twitter-1087791357756956680', 'thumbnail': r're:^https?://.*\.jpg', 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976', 'uploader': 'Twitter', @@ -399,7 +399,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '1349794411333394432', 'ext': 'mp4', - 'title': '@BrooklynNets - 1610651040', + 'title': '@BrooklynNets-1349794411333394432', 'thumbnail': r're:^https?://.*\.jpg', 'description': 'md5:71ead15ec44cee55071547d6447c6a3e', 'uploader': 'Brooklyn Nets', @@ -464,9 +464,9 @@ class TwitterIE(TwitterBaseIE): timestamp = unified_timestamp(status.get('created_at')) # Keep the file name short so it doesn't exceed filesystem limits - title = '%s' % (timestamp,) + title = self._generic_title(url) if uploader_id: - title = '@%s - %s' % (uploader_id, timestamp) + title = '@%s-%s' % (uploader_id, title) tags = [] for hashtag in (try_get(status, lambda x: x['entities']['hashtags'], list) or []): From d2381bebafaa67e985fb68d701c9a248cd51ee79 Mon Sep 17 00:00:00 2001 From: palewire Date: Tue, 9 
Aug 2022 03:34:52 -0700 Subject: [PATCH 4/9] use_generic --- youtube_dl/extractor/twitter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index b6ce3a915..910939d49 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -254,7 +254,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '705235433198714880', 'ext': 'mp4', - 'title': '@BTNBrentYarina - 1456976204', + 'title': '@BTNBrentYarina-705235433198714880', 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns", 'uploader_id': 'BTNBrentYarina', 'uploader': 'Brent Yarina', From c1c8791335ba017e4d0ac87f8ebf1b36e3e69a6a Mon Sep 17 00:00:00 2001 From: palewire Date: Tue, 9 Aug 2022 12:21:39 -0700 Subject: [PATCH 5/9] Switch trim method --- youtube_dl/extractor/twitter.py | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 910939d49..9260d8912 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -198,9 +198,9 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '1555878318469091330', 'ext': 'mp4', - 'title': '@TulsiGabbard-1555878318469091330', + 'title': 'Tulsi Gabbard 🌺 - Puberty-blocking procedures promote[...]', + 'description': 'Puberty-blocking procedures promoted by the Biden/Harris Admin are child abuse. The FDA has recently confirmed these hormones/drugs have extremely dangerous side effects, like brain swelling and vision loss. https://t.co/pcc70Uyt76', 'thumbnail': r're:^https?://.*\.jpg', - 'description': """Puberty-blocking procedures promoted by the Biden/Harris Admin are child abuse. The FDA has recently confirmed these hormones/drugs have extremely dangerous side effects, like brain swelling and vision loss. 
https://t.co/pcc70Uyt76""", 'uploader': 'Tulsi Gabbard 🌺', 'uploader_id': 'TulsiGabbard', 'duration': 44.466, @@ -213,7 +213,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '643211948184596480', 'ext': 'mp4', - 'title': '@freethenipple-643211948184596480', + 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd to[...]', 'thumbnail': r're:^https?://.*\.jpg', 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ', 'uploader': 'FREE THE NIPPLE', @@ -229,7 +229,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '657991469417025536', 'ext': 'mp4', - 'title': '@giphz-657991469417025536', + 'title': 'Gifs - "tu vai cai tu vai cai tu nao eh capaz disso[...]', 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"', 'thumbnail': r're:^https?://.*\.png', 'uploader': 'Gifs', @@ -242,7 +242,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '665052190608723968', 'ext': 'mp4', - 'title': '@starwars-665052190608723968', + 'title': 'Star Wars | Andor Premieres Sept 21 on Disney+ - A new beginning is coming December [...]', 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ', 'uploader_id': 'starwars', 'uploader': 'Star Wars | Andor Premieres Sept 21 on Disney+', @@ -254,7 +254,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '705235433198714880', 'ext': 'mp4', - 'title': '@BTNBrentYarina-705235433198714880', + 'title': 'Brent Yarina - Khalil Iverson\'s missed highlight dunk. And made highlight dunk[...]', 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. 
https://t.co/OrxcJ28Bns", 'uploader_id': 'BTNBrentYarina', 'uploader': 'Brent Yarina', @@ -271,7 +271,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '700207533655363584', 'ext': 'mp4', - 'title': '@jaydingeer-700207533655363584', + 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh https://t.co/[...]', 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ', 'thumbnail': r're:^https?://.*\.jpg', 'uploader': 'jaydin donte geer', @@ -298,7 +298,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '719944021058060289', 'ext': 'mp4', - 'title': '@CaptainAmerica-719944021058060289', + 'title': 'Captain America - @King0fNerd Are you sure you made t[...]', 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI', 'uploader_id': 'CaptainAmerica', 'uploader': 'Captain America', @@ -338,7 +338,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '910031516746514432', 'ext': 'mp4', - 'title': '@Prefet971-910031516746514432', + 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve[...]', 'thumbnail': r're:^https?://.*\.jpg', 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. 
https://t.co/mwx01Rs4lo', 'uploader': 'Préfet de Guadeloupe', @@ -356,7 +356,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '1001551623938805763', 'ext': 'mp4', - 'title': '@LisPower1-1001551623938805763', + 'title': 'Lis Power - Holy shit, Shep is on a roll today.[...]', 'thumbnail': r're:^https?://.*\.jpg', 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09', 'uploader': 'Lis Power', @@ -373,7 +373,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '1087791357756956680', 'ext': 'mp4', - 'title': '@Twitter-1087791357756956680', + 'title': 'Twitter - A new https://t.co/fHiPXozBdO is co[...]', 'thumbnail': r're:^https?://.*\.jpg', 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976', 'uploader': 'Twitter', @@ -399,7 +399,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '1349794411333394432', 'ext': 'mp4', - 'title': '@BrooklynNets-1349794411333394432', + 'title': 'Brooklyn Nets - WATCH: Sean Marks\' full media sessi[...]', 'thumbnail': r're:^https?://.*\.jpg', 'description': 'md5:71ead15ec44cee55071547d6447c6a3e', 'uploader': 'Brooklyn Nets', @@ -464,9 +464,11 @@ class TwitterIE(TwitterBaseIE): timestamp = unified_timestamp(status.get('created_at')) # Keep the file name short so it doesn't exceed filesystem limits - title = self._generic_title(url) - if uploader_id: - title = '@%s-%s' % (uploader_id, title) + title = description + if len(title) > 40: + title = title[:35] + "[...]" + if uploader: + title = '%s - %s' % (uploader, title) tags = [] for hashtag in (try_get(status, lambda x: x['entities']['hashtags'], list) or []): From 9b6e3940a752710e23cab2bf99c7c36aef891619 Mon Sep 17 00:00:00 2001 From: Ben Welsh Date: Wed, 10 Aug 2022 05:25:40 -0700 Subject: [PATCH 6/9] Update youtube_dl/extractor/twitter.py Co-authored-by: dirkf --- youtube_dl/extractor/twitter.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 9260d8912..407564372 100644 --- 
a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -466,7 +466,12 @@ class TwitterIE(TwitterBaseIE): # Keep the file name short so it doesn't exceed filesystem limits title = description if len(title) > 40: - title = title[:35] + "[...]" + trim = 35 + # allow up to 10 more characters to find whitespace + m = re.search(r'\s+', title, trim, trim + 10) + if m: + trim = m.start() + 1 + title = title[:trim] + '...' if uploader: title = '%s - %s' % (uploader, title) From e84f0673bbc04cffee699b852e1a1c8d82176c04 Mon Sep 17 00:00:00 2001 From: Ben Welsh Date: Wed, 10 Aug 2022 05:27:46 -0700 Subject: [PATCH 7/9] Update twitter.py --- youtube_dl/extractor/twitter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 407564372..31b4df8ae 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -464,7 +464,7 @@ class TwitterIE(TwitterBaseIE): timestamp = unified_timestamp(status.get('created_at')) # Keep the file name short so it doesn't exceed filesystem limits - title = description + title = re.sub(r'\s+(https?://[^ ]+)', '', description) if len(title) > 40: trim = 35 # allow up to 10 more characters to find whitespace From 784f597601dd0434cf39bf913ee3ad946934f6a9 Mon Sep 17 00:00:00 2001 From: palewire Date: Wed, 10 Aug 2022 05:43:46 -0700 Subject: [PATCH 8/9] Tweaks --- youtube_dl/extractor/twitter.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 31b4df8ae..73a36bccc 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -198,7 +198,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '1555878318469091330', 'ext': 'mp4', - 'title': 'Tulsi Gabbard 🌺 - Puberty-blocking procedures promote[...]', + 'title': 'Tulsi Gabbard 🌺 - Puberty-blocking ...', 'description': 'Puberty-blocking 
procedures promoted by the Biden/Harris Admin are child abuse. The FDA has recently confirmed these hormones/drugs have extremely dangerous side effects, like brain swelling and vision loss. https://t.co/pcc70Uyt76', 'thumbnail': r're:^https?://.*\.jpg', 'uploader': 'Tulsi Gabbard 🌺', @@ -213,7 +213,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '643211948184596480', 'ext': 'mp4', - 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd to[...]', + 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!', 'thumbnail': r're:^https?://.*\.jpg', 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ', 'uploader': 'FREE THE NIPPLE', @@ -242,7 +242,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '665052190608723968', 'ext': 'mp4', - 'title': 'Star Wars | Andor Premieres Sept 21 on Disney+ - A new beginning is coming December [...]', + 'title': 'Star Wars | Andor Premieres Sept 21 on Disney+ - A new beginning is coming December ...', 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ', 'uploader_id': 'starwars', 'uploader': 'Star Wars | Andor Premieres Sept 21 on Disney+', @@ -271,7 +271,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '700207533655363584', 'ext': 'mp4', - 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh https://t.co/[...]', + 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel', 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ', 'thumbnail': r're:^https?://.*\.jpg', 'uploader': 'jaydin donte geer', @@ -298,7 +298,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '719944021058060289', 'ext': 'mp4', - 'title': 'Captain America - @King0fNerd Are you sure you made t[...]', + 'title': 'Captain America - @King0fNerd ...', 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. 
https://t.co/GpgYi9xMJI', 'uploader_id': 'CaptainAmerica', 'uploader': 'Captain America', @@ -338,7 +338,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '910031516746514432', 'ext': 'mp4', - 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve[...]', + 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve...', 'thumbnail': r're:^https?://.*\.jpg', 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo', 'uploader': 'Préfet de Guadeloupe', @@ -356,7 +356,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '1001551623938805763', 'ext': 'mp4', - 'title': 'Lis Power - Holy shit, Shep is on a roll today.[...]', + 'title': 'Lis Power - Holy shit, Shep is on a roll today....', 'thumbnail': r're:^https?://.*\.jpg', 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09', 'uploader': 'Lis Power', @@ -373,7 +373,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '1087791357756956680', 'ext': 'mp4', - 'title': 'Twitter - A new https://t.co/fHiPXozBdO is co[...]', + 'title': 'Twitter - A new is coming. Some of you got a...', 'thumbnail': r're:^https?://.*\.jpg', 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976', 'uploader': 'Twitter', @@ -399,7 +399,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '1349794411333394432', 'ext': 'mp4', - 'title': 'Brooklyn Nets - WATCH: Sean Marks\' full media sessi[...]', + 'title': 'Brooklyn Nets - WATCH: Sean Marks\' full media sessi...', 'thumbnail': r're:^https?://.*\.jpg', 'description': 'md5:71ead15ec44cee55071547d6447c6a3e', 'uploader': 'Brooklyn Nets', @@ -468,8 +468,8 @@ class TwitterIE(TwitterBaseIE): if len(title) > 40: trim = 35 # allow up to 10 more characters to find whitespace - m = re.search(r'\s+', title, trim, trim + 10) - if m: + m = re.search(r'\s+', title) + if m and m.start() > 10: trim = m.start() + 1 title = title[:trim] + '...' 
if uploader: From 099dcca6c4216b814644a33b7254e3d45a2e6def Mon Sep 17 00:00:00 2001 From: Ben Welsh Date: Wed, 10 Aug 2022 17:55:32 -0300 Subject: [PATCH 9/9] Trailing whitespace --- youtube_dl/extractor/twitter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 73a36bccc..0e882d9ed 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -467,7 +467,7 @@ class TwitterIE(TwitterBaseIE): title = re.sub(r'\s+(https?://[^ ]+)', '', description) if len(title) > 40: trim = 35 - # allow up to 10 more characters to find whitespace + # allow up to 10 more characters to find whitespace m = re.search(r'\s+', title) if m and m.start() > 10: trim = m.start() + 1