From 360104ec182f9a9545ed0ea64a77ec7ec749c103 Mon Sep 17 00:00:00 2001 From: fluks Date: Mon, 7 Nov 2022 16:33:59 +0200 Subject: [PATCH] [Telegram] Add new extractor Add very rudimentary extractor for Telegram. Supports only public channels which are previewable with a browser. Implements #28748. --- docs/supportedsites.md | 1 + youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/telegram.py | 60 ++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+) create mode 100644 youtube_dl/extractor/telegram.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index ae2a6b8b0..d6643f03c 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -934,6 +934,7 @@ - **TeleBruxelles** - **Telecinco**: telecinco.es, cuatro.com and mediaset.es - **Telegraaf** + - **Telegram** - **TeleMB** - **TeleQuebec** - **TeleQuebecEmission** diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e36f86be4..072bc6dfe 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1234,6 +1234,7 @@ from .tele13 import Tele13IE from .telebruxelles import TeleBruxellesIE from .telecinco import TelecincoIE from .telegraaf import TelegraafIE +from .telegram import TelegramIE from .telemb import TeleMBIE from .telequebec import ( TeleQuebecIE, diff --git a/youtube_dl/extractor/telegram.py b/youtube_dl/extractor/telegram.py new file mode 100644 index 000000000..b5111b405 --- /dev/null +++ b/youtube_dl/extractor/telegram.py @@ -0,0 +1,60 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, +) + + +class TelegramIE(InfoExtractor): + _VALID_URL = r'https://t\.me/(?P[^/]+)/(?P\d+)' + _TEST = { + 'url': 'https://t.me/telegram/195', + 'info_dict': { + 'id': '195', + 'ext': 'mp4', + 'title': 'telegram', + 'description': 'Telegram’s Bot Documentation has been completely overhauled 
–\xa0adding the latest info, along with detailed screenshots and videos.\n\nNewcomers now have an easy way to learn about all the powerful features, and can build a bot from our step-by-step tutorial with examples for popular programming languages.\n\nExperienced developers can explore recent updates and advanced features, ready for 2022 and beyond.', + 'duration': 23, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + m = re.match(r'https://t\.me/(?P[^/]+)/', url) + if m is None: + raise ExtractorError('Unable to find channel name') + title = m.group('channel') + embed_url = url + '?embed=1&mode=tme' + html = self._download_webpage(embed_url, video_id) + + video_url = self._search_regex(r'