From 15f27347911e51954184aa483a77c98eaea2c399 Mon Sep 17 00:00:00 2001 From: Jia Rong Yee <28086837+fourjr@users.noreply.github.com> Date: Sun, 22 Nov 2020 21:12:47 +0800 Subject: [PATCH] [nytimes] Add new cooking.nytimes.com extractor (#27143) * [nytimes] support cooking.nytimes.com, resolves #27112 Co-authored-by: remitamine --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nytimes.py | 38 ++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 31fb4c95a..fb18a0563 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -782,6 +782,7 @@ from .ntvru import NTVRuIE from .nytimes import ( NYTimesIE, NYTimesArticleIE, + NYTimesCookingIE, ) from .nuvid import NuvidIE from .nzz import NZZIE diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py index fc78ca56c..976b1c694 100644 --- a/youtube_dl/extractor/nytimes.py +++ b/youtube_dl/extractor/nytimes.py @@ -221,3 +221,41 @@ class NYTimesArticleIE(NYTimesBaseIE): r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'), webpage, 'podcast data') return self._extract_podcast_from_json(podcast_data, page_id, webpage) + + +class NYTimesCookingIE(NYTimesBaseIE): + _VALID_URL = r'https?://cooking\.nytimes\.com/(?:guid|recip)es/(?P\d+)' + _TESTS = [{ + 'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart', + 'md5': 'dab81fa2eaeb3f9ed47498bdcfcdc1d3', + 'info_dict': { + 'id': '100000004756089', + 'ext': 'mov', + 'timestamp': 1479383008, + 'uploader': 'By SHAW LASH, ADAM SAEWITZ and JAMES HERRON', + 'title': 'Cranberry Tart', + 'upload_date': '20161117', + 'description': 'If you are a fan of lemon curd or the classic French tarte au citron, you will love this cranberry version.', + }, + }, { + 'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey', + 'md5': '4b2e8c70530a89b8d905a2b572316eb8', + 'info_dict': { + 'id': '100000003951728', + 'ext': 'mov', + 'timestamp': 1445509539, + 'description': 'Turkey guide', + 'upload_date': '20151022', + 'title': 'Turkey', + } + }] + + def _real_extract(self, url): + page_id = self._match_id(url) + + webpage = self._download_webpage(url, page_id) + + video_id = self._search_regex( + r'data-video-id=["\'](\d+)', webpage, 'video id') + + return self._extract_video_from_id(video_id)