[facebook] add support for watchparty pages (closes #27507)

pull/27132/merge
Remita Amine 3 years ago
parent 79dd92b1fe
commit ecaa535cf4
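
For orientation: the new watchparty code path in this commit resolves a watchparty page by POSTing the living-room ID to Facebook's GraphQL endpoint and walking the watched-content edges of the response. Below is a minimal standalone Python 3 sketch of that request, not the extractor itself; the doc_id, endpoint path, and response shape come from the patch, while the function name and the unauthenticated session are assumptions (in practice a logged-in session plus the page's lsd anti-CSRF token are required, which is exactly what the patched extractor scrapes).

import json
from urllib.parse import urlencode
from urllib.request import Request, urlopen

def fetch_watchparty_video_ids(living_room_id):
    # GraphQL document ID and variables as used by the patched extractor.
    post_data = {
        'doc_id': 3731964053542869,
        'variables': json.dumps({'livingRoomID': living_room_id}),
    }
    req = Request(
        'https://www.facebook.com/api/graphql/',
        data=urlencode(post_data).encode())
    living_room = json.load(urlopen(req))['data']['living_room']
    # Each edge wraps one video watched in the party.
    edges = living_room['recap']['watched_content']['edges']
    return [edge['node']['video']['id'] for edge in edges]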

youtube_dl/extractor/facebook.py

@@ -1,6 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import json
 import re
 import socket
@@ -8,6 +9,7 @@ from .common import InfoExtractor
 from ..compat import (
     compat_etree_fromstring,
     compat_http_client,
+    compat_str,
     compat_urllib_error,
     compat_urllib_parse_unquote,
     compat_urllib_parse_unquote_plus,
@@ -47,7 +49,8 @@ class FacebookIE(InfoExtractor):
                             )\?(?:.*?)(?:v|video_id|story_fbid)=|
                             [^/]+/videos/(?:[^/]+/)?|
                             [^/]+/posts/|
-                            groups/[^/]+/permalink/
+                            groups/[^/]+/permalink/|
+                            watchparty/
                         )|
                     facebook:
                 )
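
The effect of this one-line change to _VALID_URL is that watchparty links are now routed to FacebookIE. A quick sanity check against a simplified stand-in pattern (hypothetical, reduced to the two branches touched here; the real pattern has many more alternatives and matches the ID separately):

import re

# Reduced stand-in for the permalink/watchparty branches of _VALID_URL.
pattern = re.compile(
    r'https?://(?:[\w-]+\.)?facebook\.com/'
    r'(?:groups/[^/]+/permalink/|watchparty/)(?P<id>\d+)')

m = pattern.match('https://www.facebook.com/watchparty/211641140192478')
assert m and m.group('id') == '211641140192478'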
@@ -280,8 +283,18 @@ class FacebookIE(InfoExtractor):
         # data.video.creation_story.attachments[].media
         'url': 'https://www.facebook.com/watch/live/?v=1823658634322275',
         'only_matching': True,
+    }, {
+        'url': 'https://www.facebook.com/watchparty/211641140192478',
+        'info_dict': {
+            'id': '211641140192478',
+        },
+        'playlist_count': 1,
+        'skip': 'Requires logging in',
     }]
     _SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
+    _api_config = {
+        'graphURI': '/api/graphql/'
+    }
 
     @staticmethod
     def _extract_urls(webpage):
@@ -405,6 +418,17 @@ class FacebookIE(InfoExtractor):
             self._sort_formats(formats)
 
+        def extract_relay_data(_filter):
+            return self._parse_json(self._search_regex(
+                r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter,
+                webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
+
+        def extract_relay_prefetched_data(_filter):
+            replay_data = extract_relay_data(_filter)
+            for require in (replay_data.get('require') or []):
+                if require[0] == 'RelayPrefetchedStreamCache':
+                    return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
+
         if not video_data:
             server_js_data = self._parse_json(self._search_regex([
                 r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+' + self._SUPPORTED_PAGLETS_REGEX,
@@ -413,87 +437,83 @@ class FacebookIE(InfoExtractor):
             video_data = extract_from_jsmods_instances(server_js_data)
 
         if not video_data:
-            graphql_data = self._parse_json(self._search_regex(
-                r'handleWithCustomApplyEach\([^,]+,\s*({.*?"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+".*?})\);',
-                webpage, 'graphql data', default='{}'), video_id, fatal=False) or {}
-            for require in (graphql_data.get('require') or []):
-                if require[0] == 'RelayPrefetchedStreamCache':
-                    entries = []
-
-                    def parse_graphql_video(video):
-                        formats = []
-                        q = qualities(['sd', 'hd'])
-                        for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]:
-                            playable_url = video.get('playable_url' + suffix)
-                            if not playable_url:
-                                continue
-                            formats.append({
-                                'format_id': format_id,
-                                'quality': q(format_id),
-                                'url': playable_url,
-                            })
-                        extract_dash_manifest(video, formats)
-                        process_formats(formats)
-                        v_id = video.get('videoId') or video.get('id') or video_id
-                        info = {
-                            'id': v_id,
-                            'formats': formats,
-                            'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']),
-                            'uploader_id': try_get(video, lambda x: x['owner']['id']),
-                            'timestamp': int_or_none(video.get('publish_time')),
-                            'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
-                        }
-                        description = try_get(video, lambda x: x['savable_description']['text'])
-                        title = video.get('name')
-                        if title:
-                            info.update({
-                                'title': title,
-                                'description': description,
-                            })
-                        else:
-                            info['title'] = description or 'Facebook video #%s' % v_id
-                        entries.append(info)
-
-                    def parse_attachment(attachment, key='media'):
-                        media = attachment.get(key) or {}
-                        if media.get('__typename') == 'Video':
-                            return parse_graphql_video(media)
-
-                    data = try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
-
-                    nodes = data.get('nodes') or []
-                    node = data.get('node') or {}
-                    if not nodes and node:
-                        nodes.append(node)
-                    for node in nodes:
-                        story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {}
-                        attachments = try_get(story, [
-                            lambda x: x['attached_story']['attachments'],
-                            lambda x: x['attachments']
-                        ], list) or []
-                        for attachment in attachments:
-                            attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict)
-                            ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
-                            for n in ns:
-                                parse_attachment(n)
-                            parse_attachment(attachment)
-
-                    edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
-                    for edge in edges:
-                        parse_attachment(edge, key='node')
-
-                    video = data.get('video') or {}
-                    if video:
-                        attachments = try_get(video, [
-                            lambda x: x['story']['attachments'],
-                            lambda x: x['creation_story']['attachments']
-                        ], list) or []
-                        for attachment in attachments:
-                            parse_attachment(attachment)
-                        if not entries:
-                            parse_graphql_video(video)
-
-                    return self.playlist_result(entries, video_id)
+            data = extract_relay_prefetched_data(
+                r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"')
+            if data:
+                entries = []
+
+                def parse_graphql_video(video):
+                    formats = []
+                    q = qualities(['sd', 'hd'])
+                    for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]:
+                        playable_url = video.get('playable_url' + suffix)
+                        if not playable_url:
+                            continue
+                        formats.append({
+                            'format_id': format_id,
+                            'quality': q(format_id),
+                            'url': playable_url,
+                        })
+                    extract_dash_manifest(video, formats)
+                    process_formats(formats)
+                    v_id = video.get('videoId') or video.get('id') or video_id
+                    info = {
+                        'id': v_id,
+                        'formats': formats,
+                        'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']),
+                        'uploader_id': try_get(video, lambda x: x['owner']['id']),
+                        'timestamp': int_or_none(video.get('publish_time')),
+                        'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
+                    }
+                    description = try_get(video, lambda x: x['savable_description']['text'])
+                    title = video.get('name')
+                    if title:
+                        info.update({
+                            'title': title,
+                            'description': description,
+                        })
+                    else:
+                        info['title'] = description or 'Facebook video #%s' % v_id
+                    entries.append(info)
+
+                def parse_attachment(attachment, key='media'):
+                    media = attachment.get(key) or {}
+                    if media.get('__typename') == 'Video':
+                        return parse_graphql_video(media)
+
+                nodes = data.get('nodes') or []
+                node = data.get('node') or {}
+                if not nodes and node:
+                    nodes.append(node)
+                for node in nodes:
+                    story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {}
+                    attachments = try_get(story, [
+                        lambda x: x['attached_story']['attachments'],
+                        lambda x: x['attachments']
+                    ], list) or []
+                    for attachment in attachments:
+                        attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict)
+                        ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
+                        for n in ns:
+                            parse_attachment(n)
+                        parse_attachment(attachment)
+
+                edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
+                for edge in edges:
+                    parse_attachment(edge, key='node')
+
+                video = data.get('video') or {}
+                if video:
+                    attachments = try_get(video, [
+                        lambda x: x['story']['attachments'],
+                        lambda x: x['creation_story']['attachments']
+                    ], list) or []
+                    for attachment in attachments:
+                        parse_attachment(attachment)
+                    if not entries:
+                        parse_graphql_video(video)
+
+                return self.playlist_result(entries, video_id)
 
         if not video_data:
             m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
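
The rewrite above folds the inline RelayPrefetchedStreamCache lookup into the new extract_relay_prefetched_data helper without changing what gets parsed. For readers unfamiliar with the embedded Relay payload, here is a contrived example of the structure those helpers traverse; the field names match what the code reads, while the surrounding values are invented for illustration:

# 'require' holds [name, method, deps, args] entries; the prefetched
# GraphQL result sits at args[1]['__bbox']['result']['data'].
replay_data = {
    'require': [
        ['RelayPrefetchedStreamCache', 'next', [], [
            'some_query_preloader_key',  # made-up key
            {'__bbox': {'result': {'data': {'video': {'id': '123'}}}}},
        ]],
    ],
}

for require in (replay_data.get('require') or []):
    if require[0] == 'RelayPrefetchedStreamCache':
        data = require[3][1]['__bbox']['result']['data']
        print(data)  # -> {'video': {'id': '123'}}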
@@ -504,6 +524,43 @@ class FacebookIE(InfoExtractor):
             elif '>You must log in to continue' in webpage:
                 self.raise_login_required()
 
+        if not video_data and '/watchparty/' in url:
+            post_data = {
+                'doc_id': 3731964053542869,
+                'variables': json.dumps({
+                    'livingRoomID': video_id,
+                }),
+            }
+            prefetched_data = extract_relay_prefetched_data(r'"login_data"\s*:\s*{')
+            if prefetched_data:
+                lsd = try_get(prefetched_data, lambda x: x['login_data']['lsd'], dict)
+                if lsd:
+                    post_data[lsd['name']] = lsd['value']
+            relay_data = extract_relay_data(r'\[\s*"RelayAPIConfigDefaults"\s*,')
+            for define in (relay_data.get('define') or []):
+                if define[0] == 'RelayAPIConfigDefaults':
+                    self._api_config = define[2]
+            living_room = self._download_json(
+                urljoin(url, self._api_config['graphURI']), video_id,
+                data=urlencode_postdata(post_data))['data']['living_room']
+            entries = []
+            for edge in (try_get(living_room, lambda x: x['recap']['watched_content']['edges']) or []):
+                video = try_get(edge, lambda x: x['node']['video']) or {}
+                v_id = video.get('id')
+                if not v_id:
+                    continue
+                v_id = compat_str(v_id)
+                entries.append(self.url_result(
+                    self._VIDEO_PAGE_TEMPLATE % v_id,
+                    self.ie_key(), v_id, video.get('name')))
+            return self.playlist_result(entries, video_id)
+
         if not video_data:
             # Video info not in first request, do a secondary request using
             # tahoe player specific URL
             tahoe_data = self._download_webpage(
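
With the patch applied, a watchparty URL can be handed to youtube-dl like any other Facebook link. Since the new test above is skipped with 'Requires logging in', cookies from an authenticated session are needed; a usage sketch via the embedding API, with a placeholder cookie-file path:

import youtube_dl

ydl_opts = {'cookiefile': 'facebook_cookies.txt'}  # exported browser cookies
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info(
        'https://www.facebook.com/watchparty/211641140192478',
        download=False)
    # The extractor returns a playlist of the videos watched in the party.
    print(info['id'], len(info.get('entries') or []))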
