@ -5,10 +5,13 @@ import re
from . common import InfoExtractor
from . . utils import (
clean_html ,
extract_attributes ,
ExtractorError ,
get_element_by_id ,
int_or_none ,
str_to_int ,
merge_dicts ,
parse_count ,
T ,
traverse_obj ,
unified_strdate ,
@ -38,7 +41,7 @@ class YouPornIE(InfoExtractor):
' tags ' : list ,
' age_limit ' : 18 ,
} ,
' skip ' : ' This video has been d isabl ed' ,
' skip ' : ' This video has been d eactivat ed' ,
} , {
# Unknown uploader
' url ' : ' http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4 ' ,
@ -100,12 +103,29 @@ class YouPornIE(InfoExtractor):
return list ( yield_urls ( ) )
def _real_extract ( self , url ) :
# Actual video ID is hidden in the page
display_id = self . _match_valid_url ( url ) . group ( ' id ' , ' display_id ' )
url = ' http://www.youporn.com/watch/ %s ' % ( display_id [ 0 ] , )
display_id = display_id [ 1 ] or display_id [ 0 ]
webpage = self . _download_webpage (
url , display_id , headers = { ' Cookie ' : ' age_verified=1 ' } )
video_id = display_id [ 0 ]
watchable = self . _search_regex (
r ''' (<div \ s[^>]* \ bid \ s*= \ s*( ' | " )?watch-container(?(2) \ 2|(?!-) \ b)[^>]*>) ''' ,
webpage , ' video id ' , default = None )
if not watchable :
msg = re . split ( r ' \ s {4} ' , clean_html ( get_element_by_id (
' mainContent ' , webpage ) ) or ' ' ) [ 0 ]
raise ExtractorError (
( ' %s says: %s ' % ( self . IE_NAME , msg ) )
if msg else ' Video unavailable: no reason found ' ,
expected = True )
video_id = extract_attributes ( watchable ) . get ( ' data-video-id ' )
if not video_id :
# last chance saloon
video_id = self . _search_regex (
r ''' \ bcurrentVideoId \ s*: \ s*( ' | " )(?P<id> \ d+) \ 1 ''' ,
webpage , ' video id ' , group = ' id ' )
playervars = self . _search_json (
r ' \ bplayervars \ s*: ' , webpage , ' playervars ' , display_id )
@ -169,8 +189,10 @@ class YouPornIE(InfoExtractor):
thumbnail = self . _search_regex (
r ' (?:imageurl \ s*=|poster \ s*:) \ s*([ " \' ])(?P<thumbnail>.+?) \ 1 ' ,
webpage , ' thumbnail ' , fatal = False , group = ' thumbnail ' )
duration = int_or_none ( self . _html_search_meta (
' video:duration ' , webpage , ' duration ' , fatal = False ) )
duration = traverse_obj ( playervars , ( ' duration ' , T ( int_or_none ) ) )
if duration is None :
duration = int_or_none ( self . _html_search_meta (
' video:duration ' , webpage , ' duration ' , fatal = False ) )
uploader = self . _html_search_regex (
r ' (?s)<div[^>]+class=[ " \' ]submitByLink[ " \' ][^>]*>(.+?)</div> ' ,
@ -186,11 +208,11 @@ class YouPornIE(InfoExtractor):
view_count = None
views = self . _search_regex (
r ' (<div [^>]+\ bclass=[ " \' ]js_videoInfoViews[ " \' ]>) ' , webpage ,
' views ' , default = None )
r ' (<div \ s[^>]* \ bdata-value \ s*=[^>]+>) \ s*<label>Views:</label> ' ,
webpage , ' views ' , default = None )
if views :
view_count = str_to_i nt( extract_attributes ( views ) . get ( ' data-value ' ) )
comment_count = str_to_i nt( self . _search_regex (
view_count = parse_cou nt( extract_attributes ( views ) . get ( ' data-value ' ) )
comment_count = parse_cou nt( self . _search_regex (
r ' >All [Cc]omments? \ (([ \ d,.]+) \ ) ' ,
webpage , ' comment count ' , default = None ) )
@ -211,7 +233,7 @@ class YouPornIE(InfoExtractor):
result = merge_dicts ( data , {
' id ' : video_id ,
' display_id ' : display_id if display_id != video_id else None ,
' display_id ' : display_id ,
' title ' : title ,
' description ' : description ,
' thumbnail ' : thumbnail ,
@ -225,4 +247,8 @@ class YouPornIE(InfoExtractor):
' age_limit ' : age_limit ,
' formats ' : formats ,
} )
# Remove promotionalm non-description
if result . get ( ' description ' , ' ' ) . startswith (
' Watch %s online ' % ( result [ ' title ' ] , ) ) :
del result [ ' description ' ]
return result