# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (c) 2005-2016 Thomas Perl and the gPodder Team
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

# gpodder.youtube - YouTube and related magic
# Justin Forest <justin.forest@gmail.com> 2008-10-13
#

import gpodder

from gpodder import util

import os.path

import logging
logger = logging.getLogger(__name__)

try:
    import simplejson as json
except ImportError:
    import json

import re
import urllib.request
import urllib.parse
import urllib.error

# This module already requires Python 3 (see the urllib.parse imports above),
# so the old Python < 2.6 fallback to cgi.parse_qs is no longer needed.
from urllib.parse import parse_qs


# http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
# format id, (preferred ids, path(?), description) # video bitrate, audio bitrate
formats = [
    # WebM VP8 video, Vorbis audio
    # Fall back to an MP4 version of the same quality.
    # Try 34 (FLV 360p H.264 AAC) if 18 (MP4 360p) fails.
    # Fall back to 6 or 5 (FLV Sorenson H.263 MP3) if all else fails.
    (46, ([46, 37, 45, 22, 44, 35, 43, 18, 6, 34, 5], '45/1280x720/99/0/0', 'WebM 1080p (1920x1080)')),  # N/A, 192 kbps
    (45, ([45, 22, 44, 35, 43, 18, 6, 34, 5], '45/1280x720/99/0/0', 'WebM 720p (1280x720)')),  # 2.0 Mbps, 192 kbps
    (44, ([44, 35, 43, 18, 6, 34, 5], '44/854x480/99/0/0', 'WebM 480p (854x480)')),  # 1.0 Mbps, 128 kbps
    (43, ([43, 18, 6, 34, 5], '43/640x360/99/0/0', 'WebM 360p (640x360)')),  # 0.5 Mbps, 128 kbps

    # MP4 H.264 video, AAC audio
    # Try 35 (FLV 480p H.264 AAC) between 720p and 360p because there's no MP4 480p.
    # Try 34 (FLV 360p H.264 AAC) if 18 (MP4 360p) fails.
    # Fall back to 6 or 5 (FLV Sorenson H.263 MP3) if all else fails.
    (38, ([38, 37, 22, 35, 18, 34, 6, 5], '38/1920x1080/9/0/115', 'MP4 4K 3072p (4096x3072)')),  # 5.0 - 3.5 Mbps, 192 kbps
    (37, ([37, 22, 35, 18, 34, 6, 5], '37/1920x1080/9/0/115', 'MP4 HD 1080p (1920x1080)')),  # 4.3 - 3.0 Mbps, 192 kbps
    (22, ([22, 35, 18, 34, 6, 5], '22/1280x720/9/0/115', 'MP4 HD 720p (1280x720)')),  # 2.9 - 2.0 Mbps, 192 kbps
    (18, ([18, 34, 6, 5], '18/640x360/9/0/115', 'MP4 360p (640x360)')),  # 0.5 Mbps, 96 kbps

    # FLV H.264 video, AAC audio
    # Does not check for 360p MP4.
    # Fall back to 6 or 5 (FLV Sorenson H.263 MP3) if all else fails.
    (35, ([35, 34, 6, 5], '35/854x480/9/0/115', 'FLV 480p (854x480)')),  # 1 - 0.80 Mbps, 128 kbps
    (34, ([34, 6, 5], '34/640x360/9/0/115', 'FLV 360p (640x360)')),  # 0.50 Mbps, 128 kbps

    # FLV Sorenson H.263 video, MP3 audio
    (6, ([6, 5], '5/480x270/7/0/0', 'FLV 270p (480x270)')),  # 0.80 Mbps, 64 kbps
    (5, ([5], '5/320x240/7/0/0', 'FLV 240p (320x240)')),  # 0.25 Mbps, 64 kbps
]

formats_dict = dict(formats)
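
# For instance, formats_dict[22] yields the (preferred ids, path, description)
# tuple for MP4 HD 720p:
#   ([22, 35, 18, 34, 6, 5], '22/1280x720/9/0/115', 'MP4 HD 720p (1280x720)')
# The first element is the fallback chain tried by get_real_download_url().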


V3_API_ENDPOINT = 'https://www.googleapis.com/youtube/v3'
CHANNEL_VIDEOS_XML = 'https://www.youtube.com/feeds/videos.xml'


class YouTubeError(Exception): pass


def get_fmt_ids(youtube_config):
    fmt_ids = youtube_config.preferred_fmt_ids
    if not fmt_ids:
        format = formats_dict.get(youtube_config.preferred_fmt_id)
        if format is None:
            fmt_ids = []
        else:
            fmt_ids, path, description = format

    return fmt_ids
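
# Example (illustrative; youtube_config is gPodder's YouTube config section):
# with preferred_fmt_ids unset and preferred_fmt_id == 22, this returns the
# fallback chain [22, 35, 18, 34, 6, 5] from the formats table above.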


def get_real_download_url(url, preferred_fmt_ids=None):
    if not preferred_fmt_ids:
        preferred_fmt_ids, _, _ = formats_dict[22]  # MP4 HD 720p

    vid = get_youtube_id(url)
    if vid is not None:
        page = None
        url = 'http://www.youtube.com/get_video_info?&el=detailpage&video_id=' + vid

        # Follow "location" redirects until we get the actual page
        while page is None:
            req = util.http_request(url, method='GET')
            if 'location' in req.msg:
                url = req.msg['location']
            else:
                # Decode the response so the str regexes below can match it
                page = req.read().decode('utf-8')

        # Try to find the best video format available for this video
        # (http://forum.videohelp.com/topic336882-1800.html#1912972)
        def find_urls(page):
            r4 = re.search(r'url_encoded_fmt_stream_map=([^&]+)', page)
            if r4 is not None:
                fmt_url_map = urllib.parse.unquote(r4.group(1))
                for fmt_url_encoded in fmt_url_map.split(','):
                    video_info = parse_qs(fmt_url_encoded)
                    yield int(video_info['itag'][0]), video_info['url'][0]
            else:
                error_info = parse_qs(page)
                error_message = util.remove_html_tags(error_info['reason'][0])
                raise YouTubeError('Cannot download video: %s' % error_message)

        fmt_id_url_map = sorted(find_urls(page), reverse=True)

        if not fmt_id_url_map:
            raise YouTubeError('fmt_url_map not found for video ID "%s"' % vid)

        # Default to the highest fmt_id if we don't find a match below
        _, url = fmt_id_url_map[0]

        formats_available = set(fmt_id for fmt_id, url in fmt_id_url_map)
        fmt_id_url_map = dict(fmt_id_url_map)

        for fmt_id in preferred_fmt_ids:
            fmt_id = int(fmt_id)
            if fmt_id in formats_available:
                format = formats_dict.get(fmt_id)
                if format is not None:
                    _, _, description = format
                else:
                    description = 'Unknown'

                logger.info('Found YouTube format: %s (fmt_id=%d)',
                        description, fmt_id)
                url = fmt_id_url_map[fmt_id]
                break

    return url
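
# Note on the stream map: url_encoded_fmt_stream_map is a comma-separated
# list of urlencoded query strings, one per available format, along the
# lines of (illustrative):
#
#   itag=22&url=http%3A%2F%2F...,itag=18&url=http%3A%2F%2F...
#
# find_urls() splits it on ',' and picks the 'itag' and 'url' fields.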


def get_youtube_id(url):
    r = re.compile(r'http[s]?://(?:[a-z]+\.)?youtube\.com/v/(.*)\.swf', re.IGNORECASE).match(url)
    if r is not None:
        return r.group(1)

    r = re.compile(r'http[s]?://(?:[a-z]+\.)?youtube\.com/watch\?v=([^&]*)', re.IGNORECASE).match(url)
    if r is not None:
        return r.group(1)

    r = re.compile(r'http[s]?://(?:[a-z]+\.)?youtube\.com/v/(.*)[?]', re.IGNORECASE).match(url)
    if r is not None:
        return r.group(1)

    return for_each_feed_pattern(lambda url, channel: channel, url, None)
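
# Example: 'https://www.youtube.com/watch?v=FOO' and
# 'https://www.youtube.com/v/FOO.swf' both yield 'FOO' (a made-up id);
# channel and user URLs fall through to for_each_feed_pattern() below.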


def is_video_link(url):
    return get_youtube_id(url) is not None


def is_youtube_guid(guid):
    return guid.startswith('tag:youtube.com,2008:video:')


def for_each_feed_pattern(func, url, fallback_result):
    """
    Try to find the username for all possible YouTube feed/webpage URLs.

    Calls func(url, channel) for each matching pattern; if func() returns
    a result other than None, that result is returned. If no pattern
    matches or func() returns None, fallback_result is returned.
    """
    CHANNEL_MATCH_PATTERNS = [
        r'http[s]?://(?:[a-z]+\.)?youtube\.com/user/([a-z0-9]+)',
        r'http[s]?://(?:[a-z]+\.)?youtube\.com/profile\?user=([a-z0-9]+)',
        r'http[s]?://(?:[a-z]+\.)?youtube\.com/channel/([-_a-zA-Z0-9]+)',
        r'http[s]?://(?:[a-z]+\.)?youtube\.com/rss/user/([a-z0-9]+)/videos\.rss',
        r'http[s]?://gdata\.youtube\.com/feeds/users/([^/]+)/uploads',
        r'http[s]?://gdata\.youtube\.com/feeds/base/users/([^/]+)/uploads',
        r'http[s]?://(?:[a-z]+\.)?youtube\.com/feeds/videos\.xml\?channel_id=([-_a-zA-Z0-9]+)',
    ]

    for pattern in CHANNEL_MATCH_PATTERNS:
        m = re.match(pattern, url, re.IGNORECASE)
        if m is not None:
            result = func(url, m.group(1))
            if result is not None:
                return result

    return fallback_result
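
# Example: extracting just the channel name from a URL, as get_youtube_id()
# does for its fallback case:
#
#   for_each_feed_pattern(lambda url, channel: channel,
#                         'https://www.youtube.com/user/example', None)
#   # -> 'example'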


def get_real_channel_url(url):
    def return_user_feed(url, channel):
        result = 'https://gdata.youtube.com/feeds/users/{0}/uploads'.format(channel)
        logger.debug('YouTube link resolved: %s => %s', url, result)
        return result

    return for_each_feed_pattern(return_user_feed, url, url)


def get_real_cover(url):
    def return_user_cover(url, channel):
        try:
            api_url = 'https://www.youtube.com/channel/{0}'.format(channel)
            data = util.urlopen(api_url).read().decode('utf-8')
            m = re.search(r'<img class="channel-header-profile-image"[^>]* src=[\'"]([^\'"]+)[\'"][^>]*>', data)
            if m is not None:
                logger.debug('YouTube userpic for %s is: %s', url, m.group(1))
                return m.group(1)
        except Exception:
            logger.warning('Could not retrieve cover art', exc_info=True)

        return None

    return for_each_feed_pattern(return_user_cover, url, None)


def get_channels_for_user(username, api_key_v3):
    stream = util.urlopen('{0}/channels?forUsername={1}&part=id&key={2}'.format(
        V3_API_ENDPOINT, username, api_key_v3))
    data = json.load(stream)
    return ['{0}?channel_id={1}'.format(CHANNEL_VIDEOS_XML, item['id'])
            for item in data['items']]


def resolve_v3_url(url, api_key_v3):
    # Check if it's a YouTube feed, and if we have an API key,
    # auto-resolve the channel
    if url and api_key_v3:
        _, user = for_each_feed_pattern(
            lambda url, channel: (url, channel), url, (None, None))
        if user is not None:
            logger.info('Getting channels for YouTube user %s', user)
            new_urls = get_channels_for_user(user, api_key_v3)
            logger.debug('YouTube channels retrieved: %r', new_urls)
            if len(new_urls) == 1:
                return new_urls[0]

    return url
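

if __name__ == '__main__':
    # Quick offline sketch of the URL helpers above (the video id and user
    # name are only illustrative; no network access is needed):
    print(get_youtube_id('https://www.youtube.com/watch?v=FOO'))   # 'FOO'
    print(is_video_link('https://www.youtube.com/watch?v=FOO'))    # True
    print(get_real_channel_url('https://www.youtube.com/user/example'))
    # -> 'https://gdata.youtube.com/feeds/users/example/uploads'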