2008-10-13 15:28:44 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
#
|
|
|
|
# gPodder - A media aggregator and podcast client
|
2011-04-01 18:59:42 +02:00
|
|
|
# Copyright (c) 2005-2011 Thomas Perl and the gPodder Team
|
2008-10-13 15:28:44 +02:00
|
|
|
#
|
|
|
|
# gPodder is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation; either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# gPodder is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
#
|
2009-08-24 17:02:35 +02:00
|
|
|
# gpodder.youtube - YouTube and related magic
|
2008-10-13 15:28:44 +02:00
|
|
|
# Justin Forest <justin.forest@gmail.com> 2008-10-13
|
|
|
|
#
|
2009-08-24 17:02:35 +02:00
|
|
|
|
|
|
|
|
|
|
|
import gpodder
|
|
|
|
|
|
|
|
from gpodder import util
|
2011-07-15 16:32:06 +02:00
|
|
|
|
|
|
|
import logging
|
|
|
|
logger = logging.getLogger(__name__)
|
2008-10-13 15:28:44 +02:00
|
|
|
|
2011-04-10 23:48:22 +02:00
|
|
|
try:
|
|
|
|
import simplejson as json
|
|
|
|
except ImportError:
|
|
|
|
import json
|
|
|
|
|
2008-10-13 15:28:44 +02:00
|
|
|
import re
|
2008-10-15 13:47:27 +02:00
|
|
|
import urllib
|
2008-10-13 15:28:44 +02:00
|
|
|
|
2010-07-18 21:30:32 +02:00
|
|
|
# See http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
# Currently missing: the WebM 480p and 720 formats; 3GP profile
#
# Each entry is (fmt_id, fmt_map spec, human-readable description).
# The middle element looks like a "fmt_map" specification string
# (fmt_id/resolution/...); the selection loop in get_real_download_url()
# unpacks but never uses it — presumably legacy data, verify before removing.
# Entries are ordered from highest to lowest quality; the format-selection
# loop in get_real_download_url() relies on this ordering (it takes the
# first available format at or below the preferred one).
supported_formats = [
    (37, '37/1920x1080/9/0/115', '1920x1080 (HD)'),
    (22, '22/1280x720/9/0/115', '1280x720 (HD)'),
    (35, '35/854x480/9/0/115', '854x480'),
    (34, '34/640x360/9/0/115', '640x360'),
    (18, '18/640x360/9/0/115', '640x360 (iPod)'),
    (18, '18/480x360/9/0/115', '480x360 (iPod)'),
    (5, '5/320x240/7/0/0', '320x240 (FLV)'),
]
|
|
|
|
|
2010-12-18 14:32:33 +01:00
|
|
|
class YouTubeError(Exception):
    """Raised when YouTube format/URL resolution fails."""
|
|
|
|
|
2011-06-08 11:01:36 +02:00
|
|
|
def get_real_download_url(url, preferred_fmt_id=None):
    """Resolve a YouTube video URL to a direct media download URL.

    url -- a URL that get_youtube_id() can extract a video ID from;
           any other URL is returned unchanged
    preferred_fmt_id -- preferred YouTube format ID (see the fmt_id
           column of supported_formats); defaults to 18 when None

    Raises YouTubeError when the watch page contains no
    "url_encoded_fmt_stream_map" data for the video.
    """
    # Default fmt_id when none preferred
    if preferred_fmt_id is None:
        preferred_fmt_id = 18

    # For Maemo 5, we force fmt_id 5 for performance reasons
    if gpodder.ui.fremantle and not gpodder.ui.harmattan:
        preferred_fmt_id = 5

    vid = get_youtube_id(url)
    if vid is not None:
        page = None
        url = 'http://www.youtube.com/watch?v=' + vid

        # Follow HTTP redirects manually until we get an actual page.
        # NOTE(review): util.http_request is a project helper; this
        # assumes req.msg is a mimetools/httplib-style message object
        # exposing the 'location' header — confirm against util.
        while page is None:
            req = util.http_request(url, method='GET')
            if 'location' in req.msg:
                url = req.msg['location']
            else:
                page = req.read()

        # Try to find the best video format available for this video
        # (http://forum.videohelp.com/topic336882-1800.html#1912972)
        def find_urls(page):
            # Yields (fmt_id, download URL) pairs parsed from the
            # JSON-escaped "url_encoded_fmt_stream_map" value embedded
            # in the watch page; yields nothing if the map is missing.
            r4 = re.search('.*"url_encoded_fmt_stream_map"\:\s+"([^"]+)".*', page)
            if r4 is not None:
                fmt_url_map = r4.group(1)
                for fmt_url_encoded in fmt_url_map.split(','):
                    # '\\u0026' is the JSON-escaped '&' separating the
                    # key=value fields of each stream entry
                    video_info = dict(map(urllib.unquote, x.split('=', 1))
                            for x in fmt_url_encoded.split('\\u0026'))

                    yield int(video_info['itag']), video_info['url']

        # Sorted descending, so index 0 is the highest fmt_id found
        fmt_id_url_map = sorted(find_urls(page), reverse=True)

        # Default to the highest fmt_id if we don't find a match below
        if fmt_id_url_map:
            default_fmt_id, default_url = fmt_id_url_map[0]
        else:
            raise YouTubeError('fmt_url_map not found for video ID "%s"' % vid)

        formats_available = set(fmt_id for fmt_id, url in fmt_id_url_map)
        fmt_id_url_map = dict(fmt_id_url_map)

        if gpodder.ui.fremantle:
            # This provides good quality video, seems to be always available
            # and is playable fluently in Media Player
            if preferred_fmt_id == 5:
                fmt_id = 5
            else:
                fmt_id = 18
        else:
            # As a fallback, use fmt_id 18 (seems to be always available)
            fmt_id = 18

        # This will be set to True if the search below has already "seen"
        # our preferred format, but has not yet found a suitable available
        # format for the given video.
        seen_preferred = False

        # supported_formats is ordered best-to-worst, so this picks the
        # first available format at or below the preferred quality
        for id, wanted, description in supported_formats:
            # If we see our preferred format, accept formats below
            if id == preferred_fmt_id:
                seen_preferred = True

            # If the format is available and preferred (or lower),
            # use the given format for our fmt_id
            if id in formats_available and seen_preferred:
                logger.info('Found YouTube format: %s (fmt_id=%d)',
                        description, id)
                fmt_id = id
                break

        # Fall back to the best-quality URL if the chosen fmt_id is
        # not in the map (e.g. fallback fmt_id 18 is unavailable)
        url = fmt_id_url_map.get(fmt_id, None)
        if url is None:
            url = default_url

    return url
|
|
|
|
|
2009-01-26 12:37:21 +01:00
|
|
|
def get_youtube_id(url):
    """Extract the YouTube video ID from a URL.

    Recognizes the embedded-player "/v/<id>.swf" form and the
    "watch?v=<id>" form; returns the ID string, or None if the URL
    matches neither.
    """
    patterns = (
        'http://(?:[a-z]+\.)?youtube\.com/v/(.*)\.swf',
        'http://(?:[a-z]+\.)?youtube\.com/watch\?v=([^&]*)',
    )

    for pattern in patterns:
        match = re.compile(pattern, re.IGNORECASE).match(url)
        if match is not None:
            return match.group(1)

    return None
|
|
|
|
|
2009-09-09 19:53:26 +02:00
|
|
|
def is_video_link(url):
    """Tell whether the given URL points to a YouTube video."""
    video_id = get_youtube_id(url)
    return video_id is not None
|
|
|
|
|
2008-10-13 15:28:44 +02:00
|
|
|
def get_real_channel_url(url):
    """Resolve a YouTube channel URL to its per-user RSS feed URL.

    Recognizes both the "/user/<name>" and the "/profile?user=<name>"
    URL styles and maps them to the corresponding videos.rss feed;
    any other URL is returned unchanged.
    """
    patterns = (
        'http://(?:[a-z]+\.)?youtube\.com/user/([a-z0-9]+)',
        # Bugfix: the '?' must be escaped — unescaped it made the 'e'
        # in 'profile' optional instead of matching the literal '?',
        # so real "profile?user=<name>" URLs were never resolved.
        'http://(?:[a-z]+\.)?youtube\.com/profile\?user=([a-z0-9]+)',
    )

    for pattern in patterns:
        m = re.compile(pattern, re.IGNORECASE).match(url)
        if m is not None:
            # 'next_url' instead of 'next' to avoid shadowing the builtin
            next_url = 'http://www.youtube.com/rss/user/'+ m.group(1) +'/videos.rss'
            logger.debug('YouTube link resolved: %s => %s', url, next_url)
            return next_url

    return url
|
2008-10-13 17:07:01 +02:00
|
|
|
|
|
|
|
def get_real_cover(url):
    """Look up the userpic for a YouTube videos.rss feed URL.

    Queries the GData API for the feed's user and returns the
    thumbnail URL from the response, or None if the URL is not a
    YouTube feed or no thumbnail is found.
    """
    feed_re = re.compile('http://www\.youtube\.com/rss/user/([^/]+)/videos\.rss', \
            re.IGNORECASE)
    feed_match = feed_re.match(url)
    if feed_match is None:
        return None

    username = feed_match.group(1)
    api_url = 'http://gdata.youtube.com/feeds/api/users/%s?v=2' % username
    data = util.urlopen(api_url).read()
    thumbnail = re.search('<media:thumbnail url=[\'"]([^\'"]+)[\'"]/>', data)
    if thumbnail is None:
        return None

    logger.debug('YouTube userpic for %s is: %s', url, thumbnail.group(1))
    return thumbnail.group(1)
|
2008-10-14 18:15:01 +02:00
|
|
|
|
2008-10-15 13:47:27 +02:00
|
|
|
def find_youtube_channels(string):
    """Search YouTube videos for a query string and collect channels.

    Queries the GData video search API and returns a FakeImporter-like
    object whose .items list holds one {'title', 'url', 'description'}
    dict per distinct uploading user (title = username, url = the
    user's videos.rss feed, description = the matched video's title).
    """
    query_url = 'http://gdata.youtube.com/feeds/api/videos?alt=json&q=%s' % urllib.quote(string, '')
    feed = json.load(util.urlopen(query_url))

    # Minimal stand-in for an OPML importer result object
    class FakeImporter(object):
        def __init__(self):
            self.items = []

    importer = FakeImporter()

    # Only report each uploading user once, in first-seen order
    known_users = set()
    for entry in feed['feed']['entry']:
        author = entry['author'][0]['name']['$t']
        if author in known_users:
            continue
        known_users.add(author)
        importer.items.append({
            'title': author,
            'url': 'http://www.youtube.com/rss/user/%s/videos.rss' % author,
            'description': entry['title']['$t']
        })

    return importer
|
2008-10-15 13:47:27 +02:00
|
|
|
|