# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (c) 2005-2009 Thomas Perl and the gPodder Team
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# resolver.py -- YouTube and related magic
# Justin Forest <justin.forest@gmail.com> 2008-10-13
#
# TODO:
#
# * Channel covers.
# * Support for Vimeo, maybe blip.tv and others.
import re
import urllib
import urllib2
import gtk
import gobject
import gpodder
from xml.sax import saxutils
from gpodder.liblogger import log
from gpodder.util import proxy_request
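
# Resolve a YouTube embed (SWF) URL to a direct video download URL. As a
# rough, illustrative example (VIDEO_ID and TOKEN are placeholders):
#   http://www.youtube.com/v/VIDEO_ID.swf
#     -> http://www.youtube.com/get_video?video_id=VIDEO_ID&t=TOKEN&fmt=18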
def get_real_download_url(url, proxy=None):
    # IDs from http://forum.videohelp.com/topic336882-1800.html#1912972
    if gpodder.interface == gpodder.MAEMO:
        # Use 3GP with AAC on Maemo
        fmt_id = 17
    else:
        # Use MP4 with AAC by default
        fmt_id = 18

    r1 = re.compile('http://(?:[a-z]+\.)?youtube\.com/v/(.*)\.swf', re.IGNORECASE).match(url)
    if r1 is not None:
        page = proxy_request('http://www.youtube.com/watch?v=' + r1.group(1), proxy, method='GET').read()

        r2 = re.compile('.*"t"\:\s+"([^"]+)".*').search(page)
        if r2:
            next = 'http://www.youtube.com/get_video?video_id=' + r1.group(1) + '&t=' + r2.group(1) + '&fmt=%d' % fmt_id
            log('YouTube link resolved: %s => %s', url, next)
            return next

    return url
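
# Rewrite a YouTube user or profile page URL to the corresponding RSS feed,
# e.g. (USERNAME is a placeholder):
#   http://www.youtube.com/user/USERNAME
#     -> http://www.youtube.com/rss/user/USERNAME/videos.rss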
def get_real_channel_url(url):
    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/user/([a-z0-9]+)', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        next = 'http://www.youtube.com/rss/user/' + m.group(1) + '/videos.rss'
        log('YouTube link resolved: %s => %s', url, next)
        return next

    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/profile\?user=([a-z0-9]+)', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        next = 'http://www.youtube.com/rss/user/' + m.group(1) + '/videos.rss'
        log('YouTube link resolved: %s => %s', url, next)
        return next

    return url
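
# Given a channel feed URL as produced above, scrape the user's profile page
# for the "user-profile-image" element and return its image URL (or None).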
def get_real_cover(url):
    log('Cover: %s', url)

    r = re.compile('http://www\.youtube\.com/rss/user/([a-z0-9]+)/videos\.rss', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        data = urllib2.urlopen('http://www.youtube.com/user/' + m.group(1)).read()
        data = data[data.find('id="user-profile-image"'):]
        data = data[data.find('src="') + 5:]
        next = data[:data.find('"')]

        if next.strip() == '':
            return None

        log('YouTube userpic for %s is: %s', url, next)
        return next

    return None
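
# Estimate the size of a resolved YouTube episode by opening the real download
# URL and returning the Content-Length header of the response (0 on failure).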
def get_real_episode_length(episode):
    url = get_real_download_url(episode.url)

    if url != episode.url:
        try:
            info = urllib2.urlopen(url).info()
            if 'content-length' in info:
                return info['content-length']
        except urllib2.HTTPError:
            pass

    return 0
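
# Search YouTube for channels matching a query string and return a
# gtk.ListStore of (selected, username, feed URL) rows, one per unique user.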
def find_youtube_channels(string):
    # FIXME: Make proper use of the YouTube API instead
    # of screen-scraping the YouTube website
    url = 'http://www.youtube.com/results?search_query=' + urllib.quote(string, '') + '&search_type=search_users&aq=f'

    r = re.compile('>\s+<')
    data = r.sub('><', urllib.urlopen(url).read())

    r1 = re.compile('<a href="/user/([^"]+)"[^>]*>([^<]+)</a>')
    m1 = r1.findall(data)

    r2 = re.compile('\s+')

    model = gtk.ListStore(gobject.TYPE_BOOLEAN, gobject.TYPE_STRING, gobject.TYPE_STRING)

    found_users = []
    for (name, title) in m1:
        if name not in found_users:
            found_users.append(name)
            link = 'http://www.youtube.com/rss/user/' + name + '/videos.rss'
            model.append([False, name, link])

    return model
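
# Illustrative usage sketch (not part of the original module): resolve a
# channel URL and its cover image. USERNAME is a placeholder and the lookups
# require network access.
if __name__ == '__main__':
    feed_url = get_real_channel_url('http://www.youtube.com/user/USERNAME')
    print 'Feed URL:', feed_url
    print 'Cover image:', get_real_cover(feed_url)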