gpodder/src/gpodder/youtube.py


# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (c) 2005-2009 Thomas Perl and the gPodder Team
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# gpodder.youtube - YouTube and related magic
# Justin Forest <justin.forest@gmail.com> 2008-10-13
#
import gpodder
from gpodder import util
from gpodder.liblogger import log
import re
import urllib
import urllib2
from xml.sax import saxutils

def get_real_download_url(url, proxy=None):
    # Format IDs from http://forum.videohelp.com/topic336882-1800.html#1912972
    if gpodder.interface == gpodder.MAEMO:
        # Use 3GP with AAC on Maemo
        fmt_id = 17
    else:
        # Use MP4 with AAC by default
        fmt_id = 18

    vid = get_youtube_id(url)
    if vid is not None:
        page = None
        url = 'http://www.youtube.com/watch?v=' + vid

        # Follow redirects until we arrive at the actual watch page
        while page is None:
            req = util.proxy_request(url, proxy, method='GET')
            if 'location' in req.msg:
                url = req.msg['location']
            else:
                page = req.read()

        # The "t" token is needed to build a valid get_video URL
        r2 = re.compile('.*"t"\:\s+"([^"]+)".*').search(page)

        if gpodder.interface != gpodder.MAEMO:
            # Try to find the best video format available
            r3 = re.compile('.*"fmt_map"\:\s+"([^"]+)".*').search(page)
            if r3 is not None:
                formats = r3.group(1).split(",")
                if '18/512000/9/0/115' in formats:  # [avc1] 480x270
                    fmt_id = 18
                elif '35/640000/9/0/115' in formats:  # [H264] 480x360
                    fmt_id = 35
                elif '34/0/9/0/115' in formats:  # [H264] 320x240
                    fmt_id = 34
                elif '5/0/7/0/0' in formats:  # [FLV1] 320x240
                    fmt_id = 5

        if r2:
            next = 'http://www.youtube.com/get_video?video_id=' + vid + '&t=' + r2.group(1) + '&fmt=%d' % fmt_id
            log('YouTube link resolved: %s => %s', url, next)
            return next

    return url
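
# Usage sketch (illustrative, not part of the original module): resolving
# a watch URL performs live HTTP requests, and the result depends on the
# "t" token scraped from the page, so the output below only shows the
# shape of the resolved URL. The video ID is made up.
#
#   >>> get_real_download_url('http://www.youtube.com/watch?v=abc123XYZxy')
#   'http://www.youtube.com/get_video?video_id=abc123XYZxy&t=...&fmt=18'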

def get_youtube_id(url):
    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/v/(.*)\.swf', re.IGNORECASE).match(url)
    if r is not None:
        return r.group(1)

    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/watch\?v=([^&]*)', re.IGNORECASE).match(url)
    if r is not None:
        return r.group(1)

    return None
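
# Example (illustrative; the IDs are made up). Both URL shapes handled
# above resolve to the bare video ID; anything else yields None:
#
#   >>> get_youtube_id('http://www.youtube.com/watch?v=abc123XYZxy&feature=x')
#   'abc123XYZxy'
#   >>> get_youtube_id('http://example.com/video.flv') is None
#   True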

def get_real_channel_url(url):
    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/user/([a-z0-9]+)', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        next = 'http://www.youtube.com/rss/user/' + m.group(1) + '/videos.rss'
        log('YouTube link resolved: %s => %s', url, next)
        return next

    # The "?" must be escaped here: profile URLs look like /profile?user=<name>
    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/profile\?user=([a-z0-9]+)', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        next = 'http://www.youtube.com/rss/user/' + m.group(1) + '/videos.rss'
        log('YouTube link resolved: %s => %s', url, next)
        return next

    return url
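
# Example (illustrative; the user name is made up). Channel and profile
# URLs both map to the per-user RSS feed; unrecognized URLs pass through:
#
#   >>> get_real_channel_url('http://www.youtube.com/user/example42')
#   'http://www.youtube.com/rss/user/example42/videos.rss'
#   >>> get_real_channel_url('http://example.com/feed.rss')
#   'http://example.com/feed.rss'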

def get_real_cover(url):
    log('Cover: %s', url)

    r = re.compile('http://www\.youtube\.com/rss/user/([a-z0-9]+)/videos\.rss', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        # Scrape the src attribute of the profile image from the channel page
        data = urllib2.urlopen('http://www.youtube.com/user/' + m.group(1)).read()
        data = data[data.find('id="user-profile-image"'):]
        data = data[data.find('src="') + 5:]
        next = data[:data.find('"')]

        if next.strip() == '':
            return None

        log('YouTube userpic for %s is: %s', url, next)
        return next

    return None
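
# Sketch of the expected behaviour (requires network access and only works
# while the channel page still contains id="user-profile-image"):
#
#   >>> get_real_cover('http://www.youtube.com/rss/user/example42/videos.rss')
#   'http://.../userpic.jpg'   # or None if scraping fails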

def get_real_episode_length(episode):
    url = get_real_download_url(episode.url)

    if url != episode.url:
        try:
            info = urllib2.urlopen(url).info()
            if 'content-length' in info:
                return info['content-length']
        except urllib2.HTTPError:
            pass

    return 0
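
# Note: on success this returns the Content-Length header value (a string
# in urllib2), on failure the integer 0. A hedged usage sketch, assuming
# an `episode` object with a `url` attribute:
#
#   >>> get_real_episode_length(episode)
#   '4588763'   # e.g.; 0 if the URL did not resolve or the request failed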

def find_youtube_channels(string):
    # FIXME: Make proper use of the YouTube API instead
    # of screen-scraping the YouTube website
    url = 'http://www.youtube.com/results?search_query=' + urllib.quote(string, '') + '&search_type=search_users&aq=f'

    # Collapse whitespace between tags so the regex below matches
    r = re.compile('>\s+<')
    data = r.sub('><', urllib2.urlopen(url).read())

    r1 = re.compile('<a href="/user/([^"]+)"[^>]*>([^<]+)</a>')
    m1 = r1.findall(data)

    r2 = re.compile('\s+')

    class FakeImporter(object):
        def __init__(self):
            self.items = []

    result = FakeImporter()
    found_users = []

    for name, title in m1:
        if name not in found_users:
            found_users.append(name)
            link = 'http://www.youtube.com/rss/user/' + name + '/videos.rss'
            result.items.append({'title': name, 'url': link, 'description': title})

    return result
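

if __name__ == '__main__':
    # Ad-hoc smoke test (an addition for illustration, not original code):
    # performs a live search against youtube.com, so it only succeeds as
    # long as the screen-scraped markup above still matches.
    for channel in find_youtube_channels('linux').items:
        print '%(title)s => %(url)s' % channel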