Support for iTunes Podcast links (http://phobos.apple.com/...)

git-svn-id: svn://svn.berlios.de/gpodder/trunk@482 b0d088ad-0a06-0410-aad2-9ed5178a7e87
This commit is contained in:
Thomas Perl 2007-12-06 08:31:09 +00:00
parent dd90fbad42
commit b714659132
2 changed files with 92 additions and 0 deletions

View File

@ -1,3 +1,10 @@
Thu, 06 Dec 2007 09:29:09 +0100 <thp@perli.net>
Support for iTunes Podcast links (http://phobos.apple.com/...)
* src/gpodder/util.py: Add support for subscribing to iTunes Podcast
links (those with phobos.apple.com); new functions:
parse_itunes_xml(), http_get_and_gunzip() and itunes_discover_rss()
Thu, 06 Dec 2007 09:15:57 +0100 <thp@perli.net>
Added Galician translation from Teo Ramirez

View File

@ -42,9 +42,14 @@ import re
import htmlentitydefs
import time
import locale
import gzip
import urlparse
import urllib
import urllib2
import StringIO
import xml.dom.minidom
def make_directory( path):
@ -73,8 +78,13 @@ def normalize_feed_url( url):
This will also normalize feed:// and itpc:// to http://
"""
if not url or len( url) < 8:
return None
url = itunes_discover_rss(url)
if url is None:
return None
if url.startswith( 'http://') or url.startswith( 'https://') or url.startswith( 'ftp://'):
return url
@ -461,3 +471,78 @@ def find_command( command):
return None
def parse_itunes_xml(doc):
    """
    Extract the RSS feed URL from an Apple iTunes Podcast XML document.

    The "doc" parameter has to be a string containing the XML
    document. Every "<dict>" element is visited in document order
    and its direct children are scanned for the first occurrence
    of a "<key>feedURL</key>" element; the text of the next
    non-empty "<string>" element following that key inside the
    same "<dict>" is returned.

    Returns the feed URL, or None if no "<dict>" contains a
    "feedURL" key followed by a "<string>" with text content.
    Parse errors raised by xml.dom.minidom.parseString() are not
    caught here and propagate to the caller.
    """
    d = xml.dom.minidom.parseString(doc)

    for pairs in d.getElementsByTagName('dict'):
        # The current key is tracked per <dict>: a "feedURL" key that
        # has no value in one dictionary must not be paired with a
        # <string> that belongs to a later (or nested) dictionary.
        last_key = None

        for node in pairs.childNodes:
            # Skip whitespace text nodes, comments, etc.
            if node.nodeType != node.ELEMENT_NODE:
                continue

            if node.tagName == 'key':
                first = node.firstChild
                if first is not None and first.nodeType == node.TEXT_NODE:
                    last_key = first.data
                else:
                    # An empty/non-text key starts a new pair as well;
                    # do not let it inherit the previous key's name.
                    last_key = None
                continue

            if last_key != 'feedURL':
                continue

            if node.tagName == 'string' and node.childNodes.length > 0:
                if node.firstChild.nodeType == node.TEXT_NODE:
                    return node.firstChild.data

    return None
def http_get_and_gunzip(uri):
    """
    Does a HTTP GET request and tells the server that we accept
    gzip-encoded data. According to the original author this is
    necessary because the Apple iTunes server will always return
    gzip-encoded data, regardless of what we really request.

    The response body is decompressed only when the server labels
    it with a gzip "Content-Encoding" header; otherwise it is
    returned unchanged.

    Returns the (uncompressed) document at the given URI. Errors
    raised by urllib2 or by the gzip decoder are not caught here.
    """
    request = urllib2.Request(uri)
    request.add_header("Accept-encoding", "gzip")

    usock = urllib2.urlopen(request)
    try:
        data = usock.read()
        encoding = usock.headers.get('content-encoding', None) or ''
    finally:
        # Always release the connection, even if reading fails.
        usock.close()

    # Content-coding names are case-insensitive and "x-gzip" is the
    # historical alias of "gzip".
    if encoding.strip().lower() in ('gzip', 'x-gzip'):
        data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()

    return data
def itunes_discover_rss(url):
    """
    Takes an iTunes-specific podcast URL and turns it
    into a "normal" RSS feed URL. If the given URL is
    not a phobos.apple.com URL, we will simply return
    the URL and assume it's already an RSS feed URL.

    For phobos.apple.com URLs the page is downloaded, the single
    itmsOpen('...') target is extracted from it, its "itms://"
    scheme is rewritten to "http://", and the document behind
    that URL is handed to parse_itunes_xml().

    Returns the resulting feed URL, or None if any step of the
    discovery fails (download error, no or more than one
    itmsOpen() target, unparsable XML, no feedURL entry).

    Idea from Andrew Clarke's itunes-url-decoder.py
    """
    if 'phobos.apple.com' not in url.lower():
        # This doesn't look like an iTunes URL
        return url

    try:
        data = http_get_and_gunzip(url)
        # Exactly one match is expected; the tuple unpacking raises
        # ValueError otherwise, which is handled below.
        (url,) = re.findall(r"itmsOpen\('([^']*)", data)
        url = url.replace('itms://', 'http://')
        feed_data = http_get_and_gunzip(url)
        return parse_itunes_xml(feed_data)
    except Exception:
        # Discovery is best-effort: report any failure as "no feed",
        # but let KeyboardInterrupt/SystemExit propagate.
        return None