Port from feedparser to podcastparser
commit 4189cf48fb
parent 28788865fb

 README | 4 ++--

@@ -31,7 +31,7 @@
 [ DEPENDENCIES ]

 - Python 2.7 or newer            http://python.org/
-- Feedparser 5.1.2 or newer      http://code.google.com/p/feedparser/
+- Podcastparser 0.6.0 or newer   http://gpodder.org/podcastparser/
 - mygpoclient 1.7 or newer       http://gpodder.org/mygpoclient/
 - Python D-Bus bindings

@@ -39,7 +39,7 @@
 the dummy (no-op) D-Bus module provided in "tools/fake-dbus-module/".

 For quick testing, you can use the script tools/localdepends.py to
-install local copies of feedparser and mygpoclient into "src/" from
+install local copies of podcastparser and mygpoclient into "src/" from
 PyPI. With this, you get a self-contained gPodder CLI codebase.

@@ -37,12 +37,12 @@ import locale

 # Check if real hard dependencies are available
 try:
-    import feedparser
+    import podcastparser
 except ImportError:
     print """
-    Error: Module "feedparser" (python-feedparser) not found.
-    The feedparser module can be downloaded from
-    http://code.google.com/p/feedparser/
+    Error: Module "podcastparser" (python-podcastparser) not found.
+    The podcastparser module can be downloaded from
+    http://gpodder.org/podcastparser/

     From a source checkout, you can download local copies of all
     CLI dependencies for debugging (will be placed into "src/"):
@@ -50,7 +50,7 @@ except ImportError:
         python tools/localdepends.py
     """
     sys.exit(1)
-del feedparser
+del podcastparser

 try:
     import mygpoclient
@@ -58,7 +58,7 @@ except ImportError:
     print """
     Error: Module "mygpoclient" (python-mygpoclient) not found.
     The mygpoclient module can be downloaded from
-    http://thp.io/2010/mygpoclient/
+    http://gpodder.org/mygpoclient/

     From a source checkout, you can download local copies of all
     CLI dependencies for debugging (will be placed into "src/"):

@@ -22,11 +22,17 @@
 # Thomas Perl <thp@gpodder.org>; 2009-06-11
 #

-import feedparser
+import podcastparser

 from gpodder import util

 import logging
 logger = logging.getLogger(__name__)

+from urllib2 import HTTPError
+from HTMLParser import HTMLParser
+import urlparse
+
 try:
     # Python 2
     from rfc822 import mktime_tz
@@ -35,75 +41,6 @@ except ImportError:
     from email.utils import mktime_tz


-# Version check to avoid bug 1648
-feedparser_version = tuple(int(x) if x.isdigit() else x
-        for x in feedparser.__version__.split('.'))
-feedparser_minimum_version = (5, 1, 2)
-if feedparser_version < feedparser_minimum_version:
-    installed_version = feedparser.__version__
-    required_version = '.'.join(str(x) for x in feedparser_minimum_version)
-    logger.warn('Your feedparser is too old. Installed: %s, recommended: %s',
-            installed_version, required_version)
-
-
-def patch_feedparser():
-    """Monkey-patch the Universal Feed Parser"""
-    # Detect the 'plain' content type as 'text/plain'
-    # http://code.google.com/p/feedparser/issues/detail?id=80
-    def mapContentType2(self, contentType):
-        contentType = contentType.lower()
-        if contentType == 'text' or contentType == 'plain':
-            contentType = 'text/plain'
-        elif contentType == 'html':
-            contentType = 'text/html'
-        elif contentType == 'xhtml':
-            contentType = 'application/xhtml+xml'
-        return contentType
-
-    try:
-        if feedparser._FeedParserMixin().mapContentType('plain') == 'plain':
-            feedparser._FeedParserMixin.mapContentType = mapContentType2
-    except:
-        pass
-
-    # Fix parsing of Media RSS with feedparser, as described here:
-    # http://code.google.com/p/feedparser/issues/detail?id=100#c4
-    def _start_media_content(self, attrsD):
-        context = self._getContext()
-        context.setdefault('media_content', [])
-        context['media_content'].append(attrsD)
-
-    try:
-        feedparser._FeedParserMixin._start_media_content = _start_media_content
-    except:
-        pass
-
-    # Fix problem with the EA.com official podcast
-    # https://bugs.gpodder.org/show_bug.cgi?id=588
-    if '*/*' not in feedparser.ACCEPT_HEADER.split(','):
-        feedparser.ACCEPT_HEADER += ',*/*'
-
-    # Fix problem with YouTube feeds and pubDate/atom:modified
-    # https://bugs.gpodder.org/show_bug.cgi?id=1492
-    # http://code.google.com/p/feedparser/issues/detail?id=310
-    def _end_updated(self):
-        value = self.pop('updated')
-        parsed_value = feedparser._parse_date(value)
-        overwrite = ('youtube.com' not in self.baseuri)
-        try:
-            self._save('updated_parsed', parsed_value, overwrite=overwrite)
-        except TypeError, te:
-            logger.warn('Your feedparser version is too old: %s', te)
-
-    try:
-        feedparser._FeedParserMixin._end_updated = _end_updated
-    except:
-        pass
-
-
-patch_feedparser()
-
-
 class ExceptionWithData(Exception):
     """Base exception with additional payload"""
     def __init__(self, data):
@@ -114,7 +51,6 @@ class ExceptionWithData(Exception):
         return '%s: %s' % (self.__class__.__name__, str(self.data))

 # Temporary errors
-class Offline(Exception): pass
 class BadRequest(Exception): pass
 class InternalServerError(Exception): pass
 class WifiLogin(ExceptionWithData): pass
@@ -137,6 +73,26 @@ class Result:
         self.feed = feed


+class FeedAutodiscovery(HTMLParser):
+    def __init__(self, base):
+        HTMLParser.__init__(self)
+        self._base = base
+        self._resolved_url = None
+
+    def handle_starttag(self, tag, attrs):
+        if tag == 'link':
+            attrs = dict(attrs)
+
+            is_feed = attrs.get('type', '') in Fetcher.FEED_TYPES
+            is_alternate = attrs.get('rel', '') == 'alternate'
+            url = attrs.get('href', None)
+            url = urlparse.urljoin(self._base, url)
+
+            if is_feed and is_alternate and url:
+                logger.info('Feed autodiscovery: %s', url)
+                self._resolved_url = url
+
+
 class Fetcher(object):
     # Supported types, see http://feedvalidator.org/docs/warning/EncodingMismatch.html
     FEED_TYPES = ('application/rss+xml',
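
The FeedAutodiscovery class added above feeds the HTML of a non-feed page through Python 2's HTMLParser and remembers the first <link rel="alternate"> that advertises a feed type. A minimal self-contained sketch of the same idea, written with the renamed Python 3 stdlib modules (html.parser, urllib.parse) and an invented HTML snippet:

    from html.parser import HTMLParser   # Python 2 spelling: from HTMLParser import HTMLParser
    from urllib.parse import urljoin     # Python 2 spelling: urlparse.urljoin

    FEED_TYPES = ('application/rss+xml', 'application/atom+xml')

    class Autodiscovery(HTMLParser):
        """Remember the first <link rel="alternate"> that points at a feed"""
        def __init__(self, base):
            HTMLParser.__init__(self)
            self._base = base
            self.resolved_url = None

        def handle_starttag(self, tag, attrs):
            if tag == 'link' and self.resolved_url is None:
                attrs = dict(attrs)
                is_feed = attrs.get('type', '') in FEED_TYPES
                is_alternate = attrs.get('rel', '') == 'alternate'
                href = attrs.get('href')
                if is_feed and is_alternate and href:
                    # Resolve relative hrefs against the page URL
                    self.resolved_url = urljoin(self._base, href)

    parser = Autodiscovery('http://example.com/podcast/')
    parser.feed('<html><head><link rel="alternate" '
                'type="application/rss+xml" href="/feed.xml"></head></html>')
    print(parser.resolved_url)   # http://example.com/feed.xml
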
@@ -145,9 +101,6 @@ class Fetcher(object):
             'application/xml',
             'text/xml')

-    def __init__(self, user_agent):
-        self.user_agent = user_agent
-
     def _resolve_url(self, url):
         """Provide additional ways of resolving an URL

@@ -158,45 +111,6 @@ class Fetcher(object):
         """
         return None

-    def _autodiscover_feed(self, feed):
-        # First, try all <link> elements if available
-        for link in feed.feed.get('links', ()):
-            is_feed = link.get('type', '') in self.FEED_TYPES
-            is_alternate = link.get('rel', '') == 'alternate'
-            url = link.get('href', None)
-
-            if url and is_feed and is_alternate:
-                try:
-                    return self._parse_feed(url, None, None, False)
-                except Exception, e:
-                    pass
-
-        # Second, try to resolve the URL
-        url = self._resolve_url(feed.href)
-        if url:
-            result = self._parse_feed(url, None, None, False)
-            result.status = NEW_LOCATION
-            return result
-
-    def _check_offline(self, feed):
-        if not hasattr(feed, 'headers'):
-            raise Offline()
-
-    def _check_wifi_login_page(self, feed):
-        html_page = 'text/html' in feed.headers.get('content-type', '')
-        if not feed.version and feed.status == 302 and html_page:
-            raise WifiLogin(feed.href)
-
-    def _check_valid_feed(self, feed):
-        if feed is None:
-            raise InvalidFeed('feed is None')
-
-        if not hasattr(feed, 'status'):
-            raise InvalidFeed('feed has no status code')
-
-        if not feed.version and feed.status != 304 and feed.status != 401:
-            raise InvalidFeed('unknown feed type')
-
     def _normalize_status(self, status):
         # Based on Mark Pilgrim's "Atom aggregator behaviour" article
         if status in (200, 301, 302, 304, 400, 401, 403, 404, 410, 500):
@@ -212,16 +126,9 @@ class Fetcher(object):
         else:
             return status

-    def _check_rss_redirect(self, feed):
-        new_location = feed.feed.get('newlocation', None)
-        if new_location:
-            feed.href = feed.feed.newlocation
-            return Result(NEW_LOCATION, feed)
-
-        return None
-
-    def _check_statuscode(self, feed):
-        status = self._normalize_status(feed.status)
+    def _check_statuscode(self, response, feed):
+        status = self._normalize_status(response.getcode())
         if status == 200:
             return Result(UPDATED_FEED, feed)
         elif status == 301:
@@ -247,69 +154,43 @@ class Fetcher(object):
         raise UnknownStatusCode(status)

     def _parse_feed(self, url, etag, modified, autodiscovery=True):
+        headers = {}
+        if modified is not None:
+            headers['If-Modified-Since'] = modified
+        if etag is not None:
+            headers['If-None-Match'] = etag
+
         if url.startswith('file://'):
             is_local = True
             url = url[len('file://'):]
+            stream = open(url)
         else:
             is_local = False
+            try:
+                stream = util.urlopen(url, headers)
+            except HTTPError as e:
+                return self._check_statuscode(e, e.geturl())

-        feed = feedparser.parse(url,
-                agent=self.user_agent,
-                modified=modified,
-                etag=etag)
-
-        if is_local:
-            if feed.version:
-                feed.headers = {}
-                return Result(UPDATED_FEED, feed)
-            else:
-                raise InvalidFeed('Not a valid feed file')
-        else:
-            self._check_offline(feed)
-            self._check_wifi_login_page(feed)
-
-            if feed.status != 304 and not feed.version and autodiscovery:
-                feed = self._autodiscover_feed(feed).feed
-
-            self._check_valid_feed(feed)
-
-            redirect = self._check_rss_redirect(feed)
-            if redirect is not None:
-                return redirect
-
-            return self._check_statuscode(feed)
+        if stream.headers.get('content-type', '').startswith('text/html'):
+            if autodiscovery:
+                ad = FeedAutodiscovery(url)
+                ad.feed(stream.read())
+                if ad._resolved_url:
+                    try:
+                        self._parse_feed(ad._resolved_url, None, None, False)
+                        return Result(NEW_LOCATION, ad._resolved_url)
+                    except Exception as e:
+                        logger.warn('Feed autodiscovery failed', exc_info=True)
+
+                # Second, try to resolve the URL
+                url = self._resolve_url(url)
+                if url:
+                    return Result(NEW_LOCATION, url)
+
+            raise InvalidFeed('Got HTML document instead')
+
+        feed = podcastparser.parse(url, stream)
+        return self._check_statuscode(stream, feed)

     def fetch(self, url, etag=None, modified=None):
         return self._parse_feed(url, etag, modified)
-
-
-def get_pubdate(entry):
-    """Try to determine the real pubDate of a feedparser entry
-
-    This basically takes the updated_parsed value, but also uses some more
-    advanced techniques to work around various issues with ugly feeds.
-
-    "published" now also takes precedence over "updated" (with updated used as
-    a fallback if published is not set/available). RSS' "pubDate" element is
-    "updated", and will only be used if published_parsed is not available.
-
-    If parsing the date into seconds since epoch returns an error (date is
-    before epoch or after the end of time), epoch is used as fallback.
-    This fixes https://bugs.gpodder.org/show_bug.cgi?id=2023
-    """
-    pubdate = entry.get('published_parsed', None)
-
-    if pubdate is None:
-        pubdate = entry.get('updated_parsed', None)
-
-    if pubdate is None:
-        # Cannot determine pubdate - party like it's 1970!
-        return 0
-
-    try:
-        pubtimeseconds = mktime_tz(pubdate + (0,))
-        return pubtimeseconds
-    except (OverflowError, ValueError):
-        logger.warn('bad pubdate %s is before epoch or after end of time (2038)', pubdate)
-        return 0
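
After this rewrite the fetch path boils down to: build conditional-GET headers, open a stream, run HTML autodiscovery if needed, then hand the stream to podcastparser.parse(), which returns a plain dict instead of feedparser's attribute-style result object. A rough standalone sketch of that flow, with urllib.request standing in for gPodder's util.urlopen() and a placeholder feed URL:

    import urllib.request    # the codebase itself goes through util.urlopen()

    import podcastparser     # pip install podcastparser

    url = 'http://example.com/feed.xml'        # placeholder URL
    stream = urllib.request.urlopen(url)

    # parse() takes the URL (for resolving relative links) plus an
    # open stream, and returns a plain dict
    feed = podcastparser.parse(url, stream)

    print(feed.get('title'))
    for episode in feed.get('episodes', []):
        print(episode['title'], episode['published'])
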

@@ -44,7 +44,7 @@ import time
 import datetime

 import hashlib
-import feedparser
+import podcastparser
 import collections
 import string
@@ -60,12 +60,9 @@ class gPodderFetcher(feedcore.Fetcher):
     """
     custom_handlers = []

-    def __init__(self):
-        feedcore.Fetcher.__init__(self, gpodder.user_agent)
-
     def fetch_channel(self, channel):
         etag = channel.http_etag
-        modified = feedparser._parse_date(channel.http_last_modified)
+        modified = podcastparser.parse_pubdate(channel.http_last_modified)
         # If we have a username or password, rebuild the url with them included
         # Note: using a HTTPBasicAuthHandler would be pain because we need to
         # know the realm. It can be done, but I think this method works, too
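
podcastparser.parse_pubdate() takes over from feedparser's private _parse_date() helper: it parses an RFC 2822 date string into seconds since the epoch and, at least in current podcastparser releases, returns 0 when the string cannot be parsed:

    import podcastparser

    print(podcastparser.parse_pubdate('Fri, 21 Nov 1997 09:55:06 -0600'))  # 880127706
    print(podcastparser.parse_pubdate('no date at all'))                   # 0
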
@@ -144,145 +141,54 @@ class PodcastEpisode(PodcastModelObject):
                 youtube.is_video_link(self.link))

     @classmethod
-    def from_feedparser_entry(cls, entry, channel):
+    def from_podcastparser_entry(cls, entry, channel):
         episode = cls(channel)
-        episode.guid = entry.get('id', '')
+        episode.guid = entry['guid']
+        episode.title = entry['title']
+        episode.link = entry['link']
+        episode.description = entry['description']
+        episode.total_time = entry['total_time']
+        episode.published = entry['published']
+        episode.payment_url = entry['payment_url']

-        # Replace multi-space and newlines with single space (Maemo bug 11173)
-        episode.title = re.sub('\s+', ' ', entry.get('title', ''))
-        episode.link = entry.get('link', '')
-        if 'content' in entry and len(entry['content']) and \
-                entry['content'][0].get('type', '') == 'text/html':
-            episode.description = entry['content'][0].value
-        else:
-            episode.description = entry.get('summary', '')
-
-        # Fallback to subtitle if summary is not available
-        if not episode.description:
-            episode.description = entry.get('subtitle', '')
-
-        try:
-            total_time = 0
-
-            # Parse iTunes-specific podcast duration metadata
-            itunes_duration = entry.get('itunes_duration', '')
-            if itunes_duration:
-                total_time = util.parse_time(itunes_duration)
-
-            # Parse time from YouTube descriptions if it's a YouTube feed
-            if youtube.is_youtube_guid(episode.guid):
-                result = re.search(r'Time:<[^>]*>\n<[^>]*>([:0-9]*)<',
-                        episode.description)
-                if result:
-                    youtube_duration = result.group(1)
-                    total_time = util.parse_time(youtube_duration)
-
-            episode.total_time = total_time
-        except:
-            pass
-
-        episode.published = feedcore.get_pubdate(entry)
-
-        enclosures = entry.get('enclosures', [])
-        media_rss_content = entry.get('media_content', [])
-        audio_available = any(e.get('type', '').startswith('audio/') \
-                for e in enclosures + media_rss_content)
-        video_available = any(e.get('type', '').startswith('video/') \
-                for e in enclosures + media_rss_content)
-
-        # XXX: Make it possible for hooks/extensions to override this by
-        # giving them a list of enclosures and the "self" object (podcast)
-        # and letting them sort and/or filter the list of enclosures to
-        # get the desired enclosure picked by the algorithm below.
-        filter_and_sort_enclosures = lambda x: x
-
-        payment_info = [link['href'] for link in entry.get('links', [])
-                if link['rel'] == 'payment']
-        if payment_info:
-            episode.payment_url = payment_info[0]
+        audio_available = any(enclosure['mime_type'].startswith('audio/') for enclosure in entry['enclosures'])
+        video_available = any(enclosure['mime_type'].startswith('video/') for enclosure in entry['enclosures'])

-        # Enclosures
-        for e in filter_and_sort_enclosures(enclosures):
-            episode.mime_type = e.get('type', 'application/octet-stream')
-            if episode.mime_type == '':
-                # See Maemo bug 10036
-                logger.warn('Fixing empty mimetype in ugly feed')
-                episode.mime_type = 'application/octet-stream'
-
-            if '/' not in episode.mime_type:
-                continue
+        for enclosure in entry['enclosures']:
+            episode.mime_type = enclosure['mime_type']

             # Skip images in feeds if audio or video is available (bug 979)
             # This must (and does) also look in Media RSS enclosures (bug 1430)
-            if episode.mime_type.startswith('image/') and \
-                    (audio_available or video_available):
+            if episode.mime_type.startswith('image/') and (audio_available or video_available):
                 continue

             # If we have audio or video available later on, skip
             # 'application/octet-stream' data types (fixes Linux Outlaws)
-            if episode.mime_type == 'application/octet-stream' and \
-                    (audio_available or video_available):
+            if episode.mime_type == 'application/octet-stream' and (audio_available or video_available):
                 continue

-            episode.url = util.normalize_feed_url(e.get('href', ''))
+            episode.url = util.normalize_feed_url(enclosure['url'])
             if not episode.url:
                 continue

-            try:
-                episode.file_size = int(e.length) or -1
-            except:
-                episode.file_size = -1
-
+            episode.file_size = enclosure['file_size']
             return episode

-        # Media RSS content
-        for m in filter_and_sort_enclosures(media_rss_content):
-            episode.mime_type = m.get('type', 'application/octet-stream')
-            if '/' not in episode.mime_type:
-                continue
-
-            # Skip images in Media RSS if we have audio/video (bug 1444)
-            if episode.mime_type.startswith('image/') and \
-                    (audio_available or video_available):
-                continue
-
-            episode.url = util.normalize_feed_url(m.get('url', ''))
-            if not episode.url:
-                continue
-
-            try:
-                episode.file_size = int(m.get('filesize', 0)) or -1
-            except:
-                episode.file_size = -1
-
-            try:
-                episode.total_time = int(m.get('duration', 0)) or 0
-            except:
-                episode.total_time = 0
-
-            return episode
-
-        # Brute-force detection of any links
-        for l in entry.get('links', ()):
-            episode.url = util.normalize_feed_url(l.get('href', ''))
-            if not episode.url:
-                continue
-
-            if ( youtube.is_video_link(episode.url) or \
-                 vimeo.is_video_link(episode.url) or \
-                 escapist_videos.is_video_link(episode.url) ):
-                return episode
-
-            # Check if we can resolve this link to a audio/video file
-            filename, extension = util.filename_from_url(episode.url)
-            file_type = util.file_type_by_extension(extension)
-            if file_type is None and hasattr(l, 'type'):
-                extension = util.extension_from_mimetype(l.type)
-                file_type = util.file_type_by_extension(extension)
-
-            # The link points to a audio or video file - use it!
-            if file_type is not None:
-                return episode
+        # Brute-force detection of the episode link
+        episode.url = util.normalize_feed_url(entry['link'])
+        if not episode.url:
+            return None
+
+        if any(mod.is_video_link(episode.url) for mod in (youtube, vimeo, escapist_videos)):
+            return episode
+
+        # Check if we can resolve this link to a audio/video file
+        filename, extension = util.filename_from_url(episode.url)
+        file_type = util.file_type_by_extension(extension)
+
+        # The link points to a audio or video file - use it!
+        if file_type is not None:
+            return episode

         return None
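
The direct entry['...'] subscripts above work because podcastparser normalizes every episode into a flat dict before gPodder ever sees it, so the defensive .get() fallbacks of the feedparser version are no longer needed. A small round-trip sketch showing the keys used above (the feed XML is invented for the example):

    import io

    import podcastparser

    FEED = b'''<?xml version="1.0" encoding="utf-8"?>
    <rss version="2.0"><channel><title>Example Podcast</title>
      <item>
        <title>Episode 1</title>
        <guid>tag:example.com,2013:ep1</guid>
        <pubDate>Fri, 21 Nov 1997 09:55:06 -0600</pubDate>
        <enclosure url="http://example.com/ep1.mp3"
                   type="audio/mpeg" length="1234"/>
      </item>
    </channel></rss>'''

    parsed = podcastparser.parse('http://example.com/feed.xml', io.BytesIO(FEED))
    entry = parsed['episodes'][0]
    print(entry['guid'], entry['published'], entry['total_time'])
    for enclosure in entry['enclosures']:
        print(enclosure['url'], enclosure['mime_type'], enclosure['file_size'])
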
@@ -1009,29 +915,11 @@ class PodcastChannel(PodcastModelObject):
             self.remove_unreachable_episodes(existing, seen_guids, max_episodes)

     def _consume_updated_feed(self, feed, max_episodes=0):
-        # Cover art URL
-        cover_url = None
-        if hasattr(feed.feed, 'image'):
-            for attribute in ('href', 'url'):
-                new_value = getattr(feed.feed.image, attribute, None)
-                if new_value is not None:
-                    cover_url = new_value
-        elif hasattr(feed.feed, 'icon'):
-            cover_url = feed.feed.icon
-
-        # Payment URL information
-        payment_info = [link['href'] for link in feed.feed.get('links', [])
-                if link['rel'] == 'payment']
-        if payment_info:
-            payment_url = payment_info[0]
-        else:
-            payment_url = None
-
-        self._consume_metadata(feed.feed.get('title', self.url),
-                feed.feed.get('link', self.link),
-                feed.feed.get('subtitle', self.description),
-                cover_url,
-                payment_url)
+        self._consume_metadata(feed.get('title', self.url),
+                feed.get('link', self.link),
+                feed.get('description', ''),
+                feed.get('cover_url', None),
+                feed.get('payment_url', None))

         # Load all episodes to update them properly.
         existing = self.get_all_episodes()
@@ -1040,7 +928,7 @@ class PodcastChannel(PodcastModelObject):
         # because if the feed lists items in ascending order and has >
         # max_episodes old episodes, new episodes will not be shown.
         # See also: gPodder Bug 1186
-        entries = sorted(feed.entries, key=feedcore.get_pubdate, reverse=True)
+        entries = sorted(feed.get('episodes', []), key=lambda episode: episode['published'], reverse=True)

         # We can limit the maximum number of entries that gPodder will parse
         if max_episodes > 0 and len(entries) > max_episodes:
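
Sorting newest-first before applying the max_episodes cut means a limit can never hide fresh episodes behind a backlog of old ones, which was the point of gPodder Bug 1186; the sort key is now the precomputed 'published' timestamp instead of the removed feedcore.get_pubdate() helper. In miniature, with invented timestamps:

    episodes = [{'title': 'old', 'published': 880127706},
                {'title': 'new', 'published': 1380000000}]

    entries = sorted(episodes, key=lambda episode: episode['published'], reverse=True)

    max_episodes = 1
    if max_episodes > 0 and len(entries) > max_episodes:
        entries = entries[:max_episodes]

    print([entry['title'] for entry in entries])   # ['new'], not ['old']
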
@@ -1060,18 +948,8 @@ class PodcastChannel(PodcastModelObject):

         # Search all entries for new episodes
         for entry in entries:
-            episode = self.EpisodeClass.from_feedparser_entry(entry, self)
+            episode = self.EpisodeClass.from_podcastparser_entry(entry, self)
             if episode is not None:
-                if not episode.title:
-                    logger.warn('Using filename as title for %s', episode.url)
-                    basename = os.path.basename(episode.url)
-                    episode.title, ext = os.path.splitext(basename)
-
-                # Maemo bug 12073
-                if not episode.guid:
-                    logger.warn('Using download URL as GUID for %s', episode.title)
-                    episode.guid = episode.url
-
                 seen_guids.add(episode.guid)
             else:
                 continue
@@ -1140,12 +1018,14 @@ class PodcastChannel(PodcastModelObject):
         elif result.status == feedcore.UPDATED_FEED:
             self._consume_updated_feed(result.feed, max_episodes)
         elif result.status == feedcore.NEW_LOCATION:
-            url = result.feed.href
+            url = result.feed
             logger.info('New feed location: %s => %s', self.url, url)
             if url in set(x.url for x in self.model.get_podcasts()):
                 raise Exception('Already subscribed to ' + url)
             self.url = url
-            self._consume_updated_feed(result.feed, max_episodes)
+            # With the updated URL, fetch the feed again
+            self.update(max_episodes)
+            return
         elif result.status == feedcore.NOT_MODIFIED:
             pass

@@ -62,8 +62,6 @@ import webbrowser
 import mimetypes
 import itertools

-import feedparser
-
 import StringIO
 import xml.dom.minidom

@@ -24,7 +24,7 @@ tmp_dir = tempfile.mkdtemp()

 MODULES = [
     # Module name, Regex-file chooser (1st group = location in "src/")
-    ('feedparser', r'feedparser-[0-9.]+/feedparser/(feedparser.py)'),
+    ('podcastparser', r'podcastparser-[0-9.]+/(podcastparser.py)'),
     ('mygpoclient', r'mygpoclient-[0-9.]+/(mygpoclient/[^/]*\.py)')
 ]
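
In localdepends.py the second tuple element is a regular expression matched against member paths inside the sdist tarball fetched from PyPI, and group 1 becomes the destination under "src/". The pattern gets shallower here because podcastparser ships as a single top-level module rather than a package. A quick check against hypothetical tarball member names:

    import re

    pattern = r'podcastparser-[0-9.]+/(podcastparser.py)'

    for name in ('podcastparser-0.6.0/podcastparser.py',   # hypothetical members
                 'podcastparser-0.6.0/setup.py'):
        match = re.match(pattern, name)
        if match:
            print('install to src/%s' % match.group(1))    # src/podcastparser.py
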

@@ -252,7 +252,7 @@ int main(int argc, char** argv)
     // decref GtkModule
 #endif

-    // XXX: Test for feedparser, mygpoclient, dbus
+    // XXX: Test for podcastparser, mygpoclient, dbus

     MainPy = (void*)PyFile_FromString(MAIN_MODULE, "r");
     if (MainPy == NULL) { BAILOUT("Cannot load main file") }