Feedcore: Successful updates should return value

This commit is contained in:
Thomas Perl 2012-07-10 11:57:59 +02:00
parent 972c045ad9
commit f2f187549d
2 changed files with 68 additions and 82 deletions

View File

@ -95,7 +95,6 @@ class ExceptionWithData(Exception):
def __str__(self):
return '%s: %s' % (self.__class__.__name__, str(self.data))
# Temporary errors
class Offline(Exception): pass
class BadRequest(Exception): pass
@ -111,11 +110,13 @@ class UnknownStatusCode(ExceptionWithData): pass
# Authentication error
class AuthenticationRequired(Exception): pass
# Successful parsing of the feed
class UpdatedFeed(ExceptionWithData): pass
class NewLocation(ExceptionWithData): pass
class NotModified(ExceptionWithData): pass
# Successful status codes
UPDATED_FEED, NEW_LOCATION, NOT_MODIFIED, CUSTOM_FEED = range(4)
class Result(object):
    """Outcome of a successful feed update.

    status -- one of the module-level success codes (UPDATED_FEED,
              NEW_LOCATION, NOT_MODIFIED, CUSTOM_FEED)
    feed   -- the feed object the status refers to (None if absent)
    """

    def __init__(self, status, feed=None):
        self.status = status
        self.feed = feed

    def __repr__(self):
        # Makes results readable in logs and interactive debugging
        return '%s(status=%r, feed=%r)' % (self.__class__.__name__,
                self.status, self.feed)
class Fetcher(object):
@ -140,29 +141,24 @@ class Fetcher(object):
return None
def _autodiscover_feed(self, feed):
try:
# First, try all <link> elements if available
for link in feed.feed.get('links', ()):
is_feed = link.get('type', '') in self.FEED_TYPES
is_alternate = link.get('rel', '') == 'alternate'
url = link.get('href', None)
# First, try all <link> elements if available
for link in feed.feed.get('links', ()):
is_feed = link.get('type', '') in self.FEED_TYPES
is_alternate = link.get('rel', '') == 'alternate'
url = link.get('href', None)
if url and is_feed and is_alternate:
try:
self._parse_feed(url, None, None, False)
except UpdatedFeed, updated:
raise
except Exception:
pass
if url and is_feed and is_alternate:
try:
return self._parse_feed(url, None, None, False)
except Exception, e:
pass
# Second, try to resolve the URL
url = self._resolve_url(feed.href)
if url:
self._parse_feed(url, None, None, False)
except UpdatedFeed, updated:
raise NewLocation(updated.data)
except Exception, e:
pass
# Second, try to resolve the URL
url = self._resolve_url(feed.href)
if url:
result = self._parse_feed(url, None, None, False)
result.status = NEW_LOCATION
return result
def _check_offline(self, feed):
if not hasattr(feed, 'headers'):
@ -202,19 +198,22 @@ class Fetcher(object):
new_location = feed.feed.get('newlocation', None)
if new_location:
feed.href = feed.feed.newlocation
raise NewLocation(feed)
return Result(NEW_LOCATION, feed)
return None
def _check_statuscode(self, feed):
status = self._normalize_status(feed.status)
if status == 200:
raise UpdatedFeed(feed)
return Result(UPDATED_FEED, feed)
elif status == 301:
raise NewLocation(feed)
return Result(NEW_LOCATION, feed)
elif status == 302:
raise UpdatedFeed(feed)
return Result(UPDATED_FEED, feed)
elif status == 304:
raise NotModified(feed)
elif status == 400:
return Result(NOT_MODIFIED, feed)
if status == 400:
raise BadRequest('bad request')
elif status == 401:
raise AuthenticationRequired('authentication required')
@ -230,7 +229,6 @@ class Fetcher(object):
raise UnknownStatusCode(status)
def _parse_feed(self, url, etag, modified, autodiscovery=True):
"""Parse the feed and raise the result."""
if url.startswith('file://'):
is_local = True
url = url[len('file://'):]
@ -245,7 +243,7 @@ class Fetcher(object):
if is_local:
if feed.version:
feed.headers = {}
raise UpdatedFeed(feed)
return Result(UPDATED_FEED, feed)
else:
raise InvalidFeed('Not a valid feed file')
else:
@ -256,18 +254,15 @@ class Fetcher(object):
self._autodiscover_feed(feed)
self._check_valid_feed(feed)
self._check_rss_redirect(feed)
self._check_statuscode(feed)
redirect = self._check_rss_redirect(feed)
if redirect is not None:
return redirect
return self._check_statuscode(feed)
def fetch(self, url, etag=None, modified=None):
    """Download a feed, with optional etag and modified values

    Returns the value of self._parse_feed() for the given URL, which
    describes the outcome of a successful fetch. Failures are
    reported via the exceptions that self._parse_feed() raises.
    """
    # Parse exactly once and hand the result back to the caller
    return self._parse_feed(url, etag, modified)
def get_pubdate(entry):

View File

@ -50,8 +50,6 @@ import string
_ = gpodder.gettext
class CustomFeed(feedcore.ExceptionWithData): pass
class gPodderFetcher(feedcore.Fetcher):
"""
This class extends the feedcore Fetcher with the gPodder User-Agent and the
@ -72,8 +70,8 @@ class gPodderFetcher(feedcore.Fetcher):
for handler in self.custom_handlers:
custom_feed = handler.handle_url(url)
if custom_feed is not None:
raise CustomFeed(custom_feed)
self.fetch(url, etag, modified)
return feedcore.Result(feedcore.CUSTOM_FEED, custom_feed)
return self.fetch(url, etag, modified)
def _resolve_url(self, url):
url = youtube.get_real_channel_url(url)
@ -974,6 +972,15 @@ class PodcastChannel(PodcastModelObject):
if not self.title or self.title == self.url:
self.title = new_title
# Start YouTube- and Vimeo-specific title FIX
YOUTUBE_PREFIX = 'Uploads by '
VIMEO_PREFIX = 'Vimeo / '
if self.title.startswith(YOUTUBE_PREFIX):
self.title = self.title[len(YOUTUBE_PREFIX):] + ' on YouTube'
elif self.title.startswith(VIMEO_PREFIX):
self.title = self.title[len(VIMEO_PREFIX):] + ' on Vimeo'
# End YouTube- and Vimeo-specific title FIX
def _consume_custom_feed(self, custom_feed, max_episodes=0):
self._consume_updated_title(custom_feed.get_title())
self.link = custom_feed.get_link()
@ -1000,14 +1007,6 @@ class PodcastChannel(PodcastModelObject):
self.link = feed.feed.get('link', self.link)
self.description = feed.feed.get('subtitle', self.description)
# Start YouTube- and Vimeo-specific title FIX
YOUTUBE_PREFIX = 'Uploads by '
VIMEO_PREFIX = 'Vimeo / '
if self.title.startswith(YOUTUBE_PREFIX):
self.title = self.title[len(YOUTUBE_PREFIX):] + ' on YouTube'
elif self.title.startswith(VIMEO_PREFIX):
self.title = self.title[len(VIMEO_PREFIX):] + ' on Vimeo'
# End YouTube- and Vimeo-specific title FIX
if hasattr(feed.feed, 'image'):
for attribute in ('href', 'url'):
@ -1134,34 +1133,26 @@ class PodcastChannel(PodcastModelObject):
# Sort episodes by pubdate, descending
self.children.sort(key=lambda e: e.published, reverse=True)
def _update_etag_modified(self, feed):
self.http_etag = feed.headers.get('etag', self.http_etag)
self.http_last_modified = feed.headers.get('last-modified', self.http_last_modified)
def update(self, max_episodes=0):
try:
self.feed_fetcher.fetch_channel(self)
except CustomFeed, updated:
custom_feed = updated.data
self._consume_custom_feed(custom_feed, max_episodes)
self.save()
except feedcore.UpdatedFeed, updated:
feed = updated.data
self._consume_updated_feed(feed, max_episodes)
self._update_etag_modified(feed)
self.save()
except feedcore.NewLocation, updated:
feed = updated.data
logger.info('New feed location: %s => %s', self.url, feed.href)
if feed.href in set(x.url for x in self.model.get_podcasts()):
raise Exception('Already subscribed to ' + feed.href)
self.url = feed.href
self._consume_updated_feed(feed, max_episodes)
self._update_etag_modified(feed)
self.save()
except feedcore.NotModified, updated:
feed = updated.data
self._update_etag_modified(feed)
result = self.feed_fetcher.fetch_channel(self)
if result.status == feedcore.CUSTOM_FEED:
self._consume_custom_feed(result.feed, max_episodes)
elif result.status == feedcore.UPDATED_FEED:
self._consume_updated_feed(result.feed, max_episodes)
elif result.status == feedcore.NEW_LOCATION:
url = result.feed.href
logger.info('New feed location: %s => %s', self.url, url)
if url in set(x.url for x in self.model.get_podcasts()):
raise Exception('Already subscribed to ' + url)
self.url = url
self._consume_updated_feed(result.feed, max_episodes)
elif result.status == feedcore.NOT_MODIFIED:
pass
self.http_etag = result.feed.headers.get('etag', self.http_etag)
self.http_last_modified = result.feed.headers.get('last-modified', self.http_last_modified)
self.save()
except Exception, e:
# "Not really" errors