
# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (c) 2005-2012 Thomas Perl and the gPodder Team
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#
# Generic feed fetching module for aggregators
# Thomas Perl <thp@gpodder.org>; 2009-06-11
#
import feedparser

try:
    # Python 2
    from rfc822 import mktime_tz
except ImportError:
    # Python 3
    from email.utils import mktime_tz


def patch_feedparser():
    """Monkey-patch the Universal Feed Parser"""
    # Detect the 'plain' content type as 'text/plain'
    # http://code.google.com/p/feedparser/issues/detail?id=80
    def mapContentType2(self, contentType):
        contentType = contentType.lower()
        if contentType == 'text' or contentType == 'plain':
            contentType = 'text/plain'
        elif contentType == 'html':
            contentType = 'text/html'
        elif contentType == 'xhtml':
            contentType = 'application/xhtml+xml'
        return contentType

    try:
        if feedparser._FeedParserMixin().mapContentType('plain') == 'plain':
            feedparser._FeedParserMixin.mapContentType = mapContentType2
    except:
        pass

    # Fix parsing of Media RSS with feedparser, as described here:
    # http://code.google.com/p/feedparser/issues/detail?id=100#c4
    def _start_media_content(self, attrsD):
        context = self._getContext()
        context.setdefault('media_content', [])
        context['media_content'].append(attrsD)

    try:
        feedparser._FeedParserMixin._start_media_content = _start_media_content
    except:
        pass

    # Fix problem with the EA.com official podcast
    # https://bugs.gpodder.org/show_bug.cgi?id=588
    if '*/*' not in feedparser.ACCEPT_HEADER.split(','):
        feedparser.ACCEPT_HEADER += ',*/*'

    # Fix problem with YouTube feeds and pubDate/atom:modified
    # https://bugs.gpodder.org/show_bug.cgi?id=1492
    # http://code.google.com/p/feedparser/issues/detail?id=310
    def _end_updated(self):
        value = self.pop('updated')
        parsed_value = feedparser._parse_date(value)
        overwrite = ('youtube.com' not in self.baseuri)
        self._save('updated_parsed', parsed_value, overwrite=overwrite)

    try:
        feedparser._FeedParserMixin._end_updated = _end_updated
    except:
        pass


patch_feedparser()


class ExceptionWithData(Exception):
    """Base exception with additional payload"""
    def __init__(self, data):
        Exception.__init__(self)
        self.data = data

    def __str__(self):
        return '%s: %s' % (self.__class__.__name__, str(self.data))


# Temporary errors
class Offline(Exception): pass
class BadRequest(Exception): pass
class InternalServerError(Exception): pass
class WifiLogin(ExceptionWithData): pass

# Fatal errors
class Unsubscribe(Exception): pass
class NotFound(Exception): pass
class InvalidFeed(Exception): pass
class UnknownStatusCode(ExceptionWithData): pass

# Authentication error
class AuthenticationRequired(Exception): pass

# Successful parsing of the feed
class UpdatedFeed(ExceptionWithData): pass
class NewLocation(ExceptionWithData): pass
class NotModified(ExceptionWithData): pass
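
# Note: these classes double as result values - every call to Fetcher.fetch()
# below ends by raising exactly one of them (see _check_statuscode and fetch).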


class Fetcher(object):
    # Supported types, see http://feedvalidator.org/docs/warning/EncodingMismatch.html
    FEED_TYPES = ('application/rss+xml',
                  'application/atom+xml',
                  'application/rdf+xml',
                  'application/xml',
                  'text/xml')

    def __init__(self, user_agent):
        self.user_agent = user_agent

    def _resolve_url(self, url):
        """Provide additional ways of resolving a URL

        Subclasses can override this method to provide more
        ways of resolving a given URL to a feed URL. If the
        Fetcher is in "autodiscovery" mode, it will try this
        method as a last resort for coming up with a feed URL.
        """
        return None
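
    # A sketch of how a subclass might override _resolve_url (the pattern and
    # URL below are purely illustrative, not gPodder's actual resolvers):
    #
    #     class MyFetcher(Fetcher):
    #         def _resolve_url(self, url):
    #             if url.startswith('http://example.com/user/'):
    #                 return url.rstrip('/') + '/feed.xml'
    #             return None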

    def _autodiscover_feed(self, feed):
        try:
            # First, try all <link> elements if available
            for link in feed.feed.get('links', ()):
                is_feed = link.get('type', '') in self.FEED_TYPES
                is_alternate = link.get('rel', '') == 'alternate'
                url = link.get('href', None)

                if url and is_feed and is_alternate:
                    try:
                        self._parse_feed(url, None, None, False)
                    except UpdatedFeed:
                        raise
                    except Exception:
                        pass

            # Second, try to resolve the URL
            url = self._resolve_url(feed.href)
            if url:
                self._parse_feed(url, None, None, False)
        except UpdatedFeed as updated:
            raise NewLocation(updated.data)
        except Exception:
            pass

    def _check_offline(self, feed):
        if not hasattr(feed, 'headers'):
            raise Offline()

    def _check_wifi_login_page(self, feed):
        html_page = 'text/html' in feed.headers.get('content-type', '')
        if not feed.version and feed.status == 302 and html_page:
            raise WifiLogin(feed.href)

    def _check_valid_feed(self, feed):
        if feed is None:
            raise InvalidFeed('feed is None')

        if not hasattr(feed, 'status'):
            raise InvalidFeed('feed has no status code')

        if not feed.version and feed.status != 304 and feed.status != 401:
            raise InvalidFeed('unknown feed type')

    def _normalize_status(self, status):
        # Based on Mark Pilgrim's "Atom aggregator behaviour" article
        if status in (200, 301, 302, 304, 400, 401, 403, 404, 410, 500):
            return status
        elif status >= 200 and status < 300:
            return 200
        elif status >= 300 and status < 400:
            return 302
        elif status >= 400 and status < 500:
            return 400
        elif status >= 500 and status < 600:
            return 500
        else:
            return status
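
    # For example: _normalize_status(307) returns 302 and _normalize_status(503)
    # returns 500, while the explicitly-handled codes above pass through unchanged.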

    def _check_rss_redirect(self, feed):
        new_location = feed.feed.get('newlocation', None)
        if new_location:
            feed.href = feed.feed.newlocation
            raise NewLocation(feed)

    def _check_statuscode(self, feed):
        status = self._normalize_status(feed.status)
        if status == 200:
            raise UpdatedFeed(feed)
        elif status == 301:
            raise NewLocation(feed)
        elif status == 302:
            raise UpdatedFeed(feed)
        elif status == 304:
            raise NotModified(feed)
        elif status == 400:
            raise BadRequest('bad request')
        elif status == 401:
            raise AuthenticationRequired('authentication required')
        elif status == 403:
            raise Unsubscribe('forbidden')
        elif status == 404:
            raise NotFound('not found')
        elif status == 410:
            raise Unsubscribe('resource is gone')
        elif status == 500:
            raise InternalServerError('internal server error')
        else:
            raise UnknownStatusCode(status)

    def _parse_feed(self, url, etag, modified, autodiscovery=True):
        """Parse the feed and raise the result."""
        if url.startswith('file://'):
            is_local = True
            url = url[len('file://'):]
        else:
            is_local = False

        feed = feedparser.parse(url,
                agent=self.user_agent,
                modified=modified,
                etag=etag)

        if is_local:
            if feed.version:
                feed.headers = {}
                raise UpdatedFeed(feed)
            else:
                raise InvalidFeed('Not a valid feed file')
        else:
            self._check_offline(feed)
            self._check_wifi_login_page(feed)

            if feed.status != 304 and not feed.version and autodiscovery:
                self._autodiscover_feed(feed)

            self._check_valid_feed(feed)
            self._check_rss_redirect(feed)
            self._check_statuscode(feed)

    def fetch(self, url, etag=None, modified=None):
        """Download a feed, with optional etag and modified values

        This method will always raise an exception that tells
        the calling code the result of the fetch operation. See
        the code for the feedcore module for all the possible
        exception types.
        """
        self._parse_feed(url, etag, modified)
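

# A minimal usage sketch, not part of the original module (the subclass name
# and feed URL are hypothetical):
#
#     class MyFetcher(Fetcher):
#         pass
#
#     fetcher = MyFetcher(user_agent='gPodder')
#     try:
#         fetcher.fetch('http://example.com/feed.xml', etag=None, modified=None)
#     except UpdatedFeed as updated:
#         feed = updated.data    # parsed feedparser result with fresh content
#     except NotModified:
#         pass                   # server returned 304; keep the cached copy
#     except NewLocation as moved:
#         pass                   # feed moved; the new URL is in moved.data.href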


def get_pubdate(entry):
    """Try to determine the real pubDate of a feedparser entry

    This basically takes the updated_parsed value, but also uses some more
    advanced techniques to work around various issues with ugly feeds.

    "published" now also takes precedence over "updated" (with updated used as
    a fallback if published is not set/available). RSS' "pubDate" element is
    "updated", and will only be used if published_parsed is not available.
    """
    pubdate = entry.get('published_parsed', None)

    if pubdate is None:
        pubdate = entry.get('updated_parsed', None)

    if pubdate is None:
        # See http://code.google.com/p/feedparser/issues/detail?id=327
        updated = entry.get('published', entry.get('updated', None))
        if updated is not None:
            # FIXME: This is kludgy. We should write our own date handler
            # and register it with feedparser.registerDateHandler() and/or
            # wait for feedparser to add support for this bogus date format.
            pubdate = feedparser._parse_date(updated.replace(',', ''))

    if pubdate is None:
        # Cannot determine pubdate - party like it's 1970!
        return 0

    return mktime_tz(pubdate + (0,))
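

# A minimal usage sketch for get_pubdate (illustrative only; the feed URL is
# hypothetical):
#
#     import feedparser
#     d = feedparser.parse('http://example.com/feed.xml')
#     if d.entries:
#         timestamp = get_pubdate(d.entries[0])  # Unix timestamp, 0 if unknown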