First cut of new feedcore module + related changes

This might break a lot, but it makes feed parsing and
updating so much cleaner, and also helps with error
reporting, because exceptions are bubbled up to the UI.

Removed:

 * Changing feed URL (this introduces too many problems)
 * Support for FTP proxy servers (who uses FTP for podcasts?)
Thomas Perl 2009-06-12 00:51:13 +02:00
parent c7137ba0a5
commit 4c7088dfa3
10 changed files with 376 additions and 422 deletions
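
The heart of this change: fetch results are now communicated as exceptions instead of return tuples. A minimal sketch of how calling code consumes the new feedcore module (the feed URL and User-Agent below are placeholders; the real caller is PodcastChannel.update() in the libpodcasts.py diff further down):

    from gpodder import feedcore

    fetcher = feedcore.Fetcher(user_agent='gPodder')
    try:
        # fetch() never returns normally - it always raises an exception
        # (also on success) that describes the outcome of the operation
        fetcher.fetch('http://example.com/feed.xml', etag=None, modified=None)
    except feedcore.UpdatedFeed, updated:
        print 'Got new content:', updated.data.feed.get('title', '(untitled)')
    except feedcore.NotModified:
        print 'Feed unchanged - keep using the cached data'
    except feedcore.AuthenticationRequired:
        print 'Ask the user for credentials and retry'
    except feedcore.Offline:
        print 'No connection - cancel the remaining updates'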


@@ -121,13 +121,6 @@
         <signal handler="on_itemEditChannel_activate" name="activate"/>
       </object>
     </child>
-    <child>
-      <object class="GtkAction" id="item_change_podcast_feed_url">
-        <property name="name">item_change_podcast_feed_url</property>
-        <property name="label" translatable="yes">Change feed URL of selected podcast</property>
-        <signal handler="change_current_podcast_url" name="activate"/>
-      </object>
-    </child>
     <child>
       <object class="GtkAction" id="itemRemoveChannel">
         <property name="stock_id">gtk-remove</property>
@@ -406,7 +399,6 @@
       <separator/>
       <menuitem action="itemAddChannel"/>
       <menuitem action="itemEditChannel"/>
-      <menuitem action="item_change_podcast_feed_url"/>
       <menuitem action="itemRemoveChannel"/>
       <separator/>
       <menuitem action="itemUpdateChannel"/>


@@ -593,23 +593,6 @@
             <property name="y_options"/>
           </packing>
         </child>
-        <child>
-          <object class="GtkEntry" id="ftpProxy">
-            <property name="visible">True</property>
-            <property name="can_focus">True</property>
-            <property name="max_length">0</property>
-            <property name="has_frame">True</property>
-            <property name="invisible_char">*</property>
-            <property name="activates_default">False</property>
-          </object>
-          <packing>
-            <property name="left_attach">2</property>
-            <property name="right_attach">3</property>
-            <property name="top_attach">1</property>
-            <property name="bottom_attach">2</property>
-            <property name="y_options"/>
-          </packing>
-        </child>
         <child>
           <object class="GtkLabel" id="label13">
             <property name="visible">True</property>
@@ -631,27 +614,6 @@
             <property name="y_options"/>
           </packing>
         </child>
-        <child>
-          <object class="GtkLabel" id="label72">
-            <property name="visible">True</property>
-            <property name="label" translatable="yes">FTP Proxy:</property>
-            <property name="use_underline">False</property>
-            <property name="use_markup">False</property>
-            <property name="wrap">False</property>
-            <property name="selectable">False</property>
-            <property name="xalign">0</property>
-            <property name="width_chars">-1</property>
-            <property name="single_line_mode">False</property>
-          </object>
-          <packing>
-            <property name="left_attach">1</property>
-            <property name="right_attach">2</property>
-            <property name="top_attach">1</property>
-            <property name="bottom_attach">2</property>
-            <property name="x_options">fill</property>
-            <property name="y_options"/>
-          </packing>
-        </child>
         <child>
           <object class="GtkLabel" id="label73">
             <property name="visible">True</property>


@@ -1,243 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# python-feedcache (customized by Thomas Perl for use in gPodder)
-#
-# Copyright 2007 Doug Hellmann.
-#
-#
-# All Rights Reserved
-#
-# Permission to use, copy, modify, and distribute this software and
-# its documentation for any purpose and without fee is hereby
-# granted, provided that the above copyright notice appear in all
-# copies and that both that copyright notice and this permission
-# notice appear in supporting documentation, and that the name of Doug
-# Hellmann not be used in advertising or publicity pertaining to
-# distribution of the software without specific, written prior
-# permission.
-#
-# DOUG HELLMANN DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
-# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
-# NO EVENT SHALL DOUG HELLMANN BE LIABLE FOR ANY SPECIAL, INDIRECT OR
-# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
-# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
-# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
-# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-#
-
-import feedparser
-import string
-import re
-import time
-import urllib
-import urlparse
-import urllib2
-
-import gpodder
-from gpodder import resolver
-from gpodder.liblogger import log
-
-_ = gpodder.gettext
-
-
-def patch_feedparser():
-    """Fix a bug in feedparser 4.1
-
-    This replaces the mapContentType method of the
-    _FeedParserMixin class to correctly detect the
-    "plain" content type as "text/plain".
-
-    See also:
-    http://code.google.com/p/feedparser/issues/detail?id=80
-
-    Added by Thomas Perl for gPodder 2007-12-29
-    """
-    def mapContentType2(self, contentType):
-        contentType = contentType.lower()
-        if contentType == 'text' or contentType == 'plain':
-            contentType = 'text/plain'
-        elif contentType == 'html':
-            contentType = 'text/html'
-        elif contentType == 'xhtml':
-            contentType = 'application/xhtml+xml'
-        return contentType
-
-    try:
-        if feedparser._FeedParserMixin().mapContentType('plain') == 'plain':
-            log('Patching feedparser module... (mapContentType bugfix)')
-            feedparser._FeedParserMixin.mapContentType = mapContentType2
-    except:
-        log('Warning: feedparser unpatched - might be broken!')
-
-patch_feedparser()
-
-
-class Cache:
-    """A class to wrap Mark Pilgrim's Universal Feed Parser module
-    (http://www.feedparser.org) so that parameters can be used to
-    cache the feed results locally instead of fetching the feed every
-    time it is requested. Uses both etag and modified times for
-    caching.
-    """
-
-    # Supported types, see http://feedvalidator.org/docs/warning/EncodingMismatch.html
-    SUPPORTED_FEED_TYPES = ('application/rss+xml', 'application/atom+xml',
-                            'application/rdf+xml', 'application/xml', 'text/xml')
-
-    def __init__(self, timeToLiveSeconds=3600):
-        """
-        Arguments:
-
-        storage -- Backing store for the cache. It should follow
-        the dictionary API, with URLs used as keys. It should
-        persist data.
-
-        timeToLiveSeconds=300 -- The length of time content should
-        live in the cache before an update is attempted.
-        """
-        self.time_to_live = timeToLiveSeconds
-        self.user_agent = gpodder.user_agent
-        return
-
-    def fetch(self, url, old_channel=None, use_proxies=False,
-            http_proxy=None, ftp_proxy=None):
-        """
-        Returns an (updated, feed) tuple for the feed at the specified
-        URL. If the feed hasn't updated since the last run, updated
-        will be False. If it has been updated, updated will be True.
-
-        If updated is False, the feed value is None and you have to use
-        the old channel which you passed to this function.
-
-        If use_proxies is set to True, the cache generates a ProxyHandler
-        from the http_proxy and ftp_proxy variables.
-        """
-        if old_channel is not None:
-            etag = old_channel.etag
-            modified = feedparser._parse_date(old_channel.last_modified)
-        else:
-            etag = None
-            modified = None
-
-        original_url = url
-
-        # If we have a username or password, rebuild the url with them included
-        # Note: using a HTTPBasicAuthHandler would be pain because we need to
-        # know the realm. It can be done, but I think this method will work fine
-        if old_channel is not None and (
-                old_channel.username or old_channel.password ):
-            username = urllib.quote(old_channel.username)
-            password = urllib.quote(old_channel.password)
-            auth_string = string.join( [username, password], ':' )
-            url_parts = list(urlparse.urlsplit(url))
-            url_parts[1] = string.join( [auth_string, url_parts[1]], '@' )
-            url = urlparse.urlunsplit(url_parts)
-
-        handlers = []
-        if use_proxies:
-            # Add a ProxyHandler for fetching data via a proxy server
-            proxies = {}
-            if http_proxy:
-                proxies['http'] = http_proxy
-                log('Using proxy for HTTP: %s', http_proxy, sender=self)
-            if ftp_proxy:
-                proxies['ftp'] = ftp_proxy
-                log('Using proxy for FTP: %s', ftp_proxy, sender=self)
-            handlers.append(urllib2.ProxyHandler(proxies))
-
-        # We know we need to fetch, so go ahead and do it.
-        parsed_result = feedparser.parse(url,
-                agent=self.user_agent,
-                modified=modified,
-                etag=etag,
-                handlers=handlers,
-                )
-
-        # Sometimes, the status code is not set (ugly feed?)
-        status = parsed_result.get('status', None)
-
-        # 304: Not Modified
-        if status == 304:
-            log('Not Modified: %s', url, sender=self)
-            return (False, None)
-
-        if status == 401:
-            log('HTTP authentication required: %s', original_url, sender=self)
-            return (False, parsed_result)
-
-        if not hasattr(parsed_result, 'headers'):
-            log('The requested object does not have a "headers" attribute.', sender=self)
-            return (False, None)
-
-        content_type = parsed_result.headers.get('content-type', '').lower()
-        # TODO: Also detect OPML feeds and other content types here
-        if parsed_result.version == '':
-            log('%s looks like a webpage - trying feed autodiscovery.', url, sender=self)
-            if not hasattr(parsed_result.feed, 'links'):
-                return (False, None)
-            try:
-                found_alternate_feed = False
-                for link in parsed_result.feed.links:
-                    if hasattr(link, 'type') and hasattr(link, 'href') and hasattr(link, 'rel'):
-                        if link.type in self.SUPPORTED_FEED_TYPES and link.rel == 'alternate':
-                            log('Found alternate feed link: %s', link.href, sender=self)
-                            parsed_result = feedparser.parse(link.href,
-                                    agent=self.user_agent,
-                                    modified=modified,
-                                    etag=etag,
-                                    )
-                            found_alternate_feed = True
-                            break
-
-                # YouTube etc feed lookup (after the normal link lookup in case
-                # they provide a standard feed discovery mechanism in the future).
-                if not found_alternate_feed:
-                    next = resolver.get_real_channel_url(url)
-                    if next is not None:
-                        parsed_result = feedparser.parse(next, agent=self.user_agent, modified=modified, etag=etag)
-                        found_alternate_feed = True
-
-                # We have not found a valid feed - abort here!
-                if not found_alternate_feed:
-                    return (False, None)
-            except:
-                log('Error while trying to get feed URL from webpage', sender=self, traceback=True)
-
-        updated = False
-        status = parsed_result.get('status', None)
-
-        if status == 304:
-            # No new data, based on the etag or modified values.
-            # We need to update the modified time in the
-            # storage, though, so we know that what we have
-            # stored is up to date.
-            log('Using cached feed: %s', url, sender=self)
-        elif status in (200, 301, 302, 303, 307):
-            # log('===============')
-            # log('[%s]', url)
-            # log('LM old: %s', old_channel.last_modified)
-            # log('LM new: %s', parsed_result.headers.get('last-modified'))
-            # log('=======')
-            # log('ET old: %s', old_channel.etag)
-            # log('ET new: %s', parsed_result.headers.get('etag'))
-            # log('===============')
-            updated = True
-            # There is new content, so store it unless there was an error.
-            # Store it regardless of errors when we don't have anything yet
-            error = parsed_result.get('bozo_exception')
-
-            # Detect HTTP authentication pages
-            if isinstance(error, feedparser.NonXMLContentType) and \
-                    status == 302 and hasattr(c, 'headers') and \
-                    c.header.get('content-type').startswith('text/html'):
-                log('Warning: Looks like a Wifi authentication page: %s', c.url, sender=self)
-                log('Acting as if the feed was not updated (FIXME!)', sender=self)
-                return (True, None)
-
-            if error:
-                log('Warning: %s (%s)', url, str(error), sender=self)
-                parsed_result['bozo_exception'] = str(error)
-        else:
-            log('Strange status code: %s (%s)', url, status, sender=self)
-
-        return (updated, parsed_result)
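
For contrast with the new module below: the removed Cache class reported everything through its (updated, feed) return value, so callers had to disambiguate "not modified", "authentication required" and outright failures by inspecting the tuple. A sketch based on the docstring above (the URL is a placeholder):

    from gpodder import cache

    c = cache.Cache()
    (updated, feed) = c.fetch('http://example.com/feed.xml', old_channel=None)
    if not updated:
        # Could be a 304, a 401 (feed is then the parsed result) or any
        # other failure (feed is then None) - the caller has to guess
        pass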


@@ -59,9 +59,6 @@ gPodderSettings = {
     'http_proxy': ( str, '',
       _("The URL for proxy to use for HTTP downloads. "
         "Requires that 'proxy_use_environment' be 'False'")),
-    'ftp_proxy': ( str, '',
-      _("The URL for proxy to use for FTP downloads. "
-        "Requires that 'proxy_use_environment' be 'False'")),
     'custom_sync_name': ( str, '{episode.basename}',
       _("The name used when copying a file to a FS-based device. Available "
         "options are: episode.basename, episode.title, episode.published")),
@@ -92,7 +89,7 @@ gPodderSettings = {
         _("Only sync episodes to a device that have not been marked played in gPodder.")),
     'proxy_use_environment': ( bool, True,
         _("Use your environment's (Eg. Gnome's) proxy settings instead of "
-          "gPodder's. See 'http_proxy' and 'ftp_proxy'.")),
+          "gPodder's. See 'http_proxy'.")),
     'update_tags': ( bool, False,
         _("Update tags on files after they've been downloaded. This sets the "
           "artist tag to the title of the feed (can be customized in the feed's "


@@ -352,7 +352,7 @@ class Storage(object):
 
         return data
 
-    def save_channel(self, c, bulk=False):
+    def save_channel(self, c):
         if c.id is None:
             c.id = self.find_channel_id(c.url)
 
@@ -452,7 +452,7 @@ class Storage(object):
         else:
             return None
 
-    def save_episode(self, e, bulk=False):
+    def save_episode(self, e):
         if not e.guid:
             log('Refusing to save an episode without guid: %s', e)
             return


@@ -163,12 +163,10 @@ class DownloadURLOpener(urllib.FancyURLopener):
     def __init__( self, channel):
         if gl.config.proxy_use_environment:
             proxies = None
+        elif gl.config.http_proxy:
+            proxies = {'http': gl.config.http_proxy}
         else:
             proxies = {}
-            if gl.config.http_proxy:
-                proxies['http'] = gl.config.http_proxy
-            if gl.config.ftp_proxy:
-                proxies['ftp'] = gl.config.ftp_proxy
 
         self.channel = channel
         urllib.FancyURLopener.__init__( self, proxies)
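
With ftp_proxy gone, DownloadURLOpener only ever passes an HTTP proxy mapping. For reference, urllib.FancyURLopener accepts a proxies dict keyed by URL scheme, or None to pick up the environment's settings; the proxy URL below is a placeholder:

    import urllib

    # Explicit proxy for HTTP downloads only
    opener = urllib.FancyURLopener(proxies={'http': 'http://proxy.example.com:3128'})

    # Or defer to the environment (http_proxy etc.), as proxy_use_environment does
    opener = urllib.FancyURLopener(proxies=None)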

src/gpodder/feedcore.py (new file, 233 lines)

@@ -0,0 +1,233 @@
+# -*- coding: utf-8 -*-
+#
+# gPodder - A media aggregator and podcast client
+# Copyright (c) 2005-2009 Thomas Perl and the gPodder Team
+#
+# gPodder is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# gPodder is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+#
+# Generic feed fetching module for aggregators
+# Thomas Perl <thpinfo.com>; 2009-06-11
+#
+
+import feedparser
+
+import urllib
+import urlparse
+import urllib2
+
+
+def patch_feedparser():
+    """Fix a bug in feedparser 4.1
+
+    This replaces the mapContentType method of the
+    _FeedParserMixin class to correctly detect the
+    "plain" content type as "text/plain".
+
+    See also:
+    http://code.google.com/p/feedparser/issues/detail?id=80
+
+    Added by Thomas Perl for gPodder 2007-12-29
+    """
+    def mapContentType2(self, contentType):
+        contentType = contentType.lower()
+        if contentType == 'text' or contentType == 'plain':
+            contentType = 'text/plain'
+        elif contentType == 'html':
+            contentType = 'text/html'
+        elif contentType == 'xhtml':
+            contentType = 'application/xhtml+xml'
+        return contentType
+
+    try:
+        if feedparser._FeedParserMixin().mapContentType('plain') == 'plain':
+            feedparser._FeedParserMixin.mapContentType = mapContentType2
+    except:
+        pass
+
+patch_feedparser()
+
+
+class ExceptionWithData(Exception):
+    """Base exception with additional payload"""
+    def __init__(self, data):
+        Exception.__init__(self)
+        self.data = data
+
+    def __str__(self):
+        return '%s: %s' % (self.__class__.__name__, str(self.data))
+
+
+# Temporary errors
+class Offline(Exception): pass
+class BadRequest(Exception): pass
+class InternalServerError(Exception): pass
+class WifiLogin(ExceptionWithData): pass
+
+# Fatal errors
+class Unsubscribe(Exception): pass
+class NotFound(Exception): pass
+class InvalidFeed(Exception): pass
+class UnknownStatusCode(ExceptionWithData): pass
+
+# Authentication error
+class AuthenticationRequired(Exception): pass
+
+# Successful parsing of the feed
+class UpdatedFeed(ExceptionWithData): pass
+class NewLocation(ExceptionWithData): pass
+class NotModified(ExceptionWithData): pass
+
+
+class Fetcher(object):
+    # Supported types, see http://feedvalidator.org/docs/warning/EncodingMismatch.html
+    FEED_TYPES = ('application/rss+xml',
+                  'application/atom+xml',
+                  'application/rdf+xml',
+                  'application/xml',
+                  'text/xml')
+
+    def __init__(self, user_agent):
+        self.user_agent = user_agent
+
+    def _get_handlers(self):
+        """Provide additional urllib2 handler objects
+
+        Subclasses can override this method to inject urllib2
+        handler objects into the feedparser.parse() call to
+        extend the functionality of this Fetcher (for proxies, ..)
+        """
+        return []
+
+    def _resolve_url(self, url):
+        """Provide additional ways of resolving an URL
+
+        Subclasses can override this method to provide more
+        ways of resolving a given URL to a feed URL. If the
+        Fetcher is in "autodiscovery" mode, it will try this
+        method as a last resort for coming up with a feed URL.
+        """
+        return None
+
+    def _autodiscover_feed(self, feed):
+        try:
+            # First, try all <link> elements if available
+            for link in feed.feed.get('links', ()):
+                is_feed = link.get('type', '') in self.FEED_TYPES
+                is_alternate = link.get('rel', '') == 'alternate'
+                url = link.get('href', None)
+
+                if url and is_feed and is_alternate:
+                    try:
+                        self._parse_feed(url, None, None, False)
+                    except UpdatedFeed, updated:
+                        raise
+                    except Exception:
+                        pass
+
+            # Second, try to resolve the URL
+            url = self._resolve_url(feed.href)
+            if url:
+                self._parse_feed(url, None, None, False)
+        except UpdatedFeed, updated:
+            raise NewLocation(updated.data)
+        except Exception, e:
+            pass
+
+    def _check_offline(self, feed):
+        if not hasattr(feed, 'headers'):
+            raise Offline()
+
+    def _check_wifi_login_page(self, feed):
+        html_page = 'text/html' in feed.headers.get('content-type', '')
+        if not feed.version and feed.status == 302 and html_page:
+            raise WifiLogin(feed.href)
+
+    def _check_valid_feed(self, feed):
+        if feed is None:
+            raise InvalidFeed('feed is None')
+
+        if not hasattr(feed, 'status'):
+            raise InvalidFeed('feed has no status code')
+
+        if not feed.version and feed.status != 304:
+            raise InvalidFeed('unknown feed type')
+
+    def _normalize_status(self, status):
+        # Based on Mark Pilgrim's "Atom aggregator behaviour" article
+        if status in (200, 301, 302, 304, 400, 401, 403, 404, 410, 500):
+            return status
+        elif status >= 200 and status < 300:
+            return 200
+        elif status >= 300 and status < 400:
+            return 302
+        elif status >= 400 and status < 500:
+            return 400
+        elif status >= 500 and status < 600:
+            return 500
+        else:
+            return status
+
+    def _check_statuscode(self, feed):
+        status = self._normalize_status(feed.status)
+        if status == 200:
+            raise UpdatedFeed(feed)
+        elif status == 301:
+            raise NewLocation(feed)
+        elif status == 302:
+            raise UpdatedFeed(feed)
+        elif status == 304:
+            raise NotModified(feed)
+        elif status == 400:
+            raise BadRequest('bad request')
+        elif status == 401:
+            raise AuthenticationRequired('authentication required')
+        elif status == 403:
+            raise Unsubscribe('forbidden')
+        elif status == 404:
+            raise NotFound('not found')
+        elif status == 410:
+            raise Unsubscribe('resource is gone')
+        elif status == 500:
+            raise InternalServerError('internal server error')
+        else:
+            raise UnknownStatusCode(status)
+
+    def _parse_feed(self, url, etag, modified, autodiscovery=True):
+        """Parse the feed and raise the result."""
+        feed = feedparser.parse(url,
+                agent=self.user_agent,
+                modified=modified,
+                etag=etag,
+                handlers=self._get_handlers())
+
+        self._check_offline(feed)
+        self._check_wifi_login_page(feed)
+
+        if feed.status != 304 and not feed.version and autodiscovery:
+            self._autodiscover_feed(feed)
+
+        self._check_valid_feed(feed)
+        self._check_statuscode(feed)
+
+    def fetch(self, url, etag=None, modified=None):
+        """Download a feed, with optional etag and modified values
+
+        This method will always raise an exception that tells
+        the calling code the result of the fetch operation. See
+        the code for the feedcore module for all the possible
+        exception types.
+        """
+        self._parse_feed(url, etag, modified)
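
A minimal Fetcher subclass, mirroring what gPodderFetcher does in the libpodcasts.py diff further down: inject a urllib2 ProxyHandler via _get_handlers() and hook into autodiscovery via _resolve_url(). The class name, User-Agent, URLs and proxy address here are placeholders:

    import urllib2
    from gpodder import feedcore

    class MyFetcher(feedcore.Fetcher):
        def _get_handlers(self):
            # Route feed downloads through an HTTP proxy server
            return [urllib2.ProxyHandler({'http': 'http://proxy.example.com:8080'})]

        def _resolve_url(self, url):
            # Last resort during autodiscovery; None means "no better idea"
            return None

    fetcher = MyFetcher(user_agent='MyAggregator/1.0')
    try:
        fetcher.fetch('http://example.com/feed.xml')
    except feedcore.UpdatedFeed, updated:
        for entry in updated.data.entries:
            print entry.get('title', '(no title)')
    except feedcore.NotModified:
        pass  # cached copy is still current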


@@ -66,6 +66,7 @@ except ImportError:
     pass
 
+from gpodder import feedcore
 from gpodder import libtagupdate
 from gpodder import util
 from gpodder import opml
@@ -97,7 +98,6 @@ from libpodcasts import load_channels
 from libpodcasts import update_channels
 from libpodcasts import save_channels
 from libpodcasts import can_restore_from_opml
-from libpodcasts import HTTPAuthError
 
 from gpodder.libgpodder import gl
@@ -1264,17 +1264,6 @@ class gPodder(BuilderWidget, dbus.service.Object):
             menu.popup(None, None, None, event.button, event.time)
             return True
 
-    def change_current_podcast_url(self, *args):
-        if self.active_channel is None:
-            return
-
-        url_callback = lambda new_url: self.change_channel_url(self.active_channel, new_url)
-        gPodderAddPodcastDialog(url_callback=url_callback, \
-                custom_title=_('Change feed URL of %s') % self.active_channel.title, \
-                custom_label=_('Change to:'), \
-                preset_url=self.active_channel.url, \
-                btn_add_stock_id=_('Change URL'))
-
     def treeview_channels_button_pressed( self, treeview, event):
         global WEB_BROWSER_ICON
@@ -1354,10 +1343,6 @@ class gPodder(BuilderWidget, dbus.service.Object):
             item.connect( 'activate', self.on_itemEditChannel_activate)
             menu.append( item)
 
-            item = gtk.ImageMenuItem(_('Change feed URL'))
-            item.connect('activate', self.change_current_podcast_url)
-            menu.append(item)
-
             item = gtk.ImageMenuItem(gtk.STOCK_DELETE)
             item.connect( 'activate', self.on_itemRemoveChannel_activate)
             menu.append( item)
@@ -1767,6 +1752,8 @@ class gPodder(BuilderWidget, dbus.service.Object):
                 url = model.get_value( model.get_iter( path), 0)
                 episode = self.active_channel.find_episode(url)
+                if episode is None:
+                    continue
 
                 if episode.file_type() not in ('audio', 'video'):
                     open_instead_of_play = True
@@ -2013,7 +2000,9 @@ class gPodder(BuilderWidget, dbus.service.Object):
         channel = error = None
         try:
             channel = PodcastChannel.load(url=url, create=True, authentication_tokens=authentication_tokens)
-        except HTTPAuthError, e:
+        except feedcore.AuthenticationRequired, e:
             error = e
+        except feedcore.WifiLogin, e:
+            error = e
         except Exception, e:
             log('Error in PodcastChannel.load(%s): %s', url, e, traceback=True, sender=self)
@@ -2052,21 +2041,29 @@ class gPodder(BuilderWidget, dbus.service.Object):
             if len(new_episodes):
                 self.new_episodes_show(new_episodes)
-        elif isinstance( error, HTTPAuthError ):
+        elif isinstance(error, feedcore.AuthenticationRequired):
             response, auth_tokens = self.UsernamePasswordDialog(
                 _('Feed requires authentication'), _('Please enter your username and password.'))
 
             if response:
                 self.add_new_channel( url, authentication_tokens=auth_tokens )
+        elif isinstance(error, feedcore.WifiLogin):
+            if self.show_confirmation(_('The URL you are trying to add redirects to the website %s. Do you want to visit the website to login now?') % saxutils.escape(error.data), _('Website redirection detected')):
+                util.open_website(error.data)
+                if self.show_confirmation(_('Please login to the website now. Should I retry subscribing to the podcast at %s?') % saxutils.escape(url), _('Retry adding channel')):
+                    self.add_new_channel(url)
         else:
             # Ok, the URL is not a channel, or there is some other
            # error - let's see if it's a web page or OPML file...
+            handled = False
             try:
                 data = urllib2.urlopen(url).read().lower()
                 if '</opml>' in data:
                     # This looks like an OPML feed
                     self.on_item_import_from_file_activate(None, url)
+                    handled = True
                 elif '</html>' in data:
                     # This looks like a web page
@@ -2074,13 +2071,15 @@
                     message = _('The URL you specified points to a web page. You need to find the "feed" URL of the podcast to add to gPodder. Do you want to visit this website now and look for the podcast feed URL?\n\n(Hint: Look for "XML feed", "RSS feed" or "Podcast feed" if you are unsure for what to look. If there is only an iTunes URL, try adding this one.)')
                     if self.show_confirmation(message, title):
                         util.open_website(url)
+                    handled = True
             except Exception, e:
                 log('Error trying to handle the URL as OPML or web page: %s', e, sender=self)
 
-            title = _('Error adding podcast')
-            message = _('The podcast could not be added. Please check the spelling of the URL or try again later.')
-            self.show_message( message, title)
+            if not handled:
+                title = _('Error adding podcast')
+                message = _('The podcast could not be added. Please check the spelling of the URL or try again later.')
+                self.show_message( message, title)
 
         self.entryAddChannel.set_text(self.ENTER_URL_TEXT)
         self.entryAddChannel.set_sensitive(True)
@@ -2148,7 +2147,15 @@ class gPodder(BuilderWidget, dbus.service.Object):
         for updated, channel in enumerate(channels):
             if not self.feed_cache_update_cancelled:
-                channel.update()
+                try:
+                    channel.update()
+                except feedcore.Offline:
+                    self.feed_cache_update_cancelled = True
+                    if not self.minimized:
+                        util.idle_add(self.show_message, _('The feed update has been cancelled because you appear to be offline.'), _('Cannot connect to server'))
+                    break
+                except Exception, e:
+                    util.idle_add(self.show_message, _('There has been an error updating %s: %s') % (saxutils.escape(channel.url), saxutils.escape(str(e))), _('Error while updating feed'))
 
             # By the time we get here the update may have already been cancelled
             if not self.feed_cache_update_cancelled:
@@ -2896,35 +2903,6 @@ class gPodder(BuilderWidget, dbus.service.Object):
         gPodderChannel(channel=self.active_channel, callback_closed=lambda: self.updateComboBox(only_selected_channel=True))
 
-    def change_channel_url(self, channel, new_url):
-        old_url = channel.url
-
-        if old_url == new_url:
-            log('Channel URL %s unchanged.', old_url, sender=self)
-            return
-        else:
-            log('Changing channel URL from %s to %s', old_url, new_url, sender=self)
-
-        channel.url = new_url
-        # remove etag and last_modified to force an update
-        channel.etag = ''
-        channel.last_modified = ''
-
-        # Remove old episodes which haven't been downloaded.
-        db.delete_empty_episodes(channel.id)
-
-        (success, error) = channel.update()
-        if not success:
-            self.show_message(_('The specified URL is invalid. The old URL has been used instead.'), _('Invalid URL'))
-            channel.url = old_url
-        else:
-            # Only allow the last podcast to be new when changing URLs
-            db.force_last_new(channel)
-            # Update the OPML file.
-            save_channels(self.channels)
-
-        # update feed cache and select the podcast with the new URL afterwards
-        self.update_feed_cache(force_update=False, select_url_afterwards=channel.url)
-
     def on_itemRemoveChannel_activate(self, widget, *args):
         try:
             if gpodder.interface == gpodder.GUI:
@@ -3630,7 +3608,6 @@ class gPodderProperties(BuilderWidget):
             self.gPodderProperties.fullscreen()
 
         gl.config.connect_gtk_editable( 'http_proxy', self.httpProxy)
-        gl.config.connect_gtk_editable( 'ftp_proxy', self.ftpProxy)
         gl.config.connect_gtk_editable( 'player', self.openApp)
         gl.config.connect_gtk_editable('videoplayer', self.openVideoApp)
         gl.config.connect_gtk_editable( 'custom_sync_name', self.entryCustomSyncName)
@@ -3847,7 +3824,6 @@ class gPodderProperties(BuilderWidget):
     def on_cbEnvironmentVariables_toggled(self, widget, *args):
         sens = not self.cbEnvironmentVariables.get_active()
         self.httpProxy.set_sensitive( sens)
-        self.ftpProxy.set_sensitive( sens)
 
     def on_comboboxDeviceType_changed(self, widget, *args):
         active_item = self.comboboxDeviceType.get_active()


@@ -175,7 +175,7 @@ class gPodderLib(object):
                     episode.is_played = True
                 if (episode.file_exists()):
                     episode.mark(state=db.STATE_DOWNLOADED)
-                episode.save(bulk=True)
+                episode.save()
                 p += p_step_2
 
             # flush the localdb updates for this channel
             status_callback(p, _('Writing changes to database'))


@@ -34,7 +34,7 @@ import pango
 import gpodder
 from gpodder import util
 from gpodder import opml
-from gpodder import cache
+from gpodder import feedcore
 from gpodder import services
 from gpodder import draw
 from gpodder import libtagupdate
@@ -79,7 +79,45 @@ else:
     ICON_NEW = gtk.STOCK_ABOUT
 
-class HTTPAuthError(Exception): pass
+class gPodderFetcher(feedcore.Fetcher):
+    """
+    This class extends the feedcore Fetcher with the gPodder User-Agent and the
+    Proxy handler based on the current settings in gPodder and provides a
+    convenience method (fetch_channel) for use by PodcastChannel objects.
+    """
+
+    def __init__(self):
+        feedcore.Fetcher.__init__(self, gpodder.user_agent)
+
+    def fetch_channel(self, channel):
+        etag = channel.etag
+        modified = feedparser._parse_date(channel.last_modified)
+        # If we have a username or password, rebuild the url with them included
+        # Note: using a HTTPBasicAuthHandler would be pain because we need to
+        # know the realm. It can be done, but I think this method works, too
+        if channel.username or channel.password:
+            username = urllib.quote(channel.username)
+            password = urllib.quote(channel.password)
+            auth_string = ':'.join((username, password))
+            url_parts = list(urlparse.urlsplit(channel.url))
+            url_parts[1] = '@'.join((auth_string, url_parts[1]))
+            url = urlparse.urlunsplit(url_parts)
+        else:
+            url = channel.url
+        self.fetch(url, etag, modified)
+
+    def _resolve_url(self, url):
+        return resolver.get_real_channel_url(url)
+
+    def _get_handlers(self):
+        handlers = []
+        if not gl.config.proxy_use_environment:
+            # Add a ProxyHandler for fetching data via a proxy server
+            proxies = {}
+            if gl.config.http_proxy:
+                proxies['http'] = gl.config.http_proxy
+            handlers.append(urllib2.ProxyHandler(proxies))
+        return handlers
+
 
 class PodcastModelObject(object):
@@ -113,7 +151,7 @@ class PodcastChannel(PodcastModelObject):
     MAX_FOLDERNAME_LENGTH = 150
 
     icon_cache = {}
 
-    fc = cache.Cache()
+    feed_fetcher = gPodderFetcher()
 
     @classmethod
     def load(cls, url, create=True, authentication_tokens=None):
@@ -128,12 +166,8 @@
             if authentication_tokens is not None:
                 tmp.username = authentication_tokens[0]
                 tmp.password = authentication_tokens[1]
 
-            success, error_code = tmp.update()
-            if not success:
-                if error_code == 401:
-                    raise HTTPAuthError
-                else:
-                    return None
+            tmp.update()
             tmp.save()
             db.force_last_new(tmp)
             return tmp
@@ -148,78 +182,43 @@
         """
         return PodcastEpisode.create_from_dict(d, self)
 
-    def update(self):
-        (updated, c) = self.fc.fetch(self.url, self,
-                not gl.config.proxy_use_environment,
-                gl.config.http_proxy,
-                gl.config.ftp_proxy)
-
-        if c is None:
-            return ( False, None )
-
-        if c.status == 401:
-            return ( False, 401 )
-
-        if self.url != c.url and c.status != 302:
-            # The URL has changed, and the status code is not a temporary
-            # redirect, so update the channel's URL accordingly for future use
-            log('Updating channel URL from %s to %s',
-                self.url, c.url, sender=self)
-            self.url = c.url
-
+    def _consume_updated_feed(self, feed):
         # update the cover if it's not there
         self.update_cover()
 
-        # If we have an old instance of this channel, and
-        # feedcache says the feed hasn't changed, return old
-        if not updated:
-            log('Channel %s is up to date', self.url)
-            return ( True, None )
-
-        # Save etag and last-modified for later reuse
-        if c.headers.get('etag'):
-            self.etag = c.headers.get('etag')
-        if c.headers.get('last-modified'):
-            self.last_modified = c.headers.get('last-modified')
-
-        self.parse_error = c.get('bozo_exception', None)
+        self.parse_error = feed.get('bozo_exception', None)
 
-        if hasattr(c.feed, 'title'):
-            self.title = c.feed.title
-            # Start YouTube-specific title FIX
-            YOUTUBE_PREFIX = 'Videos uploaded by '
-            if self.title.startswith(YOUTUBE_PREFIX):
-                self.title = self.title[len(YOUTUBE_PREFIX):] + ' on YouTube'
-            # End YouTube-specific title FIX
-        else:
-            self.title = self.url
-
-        if hasattr( c.feed, 'link'):
-            self.link = c.feed.link
-
-        if hasattr( c.feed, 'subtitle'):
-            self.description = c.feed.subtitle
+        self.title = feed.feed.get('title', self.url)
+        self.link = feed.feed.get('link', self.link)
+        self.description = feed.feed.get('subtitle', self.description)
+        # Start YouTube-specific title FIX
+        YOUTUBE_PREFIX = 'Uploads by '
+        if self.title.startswith(YOUTUBE_PREFIX):
+            self.title = self.title[len(YOUTUBE_PREFIX):] + ' on YouTube'
+        # End YouTube-specific title FIX
 
-        if hasattr(c.feed, 'updated_parsed') and c.feed.updated_parsed is not None:
-            self.pubDate = rfc822.mktime_tz(c.feed.updated_parsed+(0,))
-        else:
+        try:
+            self.pubDate = rfc822.mktime_tz(feed.feed.get('updated_parsed', None)+(0,))
+        except:
             self.pubDate = time.time()
 
-        if hasattr( c.feed, 'image'):
-            if hasattr(c.feed.image, 'href') and c.feed.image.href:
+        if hasattr(feed.feed, 'image'):
+            if hasattr(feed.feed.image, 'href') and feed.feed.image.href:
                 old = self.image
-                self.image = c.feed.image.href
+                self.image = feed.feed.image.href
                 if old != self.image:
                     self.update_cover(force=True)
 
-        # Marked as bulk because we commit after importing episodes.
-        db.save_channel(self, bulk=True)
+        self.save()
 
         # Load all episodes to update them properly.
         existing = self.get_all_episodes()
 
         # We can limit the maximum number of entries that gPodder will parse
         # via the "max_episodes_per_feed" configuration option.
-        if len(c.entries) > gl.config.max_episodes_per_feed:
+        if len(feed.entries) > gl.config.max_episodes_per_feed:
             log('Limiting number of episodes for %s to %d', self.title, gl.config.max_episodes_per_feed)
-        for entry in c.entries[:min(gl.config.max_episodes_per_feed, len(c.entries))]:
+        for entry in feed.entries[:min(gl.config.max_episodes_per_feed, len(feed.entries))]:
             episode = None
 
             try:
@@ -237,15 +236,52 @@ class PodcastChannel(PodcastModelObject):
                     self.count_new -= 1
                 episode = ex
 
-            episode.save(bulk=True)
+            episode.save()
 
         # This *might* cause episodes to be skipped if there were more than
         # max_episodes_per_feed items added to the feed between updates.
         # The benefit is that it prevents old episodes from apearing as new
        # in certain situations (see bug #340).
         db.purge(gl.config.max_episodes_per_feed, self.id)
 
+    def _update_etag_modified(self, feed):
+        self.etag = feed.headers.get('etag', self.etag)
+        self.last_modified = feed.headers.get('last-modified', self.last_modified)
+
+    def update(self):
+        try:
+            self.feed_fetcher.fetch_channel(self)
+        except feedcore.UpdatedFeed, updated:
+            feed = updated.data
+            self._consume_updated_feed(feed)
+            self._update_etag_modified(feed)
+            self.save()
+        except feedcore.NewLocation, updated:
+            feed = updated.data
+            self.url = feed.href
+            self._consume_updated_feed(feed)
+            self._update_etag_modified(feed)
+            self.save()
+        except feedcore.NotModified, updated:
+            feed = updated.data
+            self._update_etag_modified(feed)
+            self.save()
+        except Exception, e:
+            # "Not really" errors
+            #feedcore.AuthenticationRequired
+            # Temporary errors
+            #feedcore.Offline
+            #feedcore.BadRequest
+            #feedcore.InternalServerError
+            #feedcore.WifiLogin
+            # Permanent errors
+            #feedcore.Unsubscribe
+            #feedcore.NotFound
+            #feedcore.InvalidFeed
+            #feedcore.UnknownStatusCode
+            raise
 
         db.commit()
-
-        return ( True, None )
@@ -731,10 +767,10 @@ class PodcastEpisode(PodcastModelObject):
         self.is_played = False
         self.is_locked = channel.channel_is_locked
 
-    def save(self, bulk=False):
+    def save(self):
         if self.state != db.STATE_DOWNLOADED and self.file_exists():
             self.state = db.STATE_DOWNLOADED
-        db.save_episode(self, bulk=bulk)
+        db.save_episode(self)
 
     def set_state(self, state):
         self.state = state
@@ -1055,7 +1091,10 @@ def update_channel_model_by_iter( model, iter, channel,
         description = ''.join(d+['\n', '<small>', description_markup, '</small>'])
     model.set(iter, 2, description)
 
-    model.set(iter, 6, channel.parse_error)
+    if channel.parse_error:
+        model.set(iter, 6, str(channel.parse_error))
+    else:
+        model.set(iter, 6, None)
 
     if count_unplayed > 0 or count_downloaded > 0:
         model.set(iter, 3, draw.draw_pill_pixbuf(str(count_unplayed), str(count_downloaded)))