Pickle-based storage method (dumbshelve); HTTP HEAD requests; buggy feed fixes

git-svn-id: svn://svn.berlios.de/gpodder/trunk@596 b0d088ad-0a06-0410-aad2-9ed5178a7e87
Thomas Perl 2008-03-02 12:56:16 +00:00
parent 6aab1c3d73
commit 34f4b6e8b2
5 changed files with 179 additions and 8 deletions

ChangeLog

@@ -1,3 +1,21 @@
+Sun, 02 Mar 2008 13:52:42 +0100 <thp@perli.net>
+	Pickle-based storage method (dumbshelve); HTTP HEAD requests; buggy feed fixes
+
+	* src/gpodder/dumbshelve.py: Added (replacement for "shelve" using
+	  pickle)
+	* src/gpodder/libgpodder.py: Rename *.db files to *.pickle.db, so we
+	  don't clash with old-style shelve .db files
+	* src/gpodder/libpodcasts.py: Use dumbshelve as a replacement for
+	  shelve; add EpisodeURLMetainfo that keeps track of metainfo downloaded
+	  via HTTP HEAD requests; make getting episode length and pubDate a bit
+	  more intelligent by trying to find correct values via HTTP HEAD;
+	  improve episode sorting by falling back to episode titles when no
+	  pubDate has been found (or pubDate is equal)
+	* src/gpodder/util.py: Add get_episode_info_from_url() function that
+	  tries to find out the length and pubDate of an episode by looking at
+	  the data from the HTTP HEAD; also support HTTP proxies via an optional
+	  "proxy" keyword argument
+
 Wed, 27 Feb 2008 10:44:48 +0100 <thp@perli.net>
 	Notify user about no new episodes when updating from tray icon

src/gpodder/dumbshelve.py (new file, 56 lines)

@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (C) 2005-2007 Thomas Perl <thp at perli.net>
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# dumbshelve.py - Temporary implementation of a shelve replacement
# 2008-02-27 Thomas Perl <thpinfo.com>
from gpodder.liblogger import log

import UserDict
import pickle
import os.path


class DumbShelve(UserDict.UserDict):
    """
    Simply tries to act like a "shelve" object..
    """
    def __init__(self, filename=None):
        UserDict.UserDict.__init__(self)
        self.__filename = filename

    def sync(self, filename=None):
        if filename is not None:
            self.__filename = filename

        try:
            # Pickle the whole DumbShelve object to its backing file;
            # binary mode ('wb') matters on Windows, where text mode
            # would corrupt the pickle stream
            pickle.dump(self, open(self.__filename, 'wb'))
            return True
        except:
            log('Cannot pickle me to %s', self.__filename, sender=self, traceback=True)
            return False


def open_shelve(filename):
    if not os.path.exists(filename):
        return DumbShelve(filename)
    else:
        try:
            return pickle.load(open(filename, 'rb'))
        except:
            log('Error loading %s. Creating new DumbShelve.', filename, traceback=True)
            return DumbShelve(filename)
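
A quick usage sketch for the module above (editor's illustration, not part of the commit; the path and key are made up, and it assumes Python 2 with the gpodder package on the path):

    from gpodder import dumbshelve

    # Nonexistent file: open_shelve() returns a fresh, empty DumbShelve
    d = dumbshelve.open_shelve('/tmp/example.pickle.db')
    d['http://example.com/episode.mp3'] = {'length': 1234}

    # sync() pickles the whole DumbShelve object back to its file
    d.sync()

    # Re-opening unpickles the stored object; unreadable or corrupt
    # files silently fall back to a new, empty DumbShelve
    d2 = dumbshelve.open_shelve('/tmp/example.pickle.db')
    print d2['http://example.com/episode.mp3']    # {'length': 1234}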

src/gpodder/libgpodder.py

@@ -61,8 +61,9 @@ class gPodderLibClass( object):
         util.make_directory( gpodder_dir)
         self.tempdir = gpodder_dir
-        self.feed_cache_file = os.path.join( gpodder_dir, 'feedcache.db')
-        self.channel_settings_file = os.path.join( gpodder_dir, 'channelsettings.db')
+        self.feed_cache_file = os.path.join(gpodder_dir, 'feedcache.pickle.db')
+        self.channel_settings_file = os.path.join(gpodder_dir, 'channelsettings.pickle.db')
+        self.episode_metainfo_file = os.path.join(gpodder_dir, 'episodemetainfo.pickle.db')
         self.channel_opml_file = os.path.join(gpodder_dir, 'channels.opml')
         self.channel_xml_file = os.path.join(gpodder_dir, 'channels.xml')

src/gpodder/libpodcasts.py

@@ -73,12 +73,12 @@ import md5
 import string
-import shelve
+from gpodder import dumbshelve

 global_lock = threading.RLock()

 class ChannelSettings(object):
-    storage = shelve.open( libgpodder.gPodderLib().channel_settings_file)
+    storage = dumbshelve.open_shelve(libgpodder.gPodderLib().channel_settings_file)

     @classmethod
     def get_settings_by_url( cls, url):
@@ -98,12 +98,29 @@ class ChannelSettings(object):
         cls.storage.sync()

+class EpisodeURLMetainfo(object):
+    storage = dumbshelve.open_shelve(libgpodder.gPodderLib().episode_metainfo_file)
+
+    @classmethod
+    def get_metadata_by_url(cls, url):
+        if isinstance(url, unicode):
+            url = url.encode('utf-8')
+
+        if cls.storage.has_key(url):
+            return cls.storage[url]
+        else:
+            log('Trying to download metainfo for %s', url)
+            result = util.get_episode_info_from_url(url, libgpodder.gPodderLib().config.http_proxy)
+            cls.storage[url] = result
+            cls.storage.sync()
+            return result
+
 class podcastChannel(ListType):
     """holds data for a complete channel"""
     SETTINGS = ('sync_to_devices', 'is_music_channel', 'device_playlist_name','override_title','username','password')
     icon_cache = {}

-    storage = shelve.open( libgpodder.gPodderLib().feed_cache_file)
+    storage = dumbshelve.open_shelve(libgpodder.gPodderLib().feed_cache_file)
     fc = cache.Cache( storage)

     @classmethod
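
EpisodeURLMetainfo (added above) is a simple read-through cache: the first lookup for a URL performs an HTTP HEAD via util.get_episode_info_from_url() and pickles the result; every later lookup is answered from disk. Note that an empty result ({}) is cached as well, so a URL that yielded nothing is not probed again. A sketch of the intended call pattern (editor's illustration; the URL is made up, and this is not part of the commit):

    info = EpisodeURLMetainfo.get_metadata_by_url('http://example.com/ep.mp3')
    print info.get('length')    # file size from Content-Length, if the server sent one
    print info.get('pubdate')   # RFC 2822 date derived from Last-Modified, if any

    # A second call for the same URL hits the pickled cache,
    # with no further network traffic
    info = EpisodeURLMetainfo.get_metadata_by_url('http://example.com/ep.mp3')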
@@ -289,7 +306,7 @@ class podcastChannel(ListType):
         gl = libgpodder.gPodderLib()

         if not last_pubdate:
-            return self[0:min(len(self),gl.config.default_new)]
+            return [episode for episode in self[0:min(len(self),gl.config.default_new)] if self.episode_is_new(episode)]

         new_episodes = []
         for episode in self.get_all_episodes():
@@ -540,8 +557,24 @@ class podcastItem(object):
         if not episode.url:
             raise ValueError( 'Episode has an invalid URL')

+        if not episode.pubDate:
+            metainfo = episode.get_metainfo()
+            if 'pubdate' in metainfo:
+                log('Patching pubdate in from metainfo :)')
+                episode.pubDate = metainfo['pubdate']
+
         if hasattr( enclosure, 'length'):
-            episode.length = enclosure.length
+            try:
+                episode.length = int(enclosure.length)
+            except:
+                episode.length = -1
+
+            if episode.length <= 0:
+                metainfo = episode.get_metainfo()
+                if 'length' in metainfo:
+                    log('Patching length in from metainfo :)')
+                    episode.length = metainfo['length']
+
         if hasattr( enclosure, 'type'):
             episode.mimetype = enclosure.type
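
The net effect of the enclosure handling above: a missing or non-numeric length becomes -1, and any value <= 0 triggers the HEAD-based fallback. The same rule in isolation (editor's sketch; effective_length is a hypothetical helper, not part of the commit):

    def effective_length(enclosure_length, metainfo):
        # Buggy feeds often report "0", "" or non-numeric lengths
        try:
            length = int(enclosure_length)
        except (TypeError, ValueError):
            length = -1

        if length <= 0:
            # Fall back to the Content-Length observed via HTTP HEAD
            length = metainfo.get('length', length)

        return length

    print effective_length('None', {'length': 4242})   # 4242 (patched from metainfo)
    print effective_length('1000', {})                 # 1000 (feed value was fine)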
@@ -563,6 +596,9 @@ class podcastItem(object):
         self.channel = channel
         self.pubDate = ''

+    def get_metainfo(self):
+        return EpisodeURLMetainfo.get_metadata_by_url(self.url)
+
     def is_played(self):
         gl = libgpodder.gPodderLib()
         return gl.history_is_played(self.url)
@@ -624,6 +660,10 @@ class podcastItem(object):
             return '00000000'

     def __cmp__( self, other):
+        if self.pubDate == other.pubDate:
+            log('pubDate equal, comparing titles (buggy feed?)', sender=self)
+            return cmp(self.title, other.title)
+
         try:
             timestamp_self = int(mktime_tz( parsedate_tz( self.pubDate)))
             timestamp_other = int(mktime_tz( parsedate_tz( other.pubDate)))
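
The tie-breaker above keeps sort order stable for feeds that repeat the same pubDate on every item. A simplified, runnable stand-in (editor's sketch: _Ep is not gPodder's class, and the real __cmp__ compares parsed timestamps rather than raw date strings):

    class _Ep(object):
        def __init__(self, pubDate, title):
            self.pubDate, self.title = pubDate, title

        def __cmp__(self, other):
            if self.pubDate == other.pubDate:
                return cmp(self.title, other.title)   # title breaks the tie
            return cmp(self.pubDate, other.pubDate)

    a = _Ep('Sun, 02 Mar 2008 13:52:42 +0100', 'Zebra Episode')
    b = _Ep('Sun, 02 Mar 2008 13:52:42 +0100', 'Alpha Episode')
    print [e.title for e in sorted([a, b])]   # ['Alpha Episode', 'Zebra Episode']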
@@ -675,7 +715,7 @@ class podcastItem(object):
     def calculate_filesize( self):
         try:
-            self.length = str(os.path.getsize( self.local_filename()))
+            self.length = os.path.getsize(self.local_filename())
         except:
             log( 'Could not get filesize for %s.', self.url)

src/gpodder/util.py

@@ -51,6 +51,9 @@ import datetime
 import urlparse
 import urllib
 import urllib2
+import httplib
+import feedparser
 import StringIO
 import xml.dom.minidom
@@ -757,3 +760,56 @@ def format_seconds_to_hour_min_sec(seconds):
     else:
         return result[0]

+
+def get_episode_info_from_url(url, proxy=None):
+    """
+    Try to get information about a podcast episode by sending
+    a HEAD request to the HTTP server and parsing the result.
+
+    The return value is a dict containing all fields that
+    could be parsed from the URL. This currently contains:
+
+      "length": The size of the file in bytes
+      "pubdate": A formatted representation of the pubDate
+
+    If the "proxy" parameter is used, it has to be the URL
+    of the HTTP proxy server to use, e.g. http://proxy:8080/
+
+    If there is an error, this function returns {}. It only
+    works with http:// and https:// URLs.
+    """
+    if not (url.startswith('http://') or url.startswith('https://')):
+        return {}
+
+    if proxy is None or proxy.strip() == '':
+        # Without a proxy, connect to the target host and send only
+        # the part after scheme and host in the request line
+        (scheme, netloc, path, parms, qry, fragid) = urlparse.urlparse(url)
+        conn = httplib.HTTPConnection(netloc)
+        start = len(scheme) + len('://') + len(netloc)
+        conn.request('HEAD', url[start:])
+    else:
+        # With a proxy, connect to the proxy host and send the
+        # full URL in the request line instead
+        (scheme, netloc, path, parms, qry, fragid) = urlparse.urlparse(proxy)
+        conn = httplib.HTTPConnection(netloc)
+        conn.request('HEAD', url)
+
+    r = conn.getresponse()
+    result = {}
+
+    log('Trying to get metainfo for %s', url)
+
+    if 'content-length' in r.msg:
+        try:
+            length = int(r.msg['content-length'])
+            result['length'] = length
+        except ValueError, e:
+            log('Error converting content-length header.')
+
+    if 'last-modified' in r.msg:
+        try:
+            parsed_date = feedparser._parse_date(r.msg['last-modified'])
+            pubdate = updated_parsed_to_rfc2822(parsed_date)
+            result['pubdate'] = pubdate
+        except:
+            log('Error converting last-modified header.')
+
+    return result
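
A minimal usage sketch for the new helper (editor's illustration; the URLs and the proxy address are made up):

    from gpodder import util

    # Direct connection: only the path part goes into the request line
    info = util.get_episode_info_from_url('http://example.com/podcast/ep1.mp3')

    # Through an HTTP proxy: the connection goes to the proxy and the
    # full URL is sent in the request line instead
    info = util.get_episode_info_from_url('http://example.com/podcast/ep1.mp3',
                                          proxy='http://proxy:8080/')

    print info.get('length')    # bytes from Content-Length, if parseable
    print info.get('pubdate')   # RFC 2822 date from Last-Modified, if any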