Pickle-based storage method (dumbshelve); HTTP HEAD requests; buggy feed fixes
git-svn-id: svn://svn.berlios.de/gpodder/trunk@596 b0d088ad-0a06-0410-aad2-9ed5178a7e87
This commit is contained in:
parent
6aab1c3d73
commit
34f4b6e8b2
18
ChangeLog
18
ChangeLog
|
@ -1,3 +1,21 @@
|
|||
Sun, 02 Mar 2008 13:52:42 +0100 <thp@perli.net>
|
||||
Pickle-based storage method (dumbshelve); HTTP HEAD requests; buggy feed fixes
|
||||
|
||||
* src/gpodder/dumbshelve.py: Added (replacement for "shelve" using
|
||||
pickle)
|
||||
* src/gpodder/libgpodder.py: Rename *.db files to *.pickle.db, so we
|
||||
don't clash with old-style shelve .db files
|
||||
* src/gpodder/libpodcasts.py: Use dumbshelve as a replacement for
|
||||
shelve; add EpisodeURLMetainfo that keeps track of metainfo downloaded
|
||||
via HTTP HEAD requests; make getting episode length and pubDate a bit
|
||||
more intelligent by trying to find correct values via HTTP HEAD;
|
||||
improve episode sorting by falling back to episode titles when no
|
||||
pubDate has been found (or pubDate is equal);
|
||||
* src/gpodder/util.py: Add get_episode_info_from_url() function that
|
||||
tries to find out the length and pubDate of an episode by looking at
|
||||
the data from the HTTP HEAD; also support HTTP proxies via an optional
|
||||
"proxy" keyword argument
|
||||
|
||||
Wed, 27 Feb 2008 10:44:48 +0100 <thp@perli.net>
|
||||
Notify user about no new episodes when updating from tray icon
|
||||
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# gPodder - A media aggregator and podcast client
|
||||
# Copyright (C) 2005-2007 Thomas Perl <thp at perli.net>
|
||||
#
|
||||
# gPodder is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# gPodder is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
# dumbshelve.py - Temporary implementation of a shelve replacement
|
||||
# 2008-02-27 Thomas Perl <thpinfo.com>
|
||||
|
||||
from gpodder.liblogger import log
|
||||
|
||||
import UserDict
|
||||
import pickle
|
||||
import os.path
|
||||
|
||||
class DumbShelve(UserDict.UserDict):
    """
    Pickle-backed dictionary that mimics the "shelve" interface.

    Data is kept in memory (via UserDict); sync() serializes the
    whole object to the configured filename using pickle.
    """
    def __init__(self, filename=None):
        # filename may be None here; it can be supplied later via sync()
        UserDict.UserDict.__init__(self)
        self.__filename = filename

    def sync(self, filename=None):
        """Write the current contents to disk; return True on success."""
        if filename is not None:
            self.__filename = filename
        try:
            # Open in binary mode so pickle data round-trips correctly
            # on all platforms; close explicitly instead of relying on
            # refcounting to flush the file.
            fp = open(self.__filename, 'wb')
            try:
                pickle.dump(self, fp)
            finally:
                fp.close()
            return True
        except Exception:
            # Best-effort: log the failure instead of crashing the caller,
            # but do not swallow SystemExit/KeyboardInterrupt
            log('Cannot pickle me to %s', self.__filename, sender=self, traceback=True)
            return False
|
||||
|
||||
def open_shelve(filename):
    """
    Load a DumbShelve from "filename" or create a new, empty one.

    Falls back to a fresh DumbShelve when the file does not exist or
    cannot be unpickled (e.g. corrupt or old-format data), so callers
    always get a usable object back.
    """
    if not os.path.exists(filename):
        return DumbShelve(filename)

    try:
        # Binary mode matches the binary pickle written by sync();
        # close the file handle explicitly
        fp = open(filename, 'rb')
        try:
            return pickle.load(fp)
        finally:
            fp.close()
    except Exception:
        # Corrupt/unreadable cache is not fatal -- start fresh
        log('Error loading %s. Creating new DumbShelve.', filename, traceback=True)
        return DumbShelve(filename)
|
||||
|
|
@ -61,8 +61,9 @@ class gPodderLibClass( object):
|
|||
util.make_directory( gpodder_dir)
|
||||
|
||||
self.tempdir = gpodder_dir
|
||||
self.feed_cache_file = os.path.join( gpodder_dir, 'feedcache.db')
|
||||
self.channel_settings_file = os.path.join( gpodder_dir, 'channelsettings.db')
|
||||
self.feed_cache_file = os.path.join(gpodder_dir, 'feedcache.pickle.db')
|
||||
self.channel_settings_file = os.path.join(gpodder_dir, 'channelsettings.pickle.db')
|
||||
self.episode_metainfo_file = os.path.join(gpodder_dir, 'episodemetainfo.pickle.db')
|
||||
|
||||
self.channel_opml_file = os.path.join(gpodder_dir, 'channels.opml')
|
||||
self.channel_xml_file = os.path.join(gpodder_dir, 'channels.xml')
|
||||
|
|
|
@ -73,12 +73,12 @@ import md5
|
|||
|
||||
import string
|
||||
|
||||
import shelve
|
||||
from gpodder import dumbshelve
|
||||
|
||||
global_lock = threading.RLock()
|
||||
|
||||
class ChannelSettings(object):
|
||||
storage = shelve.open( libgpodder.gPodderLib().channel_settings_file)
|
||||
storage = dumbshelve.open_shelve(libgpodder.gPodderLib().channel_settings_file)
|
||||
|
||||
@classmethod
|
||||
def get_settings_by_url( cls, url):
|
||||
|
@ -98,12 +98,29 @@ class ChannelSettings(object):
|
|||
cls.storage.sync()
|
||||
|
||||
|
||||
class EpisodeURLMetainfo(object):
    """Per-URL cache of episode metainfo obtained via HTTP HEAD requests."""
    # Persistent cache shared by all users of this class
    storage = dumbshelve.open_shelve(libgpodder.gPodderLib().episode_metainfo_file)

    @classmethod
    def get_metadata_by_url(cls, url):
        """Return cached metainfo for url, fetching and caching on a miss."""
        # Cache keys are stored as UTF-8 byte strings
        if isinstance(url, unicode):
            url = url.encode('utf-8')

        if url not in cls.storage:
            log('Trying to download metainfo for %s', url)
            info = util.get_episode_info_from_url(url, libgpodder.gPodderLib().config.http_proxy)
            cls.storage[url] = info
            cls.storage.sync()

        return cls.storage[url]
|
||||
|
||||
|
||||
class podcastChannel(ListType):
|
||||
"""holds data for a complete channel"""
|
||||
SETTINGS = ('sync_to_devices', 'is_music_channel', 'device_playlist_name','override_title','username','password')
|
||||
icon_cache = {}
|
||||
|
||||
storage = shelve.open( libgpodder.gPodderLib().feed_cache_file)
|
||||
storage = dumbshelve.open_shelve(libgpodder.gPodderLib().feed_cache_file)
|
||||
fc = cache.Cache( storage)
|
||||
|
||||
@classmethod
|
||||
|
@ -289,7 +306,7 @@ class podcastChannel(ListType):
|
|||
gl = libgpodder.gPodderLib()
|
||||
|
||||
if not last_pubdate:
|
||||
return self[0:min(len(self),gl.config.default_new)]
|
||||
return [episode for episode in self[0:min(len(self),gl.config.default_new)] if self.episode_is_new(episode)]
|
||||
|
||||
new_episodes = []
|
||||
for episode in self.get_all_episodes():
|
||||
|
@ -540,8 +557,24 @@ class podcastItem(object):
|
|||
if not episode.url:
|
||||
raise ValueError( 'Episode has an invalid URL')
|
||||
|
||||
if not episode.pubDate:
|
||||
metainfo = episode.get_metainfo()
|
||||
if 'pubdate' in metainfo:
|
||||
log('Patching pubdate in from metainfo :)')
|
||||
episode.pubDate = metainfo['pubdate']
|
||||
|
||||
if hasattr( enclosure, 'length'):
|
||||
episode.length = enclosure.length
|
||||
try:
|
||||
episode.length = int(enclosure.length)
|
||||
except:
|
||||
episode.length = -1
|
||||
|
||||
if episode.length <= 0:
|
||||
metainfo = episode.get_metainfo()
|
||||
if 'length' in metainfo:
|
||||
log('Patching length in from metainfo :)')
|
||||
episode.length = metainfo['length']
|
||||
|
||||
if hasattr( enclosure, 'type'):
|
||||
episode.mimetype = enclosure.type
|
||||
|
||||
|
@ -563,6 +596,9 @@ class podcastItem(object):
|
|||
self.channel = channel
|
||||
self.pubDate = ''
|
||||
|
||||
def get_metainfo(self):
|
||||
return EpisodeURLMetainfo.get_metadata_by_url(self.url)
|
||||
|
||||
def is_played(self):
|
||||
gl = libgpodder.gPodderLib()
|
||||
return gl.history_is_played(self.url)
|
||||
|
@ -624,6 +660,10 @@ class podcastItem(object):
|
|||
return '00000000'
|
||||
|
||||
def __cmp__( self, other):
|
||||
if self.pubDate == other.pubDate:
|
||||
log('pubDate equal, comparing titles (buggy feed?)', sender=self)
|
||||
return cmp(self.title, other.title)
|
||||
|
||||
try:
|
||||
timestamp_self = int(mktime_tz( parsedate_tz( self.pubDate)))
|
||||
timestamp_other = int(mktime_tz( parsedate_tz( other.pubDate)))
|
||||
|
@ -675,7 +715,7 @@ class podcastItem(object):
|
|||
|
||||
def calculate_filesize( self):
|
||||
try:
|
||||
self.length = str(os.path.getsize( self.local_filename()))
|
||||
self.length = os.path.getsize(self.local_filename())
|
||||
except:
|
||||
log( 'Could not get filesize for %s.', self.url)
|
||||
|
||||
|
|
|
@ -51,6 +51,9 @@ import datetime
|
|||
import urlparse
|
||||
import urllib
|
||||
import urllib2
|
||||
import httplib
|
||||
|
||||
import feedparser
|
||||
|
||||
import StringIO
|
||||
import xml.dom.minidom
|
||||
|
@ -757,3 +760,56 @@ def format_seconds_to_hour_min_sec(seconds):
|
|||
else:
|
||||
return result[0]
|
||||
|
||||
|
||||
def get_episode_info_from_url(url, proxy=None):
    """
    Try to get information about a podcast episode by sending
    a HEAD request to the HTTP server and parsing the result.

    The return value is a dict containing all fields that
    could be parsed from the URL. This currently contains:

      "length": The size of the file in bytes
      "pubdate": A formatted representation of the pubDate

    If the "proxy" parameter is used, it has to be the URL
    of the HTTP proxy server to use, e.g. http://proxy:8080/

    If there is an error, this function returns {}. This will
    only function with http:// and https:// URLs.
    """
    if not (url.startswith('http://') or url.startswith('https://')):
        return {}

    if proxy is None or proxy.strip() == '':
        (scheme, netloc, path, parms, qry, fragid) = urlparse.urlparse(url)
        if scheme == 'https':
            # https:// URLs need a TLS connection; a plain HTTPConnection
            # would try an unencrypted request to port 80
            conn = httplib.HTTPSConnection(netloc)
        else:
            conn = httplib.HTTPConnection(netloc)
        # Request only the path portion, relative to the host
        start = len(scheme) + len('://') + len(netloc)
        conn.request('HEAD', url[start:])
    else:
        # With a proxy, connect to the proxy host and request the full URL
        (scheme, netloc, path, parms, qry, fragid) = urlparse.urlparse(proxy)
        conn = httplib.HTTPConnection(netloc)
        conn.request('HEAD', url)

    r = conn.getresponse()
    result = {}

    log('Trying to get metainfo for %s', url)

    if 'content-length' in r.msg:
        try:
            result['length'] = int(r.msg['content-length'])
        except ValueError:
            log('Error converting content-length header.')

    if 'last-modified' in r.msg:
        try:
            parsed_date = feedparser._parse_date(r.msg['last-modified'])
            result['pubdate'] = updated_parsed_to_rfc2822(parsed_date)
        except Exception:
            # Date parsing is best-effort; a bad header just means no pubdate
            log('Error converting last-modified header.')

    # Release the socket instead of leaking it until GC
    conn.close()
    return result
|
||||
|
||||
|
|
Loading…
Reference in New Issue