Replace wget with new gpodder.download module; User-agent support

git-svn-id: svn://svn.berlios.de/gpodder/trunk@421 b0d088ad-0a06-0410-aad2-9ed5178a7e87
Thomas Perl 2007-09-18 18:25:25 +00:00
parent 93aca8365e
commit f84d984780
11 changed files with 207 additions and 312 deletions

View file

@@ -1,3 +1,30 @@
Tue, 18 Sep 2007 20:15:56 +0200 <thp@perli.net>
Replace wget with new gpodder.download module; User-agent support
* bin/gpodder: Set "gpodder.user_agent" field on startup; remove check
for wget, as this is not needed anymore
* src/gpodder/cache.py: Clean-up; remove old logging code; add support
for gpodder.user_agent; log info when there is an error in parsing the
feed, so the user knows why this feed is not cached
* src/gpodder/console.py: Remove DownloadPool; use the new
gpodder.download module for carrying out downloads; remove the
wget_version() tester, as it is not needed anymore =)
* src/gpodder/download.py: Added a new downloader module that uses
urllib and some custom classes and functions to provide the equivalent
functionality of the obsolete "libwget", but without the wget
dependency and with better accuracy (progress reporting, etc.); see
the usage sketch after this ChangeLog excerpt
* src/gpodder/gui.py: Utilize new gpodder.download module instead of
libwget
* src/gpodder/__init__.py: Add "user_agent" variable to the gpodder
module that holds the value of the "User-agent" header to send to web
servers when requesting OPMLs, Feeds or download data
* src/gpodder/opml.py: Add support for sending the User-agent header
* src/gpodder/services.py: Make the progress column a float column to
have smoother progress indication; add the "acquired" keyword
argument to s_release(); default 'speed' to a translated "Queued"
* src/gpodder/libwget.py: Removed
* doc/dev/redhat-wget-output.txt: Removed
Tue, 18 Sep 2007 02:30:04 +0200 <thp@perli.net>
Refreshed pot files and po templates; updated German translation
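A minimal usage sketch of the new download API described above (not part of the commit; the channel and episode objects are assumed to be the usual libpodcasts instances, as used in gui.py and console.py further down):

# Illustrative only -- how the pieces introduced by this commit fit together.
import gpodder
from gpodder import download

# bin/gpodder does this once at startup; the version string here is a placeholder.
gpodder.user_agent = 'gPodder/x.y.z (+http://gpodder.berlios.de/)'

# 'channel' and 'episode' are assumed to come from libpodcasts.load_channels().
thread = download.DownloadThread(channel, episode)
thread.start()    # GUI: download in a background daemon thread
# thread.run()    # console: execute the same code synchronously in the current thread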

View file

@@ -99,18 +99,14 @@ def main( argv = sys.argv):
if options.local:
sys.path = [ os.path.join( prefix, 'src') ] + sys.path
import gpodder
gpodder.user_agent = 'gPodder/%s (+http://gpodder.berlios.de/)' % __version__
if options.verbose:
from gpodder.liblogger import enable_verbose
enable_verbose()
# wget installation detection
from gpodder import console
which_wget = console.wget_version()
if which_wget == "":
print _("Error: cannot find wget.")
return 20
# which_wget
if options.list:
console.list_channels()
elif options.run:

View file

@@ -1,34 +0,0 @@
RedHat Linux / Fedora Core seem to use a custom version of GNU wget, at
least as of early 2007. To overcome this problem, we're checking for both
the "normal" wget output and RedHat/Fedora output so we can parse both
variants for the speed string.
For more information, please look at the gpodder-devel archives of January 2007.
-- thp [thp at perli.net], 2007-01-22
-----------------------------------
From: nikosapi <nikosapi@gmail.com>
To: Development for gPodder <gpodder-devel@lists.berlios.de>
Date: Wed, 17 Jan 2007 15:40:10 -0500
Subject: Re: [gpodder-devel] gPodder 0.9.0 preparations: Please translate
and test!
Ok, I dug into this and found the problem. It seems FC6 uses their own
version of wget (as you suspected). I don't know why they do this; the
official GNU one is more informative in a terminal.
I looked through libwget and reenacted it on the command line and these are my
results (this is right before the regexp is tested against msg):
Stock FC6 wget:
'500K .......... .......... .......... .......... .......... 1% 518K 84s'
Official GNU wget:
'700K .......... .......... .......... .......... .......... 2% 505.27 KB/s'
-----------------------------------
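A small standalone illustration (not part of the removed file) of what the parsing code had to cope with: the Fedora variant puts a plain speed token in the eighth whitespace-separated field, while the GNU variant needs a pattern match. Using the two sample lines quoted above:

# Illustrative demo only; libwget.py used a slightly different regular expression.
import re

fedora = '500K .......... .......... .......... .......... .......... 1% 518K 84s'
gnu = '700K .......... .......... .......... .......... .......... 2% 505.27 KB/s'

for line in (fedora, gnu):
    token = line.split()[7]            # Fedora/RedHat wget: '518K'
    if not re.search('[KB]', token):   # GNU wget: match something like '505.27 KB/s' instead
        token = re.search(r'\d+\.\d+ .B/s', line).group(0)
    print(token)                       # prints '518K', then '505.27 KB/s'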

View file

@@ -17,3 +17,5 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
user_agent = 'gPodder'
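This single module-level value is read in three places touched by this commit; a compact illustration of the pattern (the URLs are placeholders):

# Illustrative only -- consumers of gpodder.user_agent in this commit.
import gpodder
import urllib2
import feedparser

gpodder.user_agent = 'gPodder'   # bin/gpodder replaces this with a versioned string at startup

# Feed caching (cache.py): passed to feedparser, which sends it as the User-agent header.
feed = feedparser.parse('http://example.org/feed.xml', agent=gpodder.user_agent)

# OPML import (opml.py): set explicitly on the urllib2 request.
request = urllib2.Request('http://example.org/podcasts.opml',
                          headers={'User-agent': gpodder.user_agent})
data = urllib2.urlopen(request).read()

# Episode downloads (download.py): DownloadURLOpener.version = gpodder.user_agent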

View file

@@ -25,37 +25,15 @@
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
"""
"""
__module_id__ = "$Id: cache.py 863 2007-08-12 15:02:16Z dhellmann $"
#
# Import system modules
#
import feedparser
import logging
import time
import gpodder
#
# Import local modules
#
from gpodder.liblogger import log
#
# Module
#
class dummylogger(object):
def debug(self,s):
pass
def warning(self,s):
pass
logger = dummylogger()
class Cache:
"""A class to wrap Mark Pilgrim's Universal Feed Parser module
(http://www.feedparser.org) so that parameters can be used to
@@ -64,7 +42,7 @@ class Cache:
caching.
"""
def __init__(self, storage, timeToLiveSeconds=3600, userAgent='feedcache'):
def __init__(self, storage, timeToLiveSeconds=3600):
"""
Arguments:
@@ -74,19 +52,14 @@
timeToLiveSeconds=300 -- The length of time content should
live in the cache before an update is attempted.
userAgent='feedcache' -- User agent string to be used when
fetching feed contents.
"""
self.storage = storage
self.time_to_live = timeToLiveSeconds
self.user_agent = userAgent
self.user_agent = gpodder.user_agent
return
def fetch(self, url, force_update = False, offline = False):
"Return the feed at url."
logger.debug('url="%s"' % url)
modified = None
etag = None
@@ -99,30 +72,19 @@
# Does the storage contain a version of the data
# which is older than the time-to-live?
logger.debug('cache modified time: %s' % str(cached_time))
if cached_time is not None and not force_update:
if self.time_to_live:
age = now - cached_time
if age <= self.time_to_live:
logger.debug('cache contents still valid')
return cached_content
else:
logger.debug('cache contents older than TTL')
else:
logger.debug('no TTL value')
# The cache is out of date, but we have
# something. Try to use the etag and modified_time
# values from the cached content.
etag = cached_content.get('etag')
modified = cached_content.get('modified')
logger.debug('cached etag=%s' % etag)
logger.debug('cached modified=%s' % str(modified))
else:
logger.debug('nothing in the cache')
# We know we need to fetch, so go ahead and do it.
logger.debug('fetching...')
parsed_result = feedparser.parse(url,
agent=self.user_agent,
modified=modified,
@@ -130,7 +92,6 @@ class Cache:
)
status = parsed_result.get('status', None)
logger.debug('status=%s' % status)
if status == 304:
# No new data, based on the etag or modified values.
# We need to update the modified time in the
@@ -145,10 +106,9 @@ class Cache:
# There is new content, so store it unless there was an error.
error = parsed_result.get('bozo_exception')
if not error:
logger.debug('Updating stored data for %s' % url)
self.storage[url] = (now, parsed_result)
else:
logger.warning('Not storing data with exception: %s' % str(error))
log( 'Not storing result: %s', str( error), sender = self)
return parsed_result
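A minimal sketch of how the slimmed-down Cache is driven (a plain dict stands in for the persistent storage object gPodder passes in; the feed URL is a placeholder):

# Illustrative only.
from gpodder.cache import Cache

storage = {}                                    # maps url -> (timestamp, parsed feed)
cache = Cache(storage, timeToLiveSeconds=3600)

feed = cache.fetch('http://example.org/feed.xml')   # fetched and stored with a timestamp
feed = cache.fetch('http://example.org/feed.xml')   # within the TTL: returned from storage
feed = cache.fetch('http://example.org/feed.xml', force_update=True)
# force_update bypasses both the TTL shortcut and the stored etag/modified
# values, so the feed is fetched again unconditionally.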

View file

@@ -18,38 +18,19 @@
#
from gpodder import util
from gpodder import download
from gpodder.liblogger import msg
from libpodcasts import load_channels
from libpodcasts import save_channels
from libpodcasts import podcastChannel
from libwget import downloadThread
import time
import popen2
import urllib
class DownloadPool(object):
def __init__( self, max_downloads = 1):
self.max_downloads = max_downloads
self.cur_downloads = 0
def add( self):
self.cur_downloads += 1
def set( self):
if self.cur_downloads < 1:
self.cur_downloads = 1
self.cur_downloads -= 1
def has_free_slot( self):
return self.cur_downloads < self.max_downloads
def list_channels():
for channel in load_channels( load_items = False):
msg( 'channel', urllib.unquote( channel.url))
@@ -104,24 +85,9 @@ def update():
def run():
channels = update()
pool = DownloadPool()
for channel in channels:
episodes_to_download = channel.get_new_episodes()
for episode in episodes_to_download:
msg( 'queue', urllib.unquote( episode.url))
for episode in episodes_to_download:
while not pool.has_free_slot():
time.sleep( 3)
pool.add()
filename = episode.local_filename()
#thread will call pool.set() when finished
downloadThread( episode.url, filename, ready_event = pool, channelitem = channel, item = episode).download()
for episode in channel.get_new_episodes():
msg( 'downloading', urllib.unquote( episode.url))
def wget_version():
return popen2.Popen3( 'wget --version', True).fromchild.read().split('\n')[0].strip()
# Calling run() calls the code in the current thread
download.DownloadThread( channel, episode).run()
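Because DownloadThread subclasses threading.Thread, the console and GUI front-ends differ only in how they kick it off; side by side (illustrative, objects as above):

# console.py -- sequential: blocks until the episode has been downloaded.
download.DownloadThread(channel, episode).run()

# gui.py -- concurrent: returns immediately, the daemon thread reports its
# progress through services.download_status_manager.
download.DownloadThread(channel, episode).start()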

src/gpodder/download.py (new file, 152 lines)
View file

@@ -0,0 +1,152 @@
# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (C) 2005-2007 Thomas Perl <thp at perli.net>
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

#
# download.py -- Download client using DownloadStatusManager
# Thomas Perl <thp@perli.net> 2007-09-15
#
# Based on libwget.py (2005-10-29)
#

from gpodder.liblogger import log
from gpodder import libgpodder
from gpodder import util
from gpodder import services

import gpodder

import threading
import urllib
import shutil
import os.path
import time


class DownloadCancelledException(Exception): pass


class DownloadURLOpener(urllib.FancyURLopener):
    version = gpodder.user_agent

    def __init__( self, channel):
        gl = libgpodder.gPodderLib()

        if gl.proxy_use_environment:
            proxies = None
        else:
            proxies = {}
            if gl.http_proxy:
                proxies['http'] = gl.http_proxy
            if gl.ftp_proxy:
                proxies['ftp'] = gl.ftp_proxy

        self.channel = channel
        urllib.FancyURLopener.__init__( self, proxies)
        self.addheader( 'Referer', self.channel.url)

    def prompt_user_passwd( self, host, realm):
        if self.channel.username or self.channel.password:
            log( 'Authenticating as "%s" to "%s" for realm "%s".', self.channel.username, host, realm, sender = self)
            return ( self.channel.username, self.channel.password )

        return ( None, None )


class DownloadThread(threading.Thread):
    def __init__( self, channel, episode):
        threading.Thread.__init__( self)
        self.setDaemon( True)

        self.channel = channel
        self.episode = episode

        self.url = self.episode.url
        self.filename = self.episode.local_filename()
        self.tempname = os.path.join( os.path.dirname( self.filename), '.tmp-' + os.path.basename( self.filename))

        gl = libgpodder.gPodderLib()
        self.limit_rate = gl.limit_rate
        self.limit_rate_value = gl.limit_rate_value

        self.cancelled = False
        self.start_time = 0.0
        self.speed = _('Queued')
        self.progress = 0.0
        self.downloader = DownloadURLOpener( self.channel)

    def cancel( self):
        self.cancelled = True

    def status_updated( self, count, blockSize, totalSize):
        if totalSize:
            self.progress = 100.0*float(count*blockSize)/float(totalSize)
        else:
            self.progress = 100.0

        self.calculate_speed( count, blockSize)
        services.download_status_manager.update_status( self.download_id, speed = self.speed, progress = self.progress)

        if self.cancelled:
            util.delete_file( self.tempname)
            raise DownloadCancelledException()

    def calculate_speed( self, count, blockSize):
        if count % 5 == 0:
            now = time.time()
            if self.start_time > 0:
                passed = now - self.start_time
                speed = (count*blockSize)/passed
            else:
                self.start_time = now
                passed = now - self.start_time
                speed = count*blockSize

            self.speed = '%s/s' % util.format_filesize( speed)

            if self.limit_rate and speed > self.limit_rate_value:
                # calculate the time that should have passed to reach
                # the desired download rate and wait if necessary
                should_have_passed = float(count*blockSize)/(self.limit_rate_value*1024.0)
                if should_have_passed > passed:
                    # sleep a maximum of 10 seconds to not cause time-outs
                    delay = min( 10.0, float(should_have_passed-passed))
                    time.sleep( delay)

    def run( self):
        self.download_id = services.download_status_manager.reserve_download_id()
        services.download_status_manager.register_download_id( self.download_id, self)

        # Initial status update
        services.download_status_manager.update_status( self.download_id, episode = self.episode.title, url = self.episode.url, speed = self.speed, progress = self.progress)

        acquired = services.download_status_manager.s_acquire()
        try:
            try:
                if self.cancelled:
                    return

                util.delete_file( self.tempname)
                self.downloader.retrieve( self.episode.url, self.tempname, reporthook = self.status_updated)
                shutil.move( self.tempname, self.filename)
                self.channel.addDownloadedItem( self.episode)
            finally:
                services.download_status_manager.remove_download_id( self.download_id)
                services.download_status_manager.s_release( acquired)
        except DownloadCancelledException:
            log( 'Download has been cancelled: %s', self.episode.title, sender = self)
        except:
            log( 'Error while downloading "%s".', self.episode.title, sender = self, traceback = True)
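To make the throttling arithmetic in calculate_speed() concrete, a worked example with assumed numbers (limit_rate_value appears to be the configured limit in kilobytes per second, matching the old wget --limit-rate=...k option):

# Illustrative numbers only.
count, blockSize = 800, 1024     # reporthook says 800 blocks of 1 KB have arrived
limit_rate_value = 100.0         # configured cap: 100 KB/s
passed = 5.0                     # seconds elapsed since the download started

should_have_passed = float(count*blockSize)/(limit_rate_value*1024.0)   # 819200/102400 = 8.0 s
if should_have_passed > passed:                                         # 8.0 > 5.0
    delay = min(10.0, should_have_passed - passed)                      # sleep 3.0 s to stay under the cap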

View file

@@ -35,6 +35,7 @@ from string import strip
from gpodder import util
from gpodder import opml
from gpodder import services
from gpodder import download
from gpodder import SimpleGladeApp
from libpodcasts import podcastChannel
@@ -42,8 +43,6 @@ from libpodcasts import channelsToModel
from libpodcasts import load_channels
from libpodcasts import save_channels
from libwget import downloadThread
from libgpodder import gPodderLib
from liblogger import log
@@ -699,7 +698,7 @@ class gPodder(GladeWidget):
return
if not os.path.exists( filename) and not services.download_status_manager.is_download_in_progress( current_podcast.url):
downloadThread( current_podcast.url, filename, None, current_podcast.title, current_channel, current_podcast).download()
download.DownloadThread( current_channel, current_podcast).start()
else:
if want_message_dialog and os.path.exists( filename) and not current_podcast.file_type() == 'torrent':
title = _('Episode already downloaded')
@@ -806,7 +805,7 @@ class gPodder(GladeWidget):
for channel, episode in to_download:
filename = episode.local_filename()
if not os.path.exists( filename) and not services.download_status_manager.is_download_in_progress( episode.url):
downloadThread( episode.url, filename, None, episode.title, channel, episode).download()
download.DownloadThread( channel, episode).start()
else:
title = _('No new episodes')
message = _('There are no new episodes to download from your podcast subscriptions. Please check for new episodes later.')

View file

@@ -1,179 +0,0 @@
# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (C) 2005-2007 Thomas Perl <thp at perli.net>
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#
# libwget.py -- wget download functionality
# thomas perl <thp@perli.net> 20051029
#
#
from os.path import basename
from os.path import dirname
from os import system
from os import kill
from threading import Thread
from threading import Lock
from threading import Semaphore
from shutil import move
from gpodder import util
from gpodder import services
from liblogger import log
import libgpodder
import signal
import popen2
import re
import md5
import gtk
import gobject
class downloadThread( object):
def __init__( self, url, filename, ready_event = None, cutename = _("unknown"), channelitem = None, item = None):
self.url = url.replace( "%20", " ")
self.filename = filename
self.tempname = dirname( self.filename) + "/.tmp-" + basename( self.filename)
self.ready_event = ready_event
self.pid= -1
self.percentage = 0.0
self.speed = _("unknown")
self.thread = None
self.result = -1
self.cutename = cutename
self.channelitem = channelitem
self.item = item
self.is_cancelled = False
self.download_id = services.download_status_manager.reserve_download_id()
services.download_status_manager.register_download_id( self.download_id, self)
def thread_function( self):
acquired = False
gl = libgpodder.gPodderLib()
util.delete_file( self.tempname)
command = [ 'wget', '--timeout=120', '--continue', '--output-document="%s"' % self.tempname ]
if self.channelitem and (self.channelitem.username or self.channelitem.password):
command.append( '--user="%s"' % self.channelitem.username)
command.append( '--password="%s"' % self.channelitem.password)
if gl.limit_rate:
command.append( '--limit-rate=%.1fk' % gl.limit_rate_value)
command.append( '"%s"' % self.url)
command = ' '.join( command)
log( 'Command: %s', command)
services.download_status_manager.update_status( self.download_id, episode = self.cutename, speed = _('Queued'), progress = 0.0, url = self.url)
acquired = services.download_status_manager.s_acquire()
# if after acquiring the lock, we are already cancelled,
# the user has cancelled this download while it was queued
if self.is_cancelled:
services.download_status_manager.remove_download_id( self.download_id)
if self.ready_event != None:
self.ready_event.set()
if acquired:
services.download_status_manager.s_release()
return
process = popen2.Popen3( command, True)
self.pid = process.pid
stderr = process.childerr
while process.poll() == -1 and self.is_cancelled == False:
msg = stderr.readline( 80)
msg = msg.strip()
#log( 'wget> %s', msg)
if msg.find("%") != -1:
try:
self.percentage = max( self.percentage, (int(msg[(msg.find("%") - 2)] + msg[(msg.find("%") - 1)])+0.001)/100.0)
except:
pass
# Fedora/RedHat seem to have changed the output format of "wget", so we
# first try to "detect" the speed in the Fedora/RedHat format and if we
# don't succeed, we'll use a regular expression to find the speed string.
# Also see: doc/dev/redhat-wget-output.txt
try:
speed_msg = msg.split()[7]
except:
speed_msg = ''
if re.search('[KB]', speed_msg):
self.speed = speed_msg
else:
iter = re.compile('...\... .B\/s').finditer( msg)
for speed_string in iter:
self.speed = speed_string.group(0).strip()
services.download_status_manager.update_status( self.download_id, speed = self.speed, progress = int(self.percentage*100))
if process.wait() == 0:
try:
move( self.tempname, self.filename)
except:
log( 'Error happened during moving tempfile :/')
raise
else:
# Delete partially downloaded file
util.delete_file( self.tempname)
self.result = process.poll()
self.pid = -1
services.download_status_manager.remove_download_id( self.download_id)
if self.result == 0 and self.channelitem and self.item:
log( 'Download thread finished: Adding downloaded item to local database')
self.channelitem.addDownloadedItem( self.item)
if self.ready_event != None:
self.ready_event.set()
if acquired:
services.download_status_manager.s_release()
def cancel( self):
self.is_cancelled = True
if self.pid != -1:
kill( self.pid, signal.SIGKILL)
def download( self):
self.thread = Thread( target=self.thread_function)
self.thread.start()

View file

@@ -46,6 +46,7 @@ import urllib
import urllib2
import datetime
import gpodder
class Importer(object):
@@ -60,6 +61,10 @@ class Importer(object):
VALID_TYPES = ( 'rss', 'link' )
def read_url( self, url):
request = urllib2.Request( url, headers = {'User-agent': gpodder.user_agent})
return urllib2.urlopen( request).read()
def __init__( self, url):
"""
Parses the OPML feed from the given URL into
@@ -71,7 +76,7 @@ class Importer(object):
# assume local filename
doc = xml.dom.minidom.parse( url)
else:
doc = xml.dom.minidom.parseString( urllib2.urlopen( url).read())
doc = xml.dom.minidom.parseString( self.read_url( url))
for outline in doc.getElementsByTagName('outline'):
if outline.getAttribute('type') in self.VALID_TYPES and outline.getAttribute('xmlUrl'):

View file

@@ -36,7 +36,7 @@ import threading
class DownloadStatusManager( object):
COLUMN_NAMES = { 0: 'episode', 1: 'speed', 2: 'progress', 3: 'url' }
COLUMN_TYPES = ( gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_INT, gobject.TYPE_STRING )
COLUMN_TYPES = ( gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_FLOAT, gobject.TYPE_STRING )
def __init__( self):
self.status_list = {}
@@ -102,8 +102,9 @@ class DownloadStatusManager( object):
return self.semaphore.acquire()
def s_release( self):
self.semaphore.release()
def s_release( self, acquired = True):
if acquired:
self.semaphore.release()
def reserve_download_id( self):
id = self.next_status_id
@@ -116,7 +117,7 @@
def register_download_id( self, id, thread):
self.tree_model_lock.acquire()
self.status_list[id] = { 'iter': self.tree_model.append(), 'thread': thread, 'progress': 0 }
self.status_list[id] = { 'iter': self.tree_model.append(), 'thread': thread, 'progress': 0.0, 'speed': _('Queued'), }
self.notify( 'list-changed')
self.tree_model_lock.release()
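The new 'acquired' keyword mirrors how DownloadThread.run() uses the download slot semaphore: whatever s_acquire() returned is handed straight back to s_release(), which then only releases if a slot was actually taken. The pattern in isolation (illustrative only):

from gpodder import services

acquired = services.download_status_manager.s_acquire()
try:
    pass   # ... perform the actual download ...
finally:
    services.download_status_manager.s_release(acquired)   # no-op unless a slot was acquired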