Replace wget with new gpodder.download module; User-agent support

git-svn-id: svn://svn.berlios.de/gpodder/trunk@421 b0d088ad-0a06-0410-aad2-9ed5178a7e87
Thomas Perl 2007-09-18 18:25:25 +00:00
parent 93aca8365e
commit f84d984780
11 changed files with 207 additions and 312 deletions

View file

@@ -1,3 +1,30 @@
Tue, 18 Sep 2007 20:15:56 +0200 <thp@perli.net>
Replace wget with new gpodder.download module; User-agent support
* bin/gpodder: Set "gpodder.user_agent" field on startup; remove check
for wget, as this is not needed anymore
* src/gpodder/cache.py: Clean-up; remove old logging code; add support
for gpodder.user_agent; log info when there is an error in parsing the
feed, so the user knows why this feed is not cached
* src/gpodder/console.py: Remove DownloadPool; use the new
gpodder.download module for carrying out downloads; remove the
wget_version() tester, as it is not needed anymore =)
* src/gpodder/download.py: Added a new downloader module that uses
urllib and some custom classes and functions to provide the equivalent
functionality of the obsolete "libwget", but without the wget
dependency and with better accuracy (progress reporting, etc.); see
the usage sketch after this ChangeLog excerpt
* src/gpodder/gui.py: Utilize new gpodder.download module instead of
libwget
* src/gpodder/__init__.py: Add "user_agent" variable to the gpodder
module that holds the value of the "User-agent" header to send to web
servers when requesting OPMLs, Feeds or download data
* src/gpodder/opml.py: Add support for sending the User-agent header
* src/gpodder/services.py: Make the progress column a float column to
have smoother progress indication; add the "acquired" keyword
argument to s_release(); default 'speed' to a translated "Queued"
* src/gpodder/libwget.py: Removed
* doc/dev/redhat-wget-output.txt: Removed
Tue, 18 Sep 2007 02:30:04 +0200 <thp@perli.net>
Refreshed pot files and po templates; updated German translation
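A minimal usage sketch of the new download API described above (not part of the commit; the channel and episode objects are assumed to be the usual libpodcasts instances, as used in gui.py and console.py further down):

# Illustrative only -- how the pieces introduced by this commit fit together.
import gpodder
from gpodder import download

# bin/gpodder does this once at startup; the version string here is a placeholder.
gpodder.user_agent = 'gPodder/x.y.z (+http://gpodder.berlios.de/)'

# 'channel' and 'episode' are assumed to come from libpodcasts.load_channels().
thread = download.DownloadThread(channel, episode)
thread.start()    # GUI: download in a background daemon thread
# thread.run()    # console: execute the same code synchronously in the current thread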

View file

@@ -99,18 +99,14 @@ def main( argv = sys.argv):
if options.local:
sys.path = [ os.path.join( prefix, 'src') ] + sys.path
import gpodder
gpodder.user_agent = 'gPodder/%s (+http://gpodder.berlios.de/)' % __version__
if options.verbose:
from gpodder.liblogger import enable_verbose
enable_verbose()
# wget installation detection
from gpodder import console
which_wget = console.wget_version()
if which_wget == "":
print _("Error: cannot find wget.")
return 20
# which_wget
if options.list:
console.list_channels()
elif options.run:

View file

@@ -1,34 +0,0 @@
RedHat Linux / Fedora Core seem to use a custom version of GNU wget, at
least as of early 2007. To overcome this problem, we're checking for both
the "normal" wget output and RedHat/Fedora output so we can parse both
variants for the speed string.
For more information, please look at the gpodder-devel archives of January 2007.
-- thp [thp at perli.net], 2007-01-22
-----------------------------------
From: nikosapi <nikosapi@gmail.com>
To: Development for gPodder <gpodder-devel@lists.berlios.de>
Date: Wed, 17 Jan 2007 15:40:10 -0500
Subject: Re: [gpodder-devel] gPodder 0.9.0 preparations: Please translate
and test!
Ok, I dug into this and found the problem. It seems FC6 uses their own
version of wget (as you suspected). I don't know why they do this; the
official GNU one is more informative in a terminal.
I looked through libwget and reenacted it on the command line and these are my
results (this is right before the regexp is tested against msg):
Stock FC6 wget:
'500K .......... .......... .......... .......... .......... 1% 518K 84s'
Official GNU wget:
'700K .......... .......... .......... .......... .......... 2% 505.27 KB/s'
-----------------------------------
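A small standalone illustration (not part of the removed file) of what the parsing code had to cope with: the Fedora variant puts a plain speed token in the eighth whitespace-separated field, while the GNU variant needs a pattern match. Using the two sample lines quoted above:

# Illustrative demo only; libwget.py used a slightly different regular expression.
import re

fedora = '500K .......... .......... .......... .......... .......... 1% 518K 84s'
gnu = '700K .......... .......... .......... .......... .......... 2% 505.27 KB/s'

for line in (fedora, gnu):
    token = line.split()[7]            # Fedora/RedHat wget: '518K'
    if not re.search('[KB]', token):   # GNU wget: match something like '505.27 KB/s' instead
        token = re.search(r'\d+\.\d+ .B/s', line).group(0)
    print(token)                       # prints '518K', then '505.27 KB/s'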

View file

@@ -17,3 +17,5 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
user_agent = 'gPodder'
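This single module-level value is read in three places touched by this commit; a compact illustration of the pattern (the URLs are placeholders):

# Illustrative only -- consumers of gpodder.user_agent in this commit.
import gpodder
import urllib2
import feedparser

gpodder.user_agent = 'gPodder'   # bin/gpodder replaces this with a versioned string at startup

# Feed caching (cache.py): passed to feedparser, which sends it as the User-agent header.
feed = feedparser.parse('http://example.org/feed.xml', agent=gpodder.user_agent)

# OPML import (opml.py): set explicitly on the urllib2 request.
request = urllib2.Request('http://example.org/podcasts.opml',
                          headers={'User-agent': gpodder.user_agent})
data = urllib2.urlopen(request).read()

# Episode downloads (download.py): DownloadURLOpener.version = gpodder.user_agent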

View file

@@ -25,37 +25,15 @@
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
"""
"""
__module_id__ = "$Id: cache.py 863 2007-08-12 15:02:16Z dhellmann $"
#
# Import system modules
#
import feedparser
import logging
import time
import gpodder
#
# Import local modules
#
from gpodder.liblogger import log
#
# Module
#
class dummylogger(object):
def debug(self,s):
pass
def warning(self,s):
pass
logger = dummylogger()
class Cache:
"""A class to wrap Mark Pilgrim's Universal Feed Parser module
(http://www.feedparser.org) so that parameters can be used to
@@ -64,7 +42,7 @@ class Cache:
caching.
"""
def __init__(self, storage, timeToLiveSeconds=3600, userAgent='feedcache'):
def __init__(self, storage, timeToLiveSeconds=3600):
"""
Arguments:
@@ -74,19 +52,14 @@
timeToLiveSeconds=300 -- The length of time content should
live in the cache before an update is attempted.
userAgent='feedcache' -- User agent string to be used when
fetching feed contents.
"""
self.storage = storage
self.time_to_live = timeToLiveSeconds
self.user_agent = userAgent
self.user_agent = gpodder.user_agent
return
def fetch(self, url, force_update = False, offline = False):
"Return the feed at url."
logger.debug('url="%s"' % url)
modified = None
etag = None
@@ -99,30 +72,19 @@
# Does the storage contain a version of the data
# which is older than the time-to-live?
logger.debug('cache modified time: %s' % str(cached_time))
if cached_time is not None and not force_update:
if self.time_to_live:
age = now - cached_time
if age <= self.time_to_live:
logger.debug('cache contents still valid')
return cached_content
else:
logger.debug('cache contents older than TTL')
else:
logger.debug('no TTL value')
# The cache is out of date, but we have
# something. Try to use the etag and modified_time
# values from the cached content.
etag = cached_content.get('etag')
modified = cached_content.get('modified')
logger.debug('cached etag=%s' % etag)
logger.debug('cached modified=%s' % str(modified))
else:
logger.debug('nothing in the cache')
# We know we need to fetch, so go ahead and do it.
logger.debug('fetching...')
parsed_result = feedparser.parse(url,
agent=self.user_agent,
modified=modified,
@@ -130,7 +92,6 @@ class Cache:
)
status = parsed_result.get('status', None)
logger.debug('status=%s' % status)
if status == 304:
# No new data, based on the etag or modified values.
# We need to update the modified time in the
@@ -145,10 +106,9 @@ class Cache:
# There is new content, so store it unless there was an error.
error = parsed_result.get('bozo_exception')
if not error:
logger.debug('Updating stored data for %s' % url)
self.storage[url] = (now, parsed_result)
else:
logger.warning('Not storing data with exception: %s' % str(error))
log( 'Not storing result: %s', str( error), sender = self)
return parsed_result
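A minimal sketch of how the slimmed-down Cache is driven (a plain dict stands in for the persistent storage object gPodder passes in; the feed URL is a placeholder):

# Illustrative only.
from gpodder.cache import Cache

storage = {}                                    # maps url -> (timestamp, parsed feed)
cache = Cache(storage, timeToLiveSeconds=3600)

feed = cache.fetch('http://example.org/feed.xml')   # fetched and stored with a timestamp
feed = cache.fetch('http://example.org/feed.xml')   # within the TTL: returned from storage
feed = cache.fetch('http://example.org/feed.xml', force_update=True)
# force_update bypasses both the TTL shortcut and the stored etag/modified
# values, so the feed is fetched again unconditionally.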

View file

@@ -18,38 +18,19 @@
#
from gpodder import util
from gpodder import download
from gpodder.liblogger import msg
from libpodcasts import load_channels
from libpodcasts import save_channels
from libpodcasts import podcastChannel
from libwget import downloadThread
import time
import popen2
import urllib
class DownloadPool(object):
def __init__( self, max_downloads = 1):
self.max_downloads = max_downloads
self.cur_downloads = 0
def add( self):
self.cur_downloads += 1
def set( self):
if self.cur_downloads < 1:
self.cur_downloads = 1
self.cur_downloads -= 1
def has_free_slot( self):
return self.cur_downloads < self.max_downloads
def list_channels():
for channel in load_channels( load_items = False):
msg( 'channel', urllib.unquote( channel.url))
@@ -104,24 +85,9 @@ def update():
def run():
channels = update()
pool = DownloadPool()
for channel in channels:
episodes_to_download = channel.get_new_episodes()
for episode in episodes_to_download:
msg( 'queue', urllib.unquote( episode.url))
for episode in episodes_to_download:
while not pool.has_free_slot():
time.sleep( 3)
pool.add()
filename = episode.local_filename()
#thread will call pool.set() when finished
downloadThread( episode.url, filename, ready_event = pool, channelitem = channel, item = episode).download()
for episode in channel.get_new_episodes():
msg( 'downloading', urllib.unquote( episode.url))
def wget_version():
return popen2.Popen3( 'wget --version', True).fromchild.read().split('\n')[0].strip()
# Calling run() calls the code in the current thread
download.DownloadThread( channel, episode).run()
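Because DownloadThread subclasses threading.Thread, the console and GUI front-ends differ only in how they kick it off; side by side (illustrative, objects as above):

# console.py -- sequential: blocks until the episode has been downloaded.
download.DownloadThread(channel, episode).run()

# gui.py -- concurrent: returns immediately, the daemon thread reports its
# progress through services.download_status_manager.
download.DownloadThread(channel, episode).start()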

src/gpodder/download.py (new file, 152 lines)
View file

@@ -0,0 +1,152 @@
# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (C) 2005-2007 Thomas Perl <thp at perli.net>
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

#
# download.py -- Download client using DownloadStatusManager
# Thomas Perl <thp@perli.net> 2007-09-15
#
# Based on libwget.py (2005-10-29)
#

from gpodder.liblogger import log
from gpodder import libgpodder
from gpodder import util
from gpodder import services

import gpodder

import threading
import urllib
import shutil
import os.path
import time


class DownloadCancelledException(Exception): pass


class DownloadURLOpener(urllib.FancyURLopener):
    version = gpodder.user_agent

    def __init__( self, channel):
        gl = libgpodder.gPodderLib()

        if gl.proxy_use_environment:
            proxies = None
        else:
            proxies = {}
            if gl.http_proxy:
                proxies['http'] = gl.http_proxy
            if gl.ftp_proxy:
                proxies['ftp'] = gl.ftp_proxy

        self.channel = channel
        urllib.FancyURLopener.__init__( self, proxies)
        self.addheader( 'Referer', self.channel.url)

    def prompt_user_passwd( self, host, realm):
        if self.channel.username or self.channel.password:
            log( 'Authenticating as "%s" to "%s" for realm "%s".', self.channel.username, host, realm, sender = self)
            return ( self.channel.username, self.channel.password )

        return ( None, None )


class DownloadThread(threading.Thread):
    def __init__( self, channel, episode):
        threading.Thread.__init__( self)
        self.setDaemon( True)

        self.channel = channel
        self.episode = episode

        self.url = self.episode.url
        self.filename = self.episode.local_filename()
        self.tempname = os.path.join( os.path.dirname( self.filename), '.tmp-' + os.path.basename( self.filename))

        gl = libgpodder.gPodderLib()
        self.limit_rate = gl.limit_rate
        self.limit_rate_value = gl.limit_rate_value

        self.cancelled = False
        self.start_time = 0.0
        self.speed = _('Queued')
        self.progress = 0.0
        self.downloader = DownloadURLOpener( self.channel)

    def cancel( self):
        self.cancelled = True

    def status_updated( self, count, blockSize, totalSize):
        if totalSize:
            self.progress = 100.0*float(count*blockSize)/float(totalSize)
        else:
            self.progress = 100.0

        self.calculate_speed( count, blockSize)
        services.download_status_manager.update_status( self.download_id, speed = self.speed, progress = self.progress)

        if self.cancelled:
            util.delete_file( self.tempname)
            raise DownloadCancelledException()

    def calculate_speed( self, count, blockSize):
        if count % 5 == 0:
            now = time.time()
            if self.start_time > 0:
                passed = now - self.start_time
                speed = (count*blockSize)/passed
            else:
                self.start_time = now
                passed = now - self.start_time
                speed = count*blockSize

            self.speed = '%s/s' % util.format_filesize( speed)

            if self.limit_rate and speed > self.limit_rate_value:
                # calculate the time that should have passed to reach
                # the desired download rate and wait if necessary
                should_have_passed = float(count*blockSize)/(self.limit_rate_value*1024.0)
                if should_have_passed > passed:
                    # sleep a maximum of 10 seconds to not cause time-outs
                    delay = min( 10.0, float(should_have_passed-passed))
                    time.sleep( delay)

    def run( self):
        self.download_id = services.download_status_manager.reserve_download_id()
        services.download_status_manager.register_download_id( self.download_id, self)

        # Initial status update
        services.download_status_manager.update_status( self.download_id, episode = self.episode.title, url = self.episode.url, speed = self.speed, progress = self.progress)

        acquired = services.download_status_manager.s_acquire()
        try:
            try:
                if self.cancelled:
                    return

                util.delete_file( self.tempname)
                self.downloader.retrieve( self.episode.url, self.tempname, reporthook = self.status_updated)
                shutil.move( self.tempname, self.filename)
                self.channel.addDownloadedItem( self.episode)
            finally:
                services.download_status_manager.remove_download_id( self.download_id)
                services.download_status_manager.s_release( acquired)
        except DownloadCancelledException:
            log( 'Download has been cancelled: %s', self.episode.title, sender = self)
        except:
            log( 'Error while downloading "%s".', self.episode.title, sender = self, traceback = True)
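To make the throttling arithmetic in calculate_speed() concrete, a worked example with assumed numbers (limit_rate_value appears to be the configured limit in kilobytes per second, matching the old wget --limit-rate=...k option):

# Illustrative numbers only.
count, blockSize = 800, 1024     # reporthook says 800 blocks of 1 KB have arrived
limit_rate_value = 100.0         # configured cap: 100 KB/s
passed = 5.0                     # seconds elapsed since the download started

should_have_passed = float(count*blockSize)/(limit_rate_value*1024.0)   # 819200/102400 = 8.0 s
if should_have_passed > passed:                                         # 8.0 > 5.0
    delay = min(10.0, should_have_passed - passed)                      # sleep 3.0 s to stay under the cap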

View file

@@ -35,6 +35,7 @@ from string import strip
from gpodder import util
from gpodder import opml
from gpodder import services
from gpodder import download
from gpodder import SimpleGladeApp
from libpodcasts import podcastChannel
@@ -42,8 +43,6 @@ from libpodcasts import channelsToModel
from libpodcasts import load_channels
from libpodcasts import save_channels
from libwget import downloadThread
from libgpodder import gPodderLib
from liblogger import log
@@ -699,7 +698,7 @@ class gPodder(GladeWidget):
return
if not os.path.exists( filename) and not services.download_status_manager.is_download_in_progress( current_podcast.url):
downloadThread( current_podcast.url, filename, None, current_podcast.title, current_channel, current_podcast).download()
download.DownloadThread( current_channel, current_podcast).start()
else:
if want_message_dialog and os.path.exists( filename) and not current_podcast.file_type() == 'torrent':
title = _('Episode already downloaded')
@@ -806,7 +805,7 @@ class gPodder(GladeWidget):
for channel, episode in to_download:
filename = episode.local_filename()
if not os.path.exists( filename) and not services.download_status_manager.is_download_in_progress( episode.url):
downloadThread( episode.url, filename, None, episode.title, channel, episode).download()
download.DownloadThread( channel, episode).start()
else:
title = _('No new episodes')
message = _('There are no new episodes to download from your podcast subscriptions. Please check for new episodes later.')

View file

@@ -1,179 +0,0 @@
# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (C) 2005-2007 Thomas Perl <thp at perli.net>
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#
# libwget.py -- wget download functionality
# thomas perl <thp@perli.net> 20051029
#
#
from os.path import basename
from os.path import dirname
from os import system
from os import kill
from threading import Thread
from threading import Lock
from threading import Semaphore
from shutil import move
from gpodder import util
from gpodder import services
from liblogger import log
import libgpodder
import signal
import popen2
import re
import md5
import gtk
import gobject
class downloadThread( object):
def __init__( self, url, filename, ready_event = None, cutename = _("unknown"), channelitem = None, item = None):
self.url = url.replace( "%20", " ")
self.filename = filename
self.tempname = dirname( self.filename) + "/.tmp-" + basename( self.filename)
self.ready_event = ready_event
self.pid= -1
self.percentage = 0.0
self.speed = _("unknown")
self.thread = None
self.result = -1
self.cutename = cutename
self.channelitem = channelitem
self.item = item
self.is_cancelled = False
self.download_id = services.download_status_manager.reserve_download_id()
services.download_status_manager.register_download_id( self.download_id, self)
def thread_function( self):
acquired = False
gl = libgpodder.gPodderLib()
util.delete_file( self.tempname)
command = [ 'wget', '--timeout=120', '--continue', '--output-document="%s"' % self.tempname ]
if self.channelitem and (self.channelitem.username or self.channelitem.password):
command.append( '--user="%s"' % self.channelitem.username)
command.append( '--password="%s"' % self.channelitem.password)
if gl.limit_rate:
command.append( '--limit-rate=%.1fk' % gl.limit_rate_value)
command.append( '"%s"' % self.url)
command = ' '.join( command)
log( 'Command: %s', command)
services.download_status_manager.update_status( self.download_id, episode = self.cutename, speed = _('Queued'), progress = 0.0, url = self.url)
acquired = services.download_status_manager.s_acquire()
# if after acquiring the lock, we are already cancelled,
# the user has cancelled this download while it was queued
if self.is_cancelled:
services.download_status_manager.remove_download_id( self.download_id)
if self.ready_event != None:
self.ready_event.set()
if acquired:
services.download_status_manager.s_release()
return
process = popen2.Popen3( command, True)
self.pid = process.pid
stderr = process.childerr
while process.poll() == -1 and self.is_cancelled == False:
msg = stderr.readline( 80)
msg = msg.strip()
#log( 'wget> %s', msg)
if msg.find("%") != -1:
try:
self.percentage = max( self.percentage, (int(msg[(msg.find("%") - 2)] + msg[(msg.find("%") - 1)])+0.001)/100.0)
except:
pass
# Fedora/RedHat seem to have changed the output format of "wget", so we
# first try to "detect" the speed in the Fedora/RedHat format and if we
# don't succeed, we'll use a regular expression to find the speed string.
# Also see: doc/dev/redhat-wget-output.txt
try:
speed_msg = msg.split()[7]
except:
speed_msg = ''
if re.search('[KB]', speed_msg):
self.speed = speed_msg
else:
iter = re.compile('...\... .B\/s').finditer( msg)
for speed_string in iter:
self.speed = speed_string.group(0).strip()
services.download_status_manager.update_status( self.download_id, speed = self.speed, progress = int(self.percentage*100))
if process.wait() == 0:
try:
move( self.tempname, self.filename)
except:
log( 'Error happened during moving tempfile :/')
raise
else:
# Delete partially downloaded file
util.delete_file( self.tempname)
self.result = process.poll()
self.pid = -1
services.download_status_manager.remove_download_id( self.download_id)
if self.result == 0 and self.channelitem and self.item:
log( 'Download thread finished: Adding downloaded item to local database')
self.channelitem.addDownloadedItem( self.item)
if self.ready_event != None:
self.ready_event.set()
if acquired:
services.download_status_manager.s_release()
def cancel( self):
self.is_cancelled = True
if self.pid != -1:
kill( self.pid, signal.SIGKILL)
def download( self):
self.thread = Thread( target=self.thread_function)
self.thread.start()

View file

@@ -46,6 +46,7 @@ import urllib
import urllib2
import datetime
import gpodder
class Importer(object):
@@ -60,6 +61,10 @@ class Importer(object):
VALID_TYPES = ( 'rss', 'link' )
def read_url( self, url):
request = urllib2.Request( url, headers = {'User-agent': gpodder.user_agent})
return urllib2.urlopen( request).read()
def __init__( self, url):
"""
Parses the OPML feed from the given URL into
@@ -71,7 +76,7 @@ class Importer(object):
# assume local filename
doc = xml.dom.minidom.parse( url)
else:
doc = xml.dom.minidom.parseString( urllib2.urlopen( url).read())
doc = xml.dom.minidom.parseString( self.read_url( url))
for outline in doc.getElementsByTagName('outline'):
if outline.getAttribute('type') in self.VALID_TYPES and outline.getAttribute('xmlUrl'):

View file

@@ -36,7 +36,7 @@ import threading
class DownloadStatusManager( object):
COLUMN_NAMES = { 0: 'episode', 1: 'speed', 2: 'progress', 3: 'url' }
COLUMN_TYPES = ( gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_INT, gobject.TYPE_STRING )
COLUMN_TYPES = ( gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_FLOAT, gobject.TYPE_STRING )
def __init__( self):
self.status_list = {}
@@ -102,8 +102,9 @@ class DownloadStatusManager( object):
return self.semaphore.acquire()
def s_release( self):
self.semaphore.release()
def s_release( self, acquired = True):
if acquired:
self.semaphore.release()
def reserve_download_id( self):
id = self.next_status_id
@@ -116,7 +117,7 @@
def register_download_id( self, id, thread):
self.tree_model_lock.acquire()
self.status_list[id] = { 'iter': self.tree_model.append(), 'thread': thread, 'progress': 0 }
self.status_list[id] = { 'iter': self.tree_model.append(), 'thread': thread, 'progress': 0.0, 'speed': _('Queued'), }
self.notify( 'list-changed')
self.tree_model_lock.release()
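The new 'acquired' keyword mirrors how DownloadThread.run() uses the download slot semaphore: whatever s_acquire() returned is handed straight back to s_release(), which then only releases if a slot was actually taken. The pattern in isolation (illustrative only):

from gpodder import services

acquired = services.download_status_manager.s_acquire()
try:
    pass   # ... perform the actual download ...
finally:
    services.download_status_manager.s_release(acquired)   # no-op unless a slot was acquired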