2007-09-18 20:25:25 +02:00
# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
2009-02-01 21:22:21 +01:00
# Copyright (c) 2005-2009 Thomas Perl and the gPodder Team
2007-09-18 20:25:25 +02:00
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#
# download.py -- Download client using DownloadStatusManager
# Thomas Perl <thp@perli.net> 2007-09-15
#
# Based on libwget.py (2005-10-29)
#
from gpodder . liblogger import log
2008-03-02 14:22:29 +01:00
from gpodder . libgpodder import gl
2009-02-09 23:26:47 +01:00
from gpodder . dbsqlite import db
2007-09-18 20:25:25 +02:00
from gpodder import util
from gpodder import services
2008-10-13 15:28:44 +02:00
from gpodder import resolver
2007-09-18 20:25:25 +02:00
import gpodder
import threading
import urllib
import shutil
import os . path
2008-06-15 14:46:34 +02:00
import os
2007-09-18 20:25:25 +02:00
import time
2007-11-05 13:55:36 +01:00
from xml . sax import saxutils
2007-09-18 20:25:25 +02:00
class DownloadCancelledException(Exception):
    """Raised from within the download loop when the user cancels."""
    pass
2008-08-10 14:38:20 +02:00
class gPodderDownloadHTTPError(Exception):
    """Raised by DownloadURLOpener for unhandled HTTP error codes.

    Carries the request URL plus the server's numeric error code and
    error message, so callers (see DownloadThread.run) can build a
    user-visible notification from it.
    """
    def __init__(self, url, error_code, error_message):
        # Also initialize the Exception base class; the previous code
        # skipped this, which left str(e) empty and made log output
        # of the bare exception useless.
        Exception.__init__(self, '%s: %s %s' % (url, error_code, error_message))
        self.url = url
        self.error_code = error_code
        self.error_message = error_message
2007-09-18 20:25:25 +02:00
class DownloadURLOpener(urllib.FancyURLopener):
    # User-Agent string sent with every request
    version = gpodder.user_agent

    def __init__(self, channel):
        # URL opener bound to one podcast channel; the channel supplies
        # the username/password returned by prompt_user_passwd() below.
        if gl.config.proxy_use_environment:
            # None makes FancyURLopener read proxy settings from the
            # environment variables (http_proxy, ftp_proxy, ...)
            proxies = None
        else:
            proxies = {}
            if gl.config.http_proxy:
                proxies['http'] = gl.config.http_proxy
            if gl.config.ftp_proxy:
                proxies['ftp'] = gl.config.ftp_proxy
        self.channel = channel
        urllib.FancyURLopener.__init__(self, proxies)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """
        FancyURLopener by default does not raise an exception when
        there is some unknown HTTP error code. We want to override
        this and provide a function to log the error and raise an
        exception, so we don't download the HTTP error page here.
        """
        # The following two lines are copied from urllib.URLopener's
        # implementation of http_error_default
        void = fp.read()
        fp.close()
        raise gPodderDownloadHTTPError(url, errcode, errmsg)

    # The following is based on Python's urllib.py "URLopener.retrieve"
    # Also based on http://mail.python.org/pipermail/python-list/2001-October/110069.html
    def http_error_206(self, url, fp, errcode, errmsg, headers, data=None):
        # 206 Partial Content is the expected success response to the
        # Range request sent by retrieve_resume(), so treat it as OK.
        # The next line is taken from urllib's URLopener.open_http
        # method, at the end after the line "if errcode == 200:"
        return urllib.addinfourl(fp, headers, 'http:' + url)

    def retrieve_resume(self, url, filename, reporthook=None, data=None):
        """retrieve_resume(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        The filename argument is REQUIRED (no tempfile creation code here!)

        Additionally resumes a download if the local filename exists.

        reporthook, if given, is called as reporthook(blocknum, bs, size)
        once before the loop and once per block read; blocknum starts at
        the block count already present in the partial file.
        """
        current_size = 0
        tfp = None
        if os.path.exists(filename):
            try:
                current_size = os.path.getsize(filename)
                # Append to the existing partial file
                tfp = open(filename, 'ab')
                # If the file exists, then only download the remainder
                self.addheader('Range', 'bytes=%s-' % (current_size))
            except:
                log('Cannot open file for resuming: %s', filename, sender=self, traceback=True)
                tfp = None
                current_size = 0

        if tfp is None:
            # No resumable partial file -> start from scratch
            tfp = open(filename, 'wb')

        # NOTE(review): if the server ignores the Range header and replies
        # 200 with the full body, the complete file gets appended to the
        # partial data (corruption). Confirm that's acceptable for the
        # servers in use, or check the response code here.
        url = urllib.unwrap(urllib.toBytes(url))
        fp = self.open(url, data)
        headers = fp.info()

        # gPodder TODO: we can get the real url via fp.geturl() here
        # (if anybody wants to fix filenames in the future)

        result = filename, headers
        bs = 1024*8
        size = -1
        read = current_size
        blocknum = int(current_size/bs)
        if reporthook:
            if "content-length" in headers:
                # Content-Length only covers the remainder when resuming,
                # so add the bytes we already have
                size = int(headers["Content-Length"]) + current_size
            reporthook(blocknum, bs, size)
        while 1:
            block = fp.read(bs)
            if block == "":
                break
            read += len(block)
            tfp.write(block)
            blocknum += 1
            if reporthook:
                reporthook(blocknum, bs, size)
        fp.close()
        tfp.close()
        del fp
        del tfp

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise urllib.ContentTooShortError("retrieval incomplete: got only %i out "
                                              "of %i bytes" % (read, size), result)

        return result

    # end code based on urllib.py

    def prompt_user_passwd(self, host, realm):
        # Called by FancyURLopener on auth challenges; return the
        # channel's stored credentials instead of prompting the user.
        if self.channel.username or self.channel.password:
            log('Authenticating as "%s" to "%s" for realm "%s".', self.channel.username, host, realm, sender=self)
            return (self.channel.username, self.channel.password)

        return (None, None)
class DownloadThread ( threading . Thread ) :
2007-10-06 12:41:46 +02:00
MAX_UPDATES_PER_SEC = 1
2007-11-05 13:55:36 +01:00
def __init__ ( self , channel , episode , notification = None ) :
2007-09-18 20:25:25 +02:00
threading . Thread . __init__ ( self )
self . setDaemon ( True )
2008-12-13 13:29:45 +01:00
if gpodder . interface == gpodder . MAEMO :
# Only update status every 3 seconds on Maemo
self . MAX_UPDATES_PER_SEC = 1. / 3.
2007-09-18 20:25:25 +02:00
self . channel = channel
self . episode = episode
2007-11-05 13:55:36 +01:00
self . notification = notification
2007-09-18 20:25:25 +02:00
self . url = self . episode . url
2009-02-09 23:26:47 +01:00
self . filename = self . episode . local_filename ( create = True )
# Commit the database, so we won't lose the (possibly created) filename
db . commit ( )
2009-01-03 17:32:26 +01:00
self . tempname = self . filename + ' .partial '
2007-09-18 20:25:25 +02:00
2008-04-17 17:59:38 +02:00
# Make an educated guess about the total file size
self . total_size = self . episode . length
2007-09-18 20:25:25 +02:00
self . cancelled = False
2009-02-09 23:26:47 +01:00
self . keep_files = False
2007-09-18 20:25:25 +02:00
self . start_time = 0.0
self . speed = _ ( ' Queued ' )
2008-08-04 14:17:01 +02:00
self . speed_value = 0
2007-09-18 20:25:25 +02:00
self . progress = 0.0
self . downloader = DownloadURLOpener ( self . channel )
2007-10-06 12:41:46 +02:00
self . last_update = 0.0
2007-09-18 20:25:25 +02:00
2008-04-17 17:45:29 +02:00
# Keep a copy of these global variables for comparison later
self . limit_rate_value = gl . config . limit_rate_value
self . limit_rate = gl . config . limit_rate
self . start_blocks = 0
2009-02-09 23:26:47 +01:00
def cancel ( self , keep_files = False ) :
2007-09-18 20:25:25 +02:00
self . cancelled = True
2009-02-09 23:26:47 +01:00
self . keep_files = keep_files
2007-09-18 20:25:25 +02:00
def status_updated ( self , count , blockSize , totalSize ) :
if totalSize :
2008-04-17 17:59:38 +02:00
# We see a different "total size" while downloading,
# so correct the total size variable in the thread
2008-08-09 17:14:16 +02:00
if totalSize != self . total_size and totalSize > 0 :
2008-04-17 17:59:38 +02:00
log ( ' Correcting file size for %s from %d to %d while downloading. ' , self . url , self . total_size , totalSize , sender = self )
self . total_size = totalSize
2008-08-09 17:14:16 +02:00
elif totalSize < 0 :
# The current download has a negative value, so assume
# the total size given from the feed is correct
totalSize = self . total_size
2009-02-18 13:32:39 +01:00
try :
self . progress = 100.0 * float ( count * blockSize ) / float ( totalSize )
except ZeroDivisionError , zde :
log ( ' Totalsize unknown, cannot determine progress. ' , sender = self )
self . progress = 100.0
2007-09-18 20:25:25 +02:00
else :
self . progress = 100.0
2008-08-09 17:14:16 +02:00
# Sanity checks for "progress" in valid range (0..100)
if self . progress < 0.0 :
log ( ' Warning: Progress is lower than 0 (count= %d , blockSize= %d , totalSize= %d ) ' , count , blockSize , totalSize , sender = self )
self . progress = 0.0
elif self . progress > 100.0 :
log ( ' Warning: Progress is more than 100 (count= %d , blockSize= %d , totalSize= %d ) ' , count , blockSize , totalSize , sender = self )
self . progress = 100.0
2007-09-18 20:25:25 +02:00
self . calculate_speed ( count , blockSize )
2007-10-06 12:41:46 +02:00
if self . last_update < time . time ( ) - ( 1.0 / self . MAX_UPDATES_PER_SEC ) :
services . download_status_manager . update_status ( self . download_id , speed = self . speed , progress = self . progress )
self . last_update = time . time ( )
2007-09-18 20:25:25 +02:00
if self . cancelled :
2009-02-09 23:26:47 +01:00
if not self . keep_files :
util . delete_file ( self . tempname )
2007-09-18 20:25:25 +02:00
raise DownloadCancelledException ( )
def calculate_speed ( self , count , blockSize ) :
if count % 5 == 0 :
now = time . time ( )
if self . start_time > 0 :
2008-04-17 17:45:29 +02:00
# Has rate limiting been enabled or disabled?
if self . limit_rate != gl . config . limit_rate :
# If it has been enabled then reset base time and block count
if gl . config . limit_rate :
self . start_time = now
self . start_blocks = count
self . limit_rate = gl . config . limit_rate
# Has the rate been changed and are we currently limiting?
if self . limit_rate_value != gl . config . limit_rate_value and self . limit_rate :
self . start_time = now
self . start_blocks = count
self . limit_rate_value = gl . config . limit_rate_value
2007-09-18 20:25:25 +02:00
passed = now - self . start_time
2007-09-25 22:06:48 +02:00
if passed > 0 :
2008-04-17 17:45:29 +02:00
speed = ( ( count - self . start_blocks ) * blockSize ) / passed
2007-09-25 22:06:48 +02:00
else :
speed = 0
2007-09-18 20:25:25 +02:00
else :
self . start_time = now
2008-04-17 17:45:29 +02:00
self . start_blocks = count
2007-09-18 20:25:25 +02:00
passed = now - self . start_time
speed = count * blockSize
2008-04-17 17:45:29 +02:00
2008-03-02 14:22:29 +01:00
self . speed = ' %s /s ' % gl . format_filesize ( speed )
2008-08-04 14:17:01 +02:00
self . speed_value = speed
2007-09-18 20:25:25 +02:00
2008-04-17 17:45:29 +02:00
if gl . config . limit_rate and speed > gl . config . limit_rate_value :
2007-09-18 20:25:25 +02:00
# calculate the time that should have passed to reach
# the desired download rate and wait if necessary
2008-04-17 17:45:29 +02:00
should_have_passed = float ( ( count - self . start_blocks ) * blockSize ) / ( gl . config . limit_rate_value * 1024.0 )
2007-09-18 20:25:25 +02:00
if should_have_passed > passed :
# sleep a maximum of 10 seconds to not cause time-outs
delay = min ( 10.0 , float ( should_have_passed - passed ) )
time . sleep ( delay )
def run ( self ) :
self . download_id = services . download_status_manager . reserve_download_id ( )
services . download_status_manager . register_download_id ( self . download_id , self )
2009-02-09 23:26:47 +01:00
if os . path . exists ( self . tempname ) :
try :
already_downloaded = os . path . getsize ( self . tempname )
if self . total_size > 0 :
self . progress = already_downloaded / self . total_size
if already_downloaded > 0 :
self . speed = _ ( ' Queued (partial) ' )
except :
pass
else :
# "touch self.tempname", so we also get partial
# files for resuming when the file is queued
open ( self . tempname , ' w ' ) . close ( )
2007-09-18 20:25:25 +02:00
# Initial status update
services . download_status_manager . update_status ( self . download_id , episode = self . episode . title , url = self . episode . url , speed = self . speed , progress = self . progress )
acquired = services . download_status_manager . s_acquire ( )
try :
try :
if self . cancelled :
2009-02-09 23:26:47 +01:00
# Remove the partial file in case we do
# not want to keep it (e.g. user cancelled)
if not self . keep_files :
util . delete_file ( self . tempname )
2007-09-18 20:25:25 +02:00
return
2009-02-09 23:26:47 +01:00
( unused , headers ) = self . downloader . retrieve_resume ( resolver . get_real_download_url ( self . url ) , self . tempname , reporthook = self . status_updated )
2008-10-13 15:28:44 +02:00
2009-02-25 14:12:48 +01:00
new_mimetype = headers . get ( ' content-type ' , self . episode . mimetype )
old_mimetype = self . episode . mimetype
if new_mimetype != old_mimetype :
log ( ' Correcting mime type: %s => %s ' , old_mimetype , new_mimetype , sender = self )
old_extension = self . episode . extension ( )
self . episode . mimetype = new_mimetype
new_extension = self . episode . extension ( )
# If the desired filename extension changed due to the new mimetype,
# we force an update of the local filename to fix the extension
if old_extension != new_extension :
self . filename = self . episode . local_filename ( create = True , force_update = True )
2008-10-13 15:28:44 +02:00
2007-09-18 20:25:25 +02:00
shutil . move ( self . tempname , self . filename )
2008-08-07 04:30:42 +02:00
# Get the _real_ filesize once we actually have the file
self . episode . length = os . path . getsize ( self . filename )
2008-10-13 15:28:44 +02:00
self . channel . addDownloadedItem ( self . episode )
services . download_status_manager . download_completed ( self . download_id )
2008-06-15 14:46:34 +02:00
# If a user command has been defined, execute the command setting some environment variables
if len ( gl . config . cmd_download_complete ) > 0 :
os . environ [ " GPODDER_EPISODE_URL " ] = self . episode . url or ' '
os . environ [ " GPODDER_EPISODE_TITLE " ] = self . episode . title or ' '
os . environ [ " GPODDER_EPISODE_FILENAME " ] = self . filename or ' '
os . environ [ " GPODDER_EPISODE_PUBDATE " ] = str ( int ( self . episode . pubDate ) )
os . environ [ " GPODDER_EPISODE_LINK " ] = self . episode . link or ' '
os . environ [ " GPODDER_EPISODE_DESC " ] = self . episode . description or ' '
threading . Thread ( target = gl . ext_command_thread , args = ( self . notification , gl . config . cmd_download_complete ) ) . start ( )
2007-09-18 20:25:25 +02:00
finally :
services . download_status_manager . remove_download_id ( self . download_id )
services . download_status_manager . s_release ( acquired )
except DownloadCancelledException :
2008-04-22 21:57:02 +02:00
log ( ' Download has been cancelled: %s ' , self . episode . title , traceback = None , sender = self )
2009-02-09 23:26:47 +01:00
if not self . keep_files :
util . delete_file ( self . tempname )
2007-11-05 13:55:36 +01:00
except IOError , ioe :
2008-04-22 21:16:30 +02:00
if self . notification is not None :
2007-11-05 13:55:36 +01:00
title = ioe . strerror
2009-02-09 23:26:47 +01:00
message = _ ( ' An error happened while trying to download <b> %s </b>. Please try again later. ' ) % ( saxutils . escape ( self . episode . title ) , )
2007-11-05 13:55:36 +01:00
self . notification ( message , title )
log ( ' Error " %s " while downloading " %s " : %s ' , ioe . strerror , self . episode . title , ioe . filename , sender = self )
2008-08-10 14:38:20 +02:00
except gPodderDownloadHTTPError , gdhe :
if self . notification is not None :
title = gdhe . error_message
2009-02-09 23:26:47 +01:00
message = _ ( ' An error (HTTP %d ) happened while trying to download <b> %s </b>. You can try to resume the download later. ' ) % ( gdhe . error_code , saxutils . escape ( self . episode . title ) , )
2008-08-10 14:38:20 +02:00
self . notification ( message , title )
log ( ' HTTP error %s while downloading " %s " : %s ' , gdhe . error_code , self . episode . title , gdhe . error_message , sender = self )
2007-09-18 20:25:25 +02:00
except :
2008-04-22 22:24:19 +02:00
log ( ' Error while downloading " %s " . ' , self . episode . title , sender = self , traceback = True )
2007-09-18 20:25:25 +02:00