2007-09-18 20:25:25 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
#
|
|
|
|
# gPodder - A media aggregator and podcast client
|
2011-04-01 18:59:42 +02:00
|
|
|
# Copyright (c) 2005-2011 Thomas Perl and the gPodder Team
|
2007-09-18 20:25:25 +02:00
|
|
|
#
|
|
|
|
# gPodder is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation; either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# gPodder is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
#
|
|
|
|
|
|
|
|
|
|
|
|
#
|
2009-08-24 16:17:32 +02:00
|
|
|
# download.py -- Download queue management
|
2007-09-18 20:25:25 +02:00
|
|
|
# Thomas Perl <thp@perli.net> 2007-09-15
|
|
|
|
#
|
|
|
|
# Based on libwget.py (2005-10-29)
|
|
|
|
#
|
|
|
|
|
2009-04-01 01:12:17 +02:00
|
|
|
from __future__ import with_statement
|
|
|
|
|
2011-07-15 16:32:06 +02:00
|
|
|
import logging
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
2007-09-18 20:25:25 +02:00
|
|
|
from gpodder import util
|
2009-08-24 17:02:35 +02:00
|
|
|
from gpodder import youtube
|
2007-09-18 20:25:25 +02:00
|
|
|
import gpodder
|
|
|
|
|
|
|
|
import threading
|
|
|
|
import urllib
|
2009-06-03 19:32:15 +02:00
|
|
|
import urlparse
|
2007-09-18 20:25:25 +02:00
|
|
|
import shutil
|
|
|
|
import os.path
|
2008-06-15 14:46:34 +02:00
|
|
|
import os
|
2007-09-18 20:25:25 +02:00
|
|
|
import time
|
2009-04-01 01:12:17 +02:00
|
|
|
import collections
|
2007-09-18 20:25:25 +02:00
|
|
|
|
2009-09-06 16:38:40 +02:00
|
|
|
import mimetypes
|
|
|
|
import email
|
|
|
|
import email.Header
|
|
|
|
|
2011-02-25 21:05:26 +01:00
|
|
|
import cgi
|
2007-11-05 13:55:36 +01:00
|
|
|
|
2009-05-07 16:26:07 +02:00
|
|
|
_ = gpodder.gettext
|
2009-05-05 10:22:12 +02:00
|
|
|
|
2009-09-06 16:38:40 +02:00
|
|
|
def get_header_param(headers, param, header_name):
    """Extract a HTTP header parameter from a dict

    Uses the "email" module to retrieve parameters
    from HTTP headers. This can be used to get the
    "filename" parameter of the "content-disposition"
    header for downloads to pick a good filename.

    Returns None if the filename cannot be retrieved.
    """
    try:
        # Re-serialize the header dict so the "email" parser can read it
        raw_headers = '\n'.join('%s:%s' % item for item in headers.items())
        msg = email.message_from_string(raw_headers)
        if header_name in msg:
            value = msg.get_param(param, header=header_name)
            if value is not None:
                # RFC 2047-decode every fragment, then join them into
                # a single unicode string
                fragments = []
                for part, encoding in email.Header.decode_header(value):
                    if encoding:
                        fragments.append(part.decode(encoding))
                    else:
                        fragments.append(unicode(part))
                return u''.join(fragments)
    except Exception:
        logger.error('Cannot get %s from %s', param, header_name, exc_info=True)

    return None
|
|
|
|
|
2009-05-05 10:22:12 +02:00
|
|
|
class ContentRange(object):
    # Based on:
    # http://svn.pythonpaste.org/Paste/WebOb/trunk/webob/byterange.py
    #
    # Copyright (c) 2007 Ian Bicking and Contributors
    #
    # Permission is hereby granted, free of charge, to any person obtaining
    # a copy of this software and associated documentation files (the
    # "Software"), to deal in the Software without restriction, including
    # without limitation the rights to use, copy, modify, merge, publish,
    # distribute, sublicense, and/or sell copies of the Software, and to
    # permit persons to whom the Software is furnished to do so, subject to
    # the following conditions:
    #
    # The above copyright notice and this permission notice shall be
    # included in all copies or substantial portions of the Software.
    #
    # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
    # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
    # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
    # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    """
    Represents the Content-Range header

    This header is ``start-stop/length``, where stop and length can be
    ``*`` (represented as None in the attributes).
    """

    def __init__(self, start, stop, length):
        assert start >= 0, "Bad start: %r" % start
        assert stop is None or (stop >= 0 and stop >= start), (
                "Bad stop: %r" % stop)
        self.start = start
        self.stop = stop
        self.length = length

    def __repr__(self):
        return '<%s %s>' % (self.__class__.__name__, self)

    def __str__(self):
        # "*" stands in for an unknown stop position or total length
        stop = '*' if self.stop is None else self.stop + 1
        length = '*' if self.length is None else self.length
        return 'bytes %s-%s/%s' % (self.start, stop, length)

    def __iter__(self):
        """
        Mostly so you can unpack this, like:

            start, stop, length = res.content_range
        """
        return iter((self.start, self.stop, self.length))

    @classmethod
    def parse(cls, value):
        """
        Parse the header. May return None if it cannot parse.
        """
        if value is None:
            return None
        value = value.strip()
        if not value.startswith('bytes '):
            # Unparseable
            return None
        # Split "start-end/length" into its three components
        byte_range, slash, length_str = value[len('bytes '):].strip().partition('/')
        if not slash:
            # Invalid, no length given
            return None
        start_str, dash, end_str = byte_range.partition('-')
        if not dash:
            # Invalid, no range
            return None
        try:
            start = int(start_str)
            end = None if end_str == '*' else int(end_str)
            length = None if length_str == '*' else int(length_str)
        except ValueError:
            # Parse problem
            return None
        if end is None:
            return cls(start, None, length)
        return cls(start, end - 1, length)
|
|
|
|
|
|
|
|
|
2007-09-18 20:25:25 +02:00
|
|
|
class DownloadCancelledException(Exception):
    """Raised to abort a download in progress (see DownloadTask.status_updated)."""
|
2009-08-24 13:04:11 +02:00
|
|
|
class AuthenticationError(Exception):
    """Raised after repeated failed HTTP authentication attempts."""
|
2007-09-18 20:25:25 +02:00
|
|
|
|
2008-08-10 14:38:20 +02:00
|
|
|
class gPodderDownloadHTTPError(Exception):
    """Raised for unhandled HTTP error codes during a download

    Attributes:
        url           -- the URL that was requested
        error_code    -- numeric HTTP status code (e.g. 404)
        error_message -- human-readable HTTP status message
    """
    def __init__(self, url, error_code, error_message):
        # Fix: pass a formatted message to Exception.__init__ so that
        # str(e) and generic logging show something useful instead of
        # an empty string; the attributes below remain as before.
        Exception.__init__(self, '%s: %d %s' % (url, error_code, error_message))
        self.url = url
        self.error_code = error_code
        self.error_message = error_message
|
2007-09-18 20:25:25 +02:00
|
|
|
|
|
|
|
class DownloadURLOpener(urllib.FancyURLopener):
    """URL opener with download resuming, URL fixing and authentication.

    Subclasses urllib.FancyURLopener (Python 2) to add: resumable
    downloads via the HTTP Range header, escaping of malformed URLs,
    error reporting via gPodderDownloadHTTPError, and channel-based
    HTTP authentication with a retry limit.
    """

    # Sent as the User-Agent header for all requests
    version = gpodder.user_agent

    # Sometimes URLs are not escaped correctly - try to fix them
    # (see RFC2396; Section 2.4.3. Excluded US-ASCII Characters)
    # FYI: The omission of "%" in the list is to avoid double escaping!
    # Mapping of codepoint -> '%xx' escape, used with unicode.translate()
    ESCAPE_CHARS = dict((ord(c), u'%%%x'%ord(c)) for c in u' <>#"{}|\\^[]`')

    def __init__( self, channel):
        # channel provides auth_username/auth_password for prompt_user_passwd
        self.channel = channel
        self._auth_retry_counter = 0
        urllib.FancyURLopener.__init__(self, None)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """
        FancyURLopener by default does not raise an exception when
        there is some unknown HTTP error code. We want to override
        this and provide a function to log the error and raise an
        exception, so we don't download the HTTP error page here.
        """
        # The following two lines are copied from urllib.URLopener's
        # implementation of http_error_default
        void = fp.read()
        fp.close()
        raise gPodderDownloadHTTPError(url, errcode, errmsg)

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """ This is the exact same function that's included with urllib
        except with "void = fp.read()" commented out. """

        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            # No redirect target given - give up silently (as urllib does)
            return

        # This blocks forever(?) with certain servers (see bug #465)
        #void = fp.read()
        fp.close()

        # In case the server sent a relative URL, join with original:
        newurl = urlparse.urljoin(self.type + ":" + url, newurl)
        return self.open(newurl)

    # The following is based on Python's urllib.py "URLopener.retrieve"
    # Also based on http://mail.python.org/pipermail/python-list/2001-October/110069.html

    def http_error_206(self, url, fp, errcode, errmsg, headers, data=None):
        # 206 Partial Content is the success response for a Range request,
        # so treat it like a normal 200 response.
        # The next line is taken from urllib's URLopener.open_http
        # method, at the end after the line "if errcode == 200:"
        return urllib.addinfourl(fp, headers, 'http:' + url)

    def retrieve_resume(self, url, filename, reporthook=None, data=None):
        """Download files from an URL; return (headers, real_url)

        Resumes a download if the local filename exists and
        the server supports download resuming.
        """

        current_size = 0
        tfp = None
        if os.path.exists(filename):
            try:
                current_size = os.path.getsize(filename)
                # Append mode: new data is written after the existing bytes
                tfp = open(filename, 'ab')
                #If the file exists, then only download the remainder
                if current_size > 0:
                    self.addheader('Range', 'bytes=%s-' % (current_size))
            except:
                # Best-effort: if stat/open fails, fall back to a fresh download
                logger.warn('Cannot resume download: %s', filename, exc_info=True)
                tfp = None
                current_size = 0

        if tfp is None:
            tfp = open(filename, 'wb')

        # Fix a problem with bad URLs that are not encoded correctly (bug 549)
        url = url.decode('ascii', 'ignore')
        url = url.translate(self.ESCAPE_CHARS)
        url = url.encode('ascii')

        url = urllib.unwrap(urllib.toBytes(url))
        fp = self.open(url, data)
        headers = fp.info()

        if current_size > 0:
            # We told the server to resume - see if she agrees
            # See RFC2616 (206 Partial Content + Section 14.16)
            # XXX check status code here, too...
            range = ContentRange.parse(headers.get('content-range', ''))
            if range is None or range.start != current_size:
                # Ok, that did not work. Reset the download
                # TODO: seek and truncate if content-range differs from request
                tfp.close()
                tfp = open(filename, 'wb')
                current_size = 0
                logger.warn('Cannot resume: Invalid Content-Range (RFC2616).')

        result = headers, fp.geturl()
        bs = 1024*8
        # size == -1 means "unknown total size" (no usable Content-Length)
        size = -1
        read = current_size
        blocknum = int(current_size/bs)
        if reporthook:
            if "content-length" in headers:
                # Content-Length covers only the remainder when resuming,
                # so add what we already have on disk
                size = int(headers.getrawheader("Content-Length")) + current_size
            reporthook(blocknum, bs, size)
        while read < size or size == -1:
            if size == -1:
                block = fp.read(bs)
            else:
                # Never read past the expected end of the file
                block = fp.read(min(size-read, bs))
            if block == "":
                break
            read += len(block)
            tfp.write(block)
            blocknum += 1
            if reporthook:
                reporthook(blocknum, bs, size)
        fp.close()
        tfp.close()
        del fp
        del tfp

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise urllib.ContentTooShortError("retrieval incomplete: got only %i out "
                    "of %i bytes" % (read, size), result)

        return result

    # end code based on urllib.py

    def prompt_user_passwd( self, host, realm):
        # Keep track of authentication attempts, fail after the third one
        self._auth_retry_counter += 1
        if self._auth_retry_counter > 3:
            raise AuthenticationError(_('Wrong username/password'))

        if self.channel.auth_username or self.channel.auth_password:
            logger.debug('Authenticating as "%s" to "%s" for realm "%s".',
                    self.channel.auth_username, host, realm)
            return ( self.channel.auth_username, self.channel.auth_password )

        return (None, None)
|
2007-09-18 20:25:25 +02:00
|
|
|
|
|
|
|
|
2009-04-01 01:12:17 +02:00
|
|
|
class DownloadQueueWorker(threading.Thread):
    """Worker thread that pops download tasks off a shared queue and runs them."""

    def __init__(self, queue, exit_callback, continue_check_callback, minimum_tasks):
        threading.Thread.__init__(self)
        self.queue = queue
        self.exit_callback = exit_callback
        self.continue_check_callback = continue_check_callback

        # The minimum amount of tasks that should be downloaded by this worker
        # before using the continue_check_callback to determine if it might
        # continue accepting tasks. This can be used to forcefully start a
        # download, even if a download limit is in effect.
        self.minimum_tasks = minimum_tasks

    def run(self):
        logger.info('Starting new thread: %s', self.getName())
        while True:
            # Check if this thread is allowed to continue accepting tasks
            # (But only after reducing minimum_tasks to zero - see above)
            if self.minimum_tasks > 0:
                self.minimum_tasks -= 1
            elif not self.continue_check_callback(self):
                # Told to stop; do not run exit_callback on this path
                return

            try:
                next_task = self.queue.pop()
                logger.info('%s is processing: %s', self.getName(), next_task)
                next_task.run()
            except IndexError:
                # Queue is empty - this worker's job is done
                logger.info('No more tasks for %s to carry out.', self.getName())
                break

        self.exit_callback(self)
|
|
|
|
|
|
|
|
|
|
|
|
class DownloadQueueManager(object):
    """Owns the download task queue and the pool of worker threads."""

    def __init__(self, config):
        self._config = config
        # Tasks are appended on the left and popped from the right by
        # workers, so the rightmost entry is the next one to be started
        self.tasks = collections.deque()

        self.worker_threads_access = threading.RLock()
        self.worker_threads = []

    def __exit_callback(self, worker_thread):
        # A worker has finished normally - drop it from the pool
        with self.worker_threads_access:
            self.worker_threads.remove(worker_thread)

    def __continue_check_callback(self, worker_thread):
        # Called by a worker before accepting another task; decides
        # whether it may keep going under the configured download limit
        with self.worker_threads_access:
            over_limit = (self._config.max_downloads_enabled and
                    len(self.worker_threads) > self._config.max_downloads)
            if over_limit:
                # Remove the worker here, because it returns without
                # invoking the exit callback in this case
                self.worker_threads.remove(worker_thread)
                return False
            return True

    def spawn_threads(self, force_start=False):
        """Spawn new worker threads if necessary

        If force_start is True, forcefully spawn a thread and
        let it process at least one episode, even if a download
        limit is in effect at the moment.
        """
        with self.worker_threads_access:
            if not self.tasks:
                return

            may_spawn = (force_start or
                    not self.worker_threads or
                    len(self.worker_threads) < self._config.max_downloads or
                    not self._config.max_downloads_enabled)
            if not may_spawn:
                return

            # We have to create a new thread here, there's work to do
            logger.info('Starting new worker thread.')

            # The new worker should process at least one task (the one
            # that we want to forcefully start) if force_start is True.
            minimum_tasks = 1 if force_start else 0

            worker = DownloadQueueWorker(self.tasks, self.__exit_callback,
                    self.__continue_check_callback, minimum_tasks)
            self.worker_threads.append(worker)
            worker.start()

    def are_queued_or_active_tasks(self):
        # Any live worker implies queued or currently-running downloads
        with self.worker_threads_access:
            return len(self.worker_threads) > 0

    def add_task(self, task, force_start=False):
        """Add a new task to the download queue

        If force_start is True, ignore the download limit
        and forcefully start the download right away.
        """
        if task.status != DownloadTask.INIT:
            # Remove the task from its current position in the
            # download queue (if any) to avoid race conditions
            # where two worker threads download the same file
            try:
                self.tasks.remove(task)
            except ValueError:
                pass
        task.status = DownloadTask.QUEUED

        if force_start:
            # Add the task to be taken on next pop
            self.tasks.append(task)
        else:
            # Add the task to the end of the queue
            self.tasks.appendleft(task)

        self.spawn_threads(force_start)
|
2007-09-18 20:25:25 +02:00
|
|
|
|
2008-04-17 17:59:38 +02:00
|
|
|
|
2009-04-01 01:12:17 +02:00
|
|
|
class DownloadTask(object):
|
|
|
|
"""An object representing the download task of an episode
|
|
|
|
|
|
|
|
You can create a new download task like this:
|
|
|
|
|
2009-08-11 00:09:02 +02:00
|
|
|
task = DownloadTask(episode, gpodder.config.Config(CONFIGFILE))
|
2009-04-01 01:12:17 +02:00
|
|
|
task.status = DownloadTask.QUEUED
|
|
|
|
task.run()
|
|
|
|
|
|
|
|
While the download is in progress, you can access its properties:
|
|
|
|
|
|
|
|
task.total_size # in bytes
|
|
|
|
task.progress # from 0.0 to 1.0
|
|
|
|
task.speed # in bytes per second
|
|
|
|
str(task) # name of the episode
|
|
|
|
task.status # current status
|
2009-05-12 11:03:59 +02:00
|
|
|
task.status_changed # True if the status has been changed (see below)
|
|
|
|
task.url # URL of the episode being downloaded
|
|
|
|
task.podcast_url # URL of the podcast this download belongs to
|
2009-04-01 01:12:17 +02:00
|
|
|
|
|
|
|
You can cancel a running download task by setting its status:
|
|
|
|
|
|
|
|
task.status = DownloadTask.CANCELLED
|
|
|
|
|
|
|
|
The task will then abort as soon as possible (due to the nature
|
|
|
|
of downloading data, this can take a while when the Internet is
|
|
|
|
busy).
|
|
|
|
|
2009-04-02 00:02:07 +02:00
|
|
|
The "status_changed" attribute gets set to True everytime the
|
|
|
|
"status" attribute changes its value. After you get the value of
|
|
|
|
the "status_changed" attribute, it is always reset to False:
|
|
|
|
|
|
|
|
if task.status_changed:
|
|
|
|
new_status = task.status
|
|
|
|
# .. update the UI accordingly ..
|
|
|
|
|
|
|
|
Obviously, this also means that you must have at most *one*
|
|
|
|
place in your UI code where you check for status changes and
|
|
|
|
broadcast the status updates from there.
|
|
|
|
|
2009-04-01 01:12:17 +02:00
|
|
|
While the download is taking place and after the .run() method
|
|
|
|
has finished, you can get the final status to check if the download
|
|
|
|
was successful:
|
|
|
|
|
|
|
|
if task.status == DownloadTask.DONE:
|
|
|
|
# .. everything ok ..
|
|
|
|
elif task.status == DownloadTask.FAILED:
|
|
|
|
# .. an error happened, and the
|
|
|
|
# error_message attribute is set ..
|
|
|
|
print task.error_message
|
|
|
|
elif task.status == DownloadTask.PAUSED:
|
|
|
|
# .. user paused the download ..
|
|
|
|
elif task.status == DownloadTask.CANCELLED:
|
|
|
|
# .. user cancelled the download ..
|
|
|
|
|
|
|
|
The difference between cancelling and pausing a DownloadTask is
|
|
|
|
that the temporary file gets deleted when cancelling, but does
|
|
|
|
not get deleted when pausing.
|
|
|
|
|
|
|
|
Be sure to call .removed_from_list() on this task when removing
|
|
|
|
it from the UI, so that it can carry out any pending clean-up
|
|
|
|
actions (e.g. removing the temporary file when the task has not
|
|
|
|
finished successfully; i.e. task.status != DownloadTask.DONE).
|
2010-12-18 14:50:43 +01:00
|
|
|
|
|
|
|
The UI can call the method "notify_as_finished()" to determine if
|
|
|
|
this episode still has still to be shown as "finished" download
|
|
|
|
in a notification window. This will return True only the first time
|
|
|
|
it is called when the status is DONE. After returning True once,
|
|
|
|
it will always return False afterwards.
|
|
|
|
|
|
|
|
The same thing works for failed downloads ("notify_as_failed()").
|
2009-04-01 01:12:17 +02:00
|
|
|
"""
|
|
|
|
# Possible states this download task can be in
|
|
|
|
STATUS_MESSAGE = (_('Added'), _('Queued'), _('Downloading'),
|
|
|
|
_('Finished'), _('Failed'), _('Cancelled'), _('Paused'))
|
|
|
|
(INIT, QUEUED, DOWNLOADING, DONE, FAILED, CANCELLED, PAUSED) = range(7)
|
|
|
|
|
|
|
|
def __str__(self):
|
|
|
|
return self.__episode.title
|
|
|
|
|
|
|
|
    def __get_status(self):
        # Current state: one of INIT/QUEUED/DOWNLOADING/DONE/FAILED/
        # CANCELLED/PAUSED (see class constants)
        return self.__status

    def __set_status(self, status):
        # Record the change so the UI can poll the status_changed property
        if status != self.__status:
            self.__status_changed = True
            self.__status = status

    # Read/write property; setting a new value flags status_changed
    status = property(fget=__get_status, fset=__set_status)
|
|
|
|
|
2009-04-02 00:02:07 +02:00
|
|
|
    def __get_status_changed(self):
        # Reading this property resets the flag, so only a single UI
        # consumer should check it (see the class docstring)
        if self.__status_changed:
            self.__status_changed = False
            return True
        else:
            return False

    # Read-only, self-resetting "status has changed" flag
    status_changed = property(fget=__get_status_changed)
|
|
|
|
|
2009-04-01 01:12:17 +02:00
|
|
|
    def __get_url(self):
        return self.__episode.url

    # Read-only: URL of the episode being downloaded
    url = property(fget=__get_url)
|
|
|
|
|
2009-05-12 11:03:59 +02:00
|
|
|
    def __get_podcast_url(self):
        return self.__episode.channel.url

    # Read-only: URL of the podcast this download belongs to
    podcast_url = property(fget=__get_podcast_url)
|
|
|
|
|
2009-04-19 08:09:36 +02:00
|
|
|
    def __get_episode(self):
        return self.__episode

    # Read-only: the episode object this task downloads
    episode = property(fget=__get_episode)
|
|
|
|
|
2009-04-01 01:12:17 +02:00
|
|
|
def removed_from_list(self):
|
|
|
|
if self.status != self.DONE:
|
|
|
|
util.delete_file(self.tempname)
|
|
|
|
|
2009-08-11 00:09:02 +02:00
|
|
|
    def __init__(self, episode, config):
        # Initial state and "changed" flag (True so the UI picks up INIT)
        self.__status = DownloadTask.INIT
        self.__status_changed = True
        self.__episode = episode
        self._config = config

        # Set names for the downloads list (HTML-escaped for markup use)
        self.markup_name = cgi.escape(self.__episode.title)
        self.markup_podcast_name = cgi.escape(self.__episode.channel.title)

        # Create the target filename and save it in the database
        self.filename = self.__episode.local_filename(create=True)
        # Data is downloaded to the .partial file, not the final name
        self.tempname = self.filename + '.partial'

        # Size from the feed; corrected later from HTTP headers (see
        # status_updated)
        self.total_size = self.__episode.file_size
        self.speed = 0.0
        self.progress = 0.0
        self.error_message = None

        # Have we already shown this task in a notification?
        self._notification_shown = False

        # Variables for speed limit and speed calculation
        self.__start_time = 0
        self.__start_blocks = 0
        self.__limit_rate_value = self._config.limit_rate_value
        self.__limit_rate = self._config.limit_rate

        # Callbacks
        self._progress_updated = lambda x: None

        # If the tempname already exists, set progress accordingly
        if os.path.exists(self.tempname):
            try:
                already_downloaded = os.path.getsize(self.tempname)
                if self.total_size > 0:
                    self.progress = max(0.0, min(1.0, float(already_downloaded)/self.total_size))
            except OSError, os_error:
                logger.error('Cannot get size for %s', os_error)
        else:
            # "touch self.tempname", so we also get partial
            # files for resuming when the file is queued
            open(self.tempname, 'w').close()
|
|
|
|
|
2010-12-18 14:50:43 +01:00
|
|
|
def notify_as_finished(self):
|
|
|
|
if self.status == DownloadTask.DONE:
|
|
|
|
if self._notification_shown:
|
|
|
|
return False
|
|
|
|
else:
|
|
|
|
self._notification_shown = True
|
|
|
|
return True
|
|
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
def notify_as_failed(self):
|
|
|
|
if self.status == DownloadTask.FAILED:
|
|
|
|
if self._notification_shown:
|
|
|
|
return False
|
|
|
|
else:
|
|
|
|
self._notification_shown = True
|
|
|
|
return True
|
|
|
|
|
|
|
|
return False
|
|
|
|
|
2010-10-10 22:44:22 +02:00
|
|
|
    def add_progress_callback(self, callback):
        # NOTE(review): despite the name, this *replaces* any previously
        # set callback instead of adding to a list of callbacks
        self._progress_updated = callback
|
|
|
|
|
2009-04-01 01:12:17 +02:00
|
|
|
def status_updated(self, count, blockSize, totalSize):
|
|
|
|
# We see a different "total size" while downloading,
|
|
|
|
# so correct the total size variable in the thread
|
|
|
|
if totalSize != self.total_size and totalSize > 0:
|
|
|
|
self.total_size = float(totalSize)
|
|
|
|
|
|
|
|
if self.total_size > 0:
|
|
|
|
self.progress = max(0.0, min(1.0, float(count*blockSize)/self.total_size))
|
2010-10-10 22:44:22 +02:00
|
|
|
self._progress_updated(self.progress)
|
2009-04-01 01:12:17 +02:00
|
|
|
|
|
|
|
self.calculate_speed(count, blockSize)
|
|
|
|
|
|
|
|
if self.status == DownloadTask.CANCELLED:
|
2007-09-18 20:25:25 +02:00
|
|
|
raise DownloadCancelledException()
|
|
|
|
|
2009-04-01 01:12:17 +02:00
|
|
|
if self.status == DownloadTask.PAUSED:
|
|
|
|
raise DownloadCancelledException()
|
|
|
|
|
|
|
|
    def calculate_speed(self, count, blockSize):
        """Update self.speed and enforce the download rate limit.

        Only does work every 5th block to keep overhead low. Reacts to
        the limit_rate / limit_rate_value config options changing while
        a download is running by resetting the measurement baseline.
        """
        if count % 5 == 0:
            now = time.time()
            if self.__start_time > 0:
                # Has rate limiting been enabled or disabled?
                if self.__limit_rate != self._config.limit_rate:
                    # If it has been enabled then reset base time and block count
                    if self._config.limit_rate:
                        self.__start_time = now
                        self.__start_blocks = count
                    self.__limit_rate = self._config.limit_rate

                # Has the rate been changed and are we currently limiting?
                if self.__limit_rate_value != self._config.limit_rate_value and self.__limit_rate:
                    self.__start_time = now
                    self.__start_blocks = count
                    self.__limit_rate_value = self._config.limit_rate_value

                # Average speed since the current measurement baseline
                passed = now - self.__start_time
                if passed > 0:
                    speed = ((count-self.__start_blocks)*blockSize)/passed
                else:
                    speed = 0
            else:
                # First measurement: establish the baseline
                self.__start_time = now
                self.__start_blocks = count
                passed = now - self.__start_time
                speed = count*blockSize

            self.speed = float(speed)

            # NOTE(review): limit_rate_value is compared against a bytes/sec
            # speed here but multiplied by 1024.0 below - presumably it is
            # configured in KiB/s; confirm against the config definition
            if self._config.limit_rate and speed > self._config.limit_rate_value:
                # calculate the time that should have passed to reach
                # the desired download rate and wait if necessary
                should_have_passed = float((count-self.__start_blocks)*blockSize)/(self._config.limit_rate_value*1024.0)
                if should_have_passed > passed:
                    # sleep a maximum of 10 seconds to not cause time-outs
                    delay = min(10.0, float(should_have_passed-passed))
                    time.sleep(delay)
|
2007-09-18 20:25:25 +02:00
|
|
|
|
2009-04-01 01:12:17 +02:00
|
|
|
def run(self):
    """Perform the download and drive this task's state machine.

    Expects the task to be in the QUEUED state; moves it to
    DOWNLOADING, then to DONE on success or FAILED on error.
    A task already CANCELLED (or cancelled mid-download) has its
    temporary file removed and its progress/speed reset.

    Returns True if the download completed successfully,
    False otherwise (not queued, cancelled, or failed).
    """
    # Speed calculation (re-)starts here
    self.__start_time = 0
    self.__start_blocks = 0

    # If the download has already been cancelled, skip it
    if self.status == DownloadTask.CANCELLED:
        util.delete_file(self.tempname)
        self.progress = 0.0
        self.speed = 0.0
        return False

    # We only start this download if its status is "queued"
    if self.status != DownloadTask.QUEUED:
        return False

    # We are downloading this file right now
    self.status = DownloadTask.DOWNLOADING
    # Reset so a progress notification can be shown (again) for this run
    self._notification_shown = False

    try:
        # Resolve URL and start downloading the episode
        # (for non-YouTube URLs this presumably returns the URL
        # unchanged -- the helper is opaque from here)
        url = youtube.get_real_download_url(self.__episode.url, \
                self._config.youtube_preferred_fmt_id)
        downloader = DownloadURLOpener(self.__episode.channel)
        # retrieve_resume downloads into self.tempname, reporting
        # progress via self.status_updated; returns the response
        # headers and the (possibly redirected) final URL
        headers, real_url = downloader.retrieve_resume(url, \
                self.tempname, reporthook=self.status_updated)

        # Prefer the server-reported Content-Type over the mime type
        # stored in the episode metadata
        new_mimetype = headers.get('content-type', self.__episode.mime_type)
        old_mimetype = self.__episode.mime_type
        _basename, ext = os.path.splitext(self.filename)
        if new_mimetype != old_mimetype or util.wrong_extension(ext):
            logger.info('Updating mime type: %s => %s', old_mimetype, new_mimetype)
            old_extension = self.__episode.extension()
            self.__episode.mime_type = new_mimetype
            new_extension = self.__episode.extension()

            # If the desired filename extension changed due to the new
            # mimetype, we force an update of the local filename to fix the
            # extension.
            if old_extension != new_extension or util.wrong_extension(ext):
                self.filename = self.__episode.local_filename(create=True, force_update=True)

        # TODO: Check if "real_url" is different from "url" and if it is,
        # see if we can get a better episode filename out of it

        # Look at the Content-disposition header; use it if available
        disposition_filename = get_header_param(headers, \
                'filename', 'content-disposition')

        if disposition_filename is not None:
            # The server specifies a download filename - try to use it
            # (basename strips any path components the server sent)
            disposition_filename = os.path.basename(disposition_filename)
            self.filename = self.__episode.local_filename(create=True, \
                    force_update=True, template=disposition_filename)
            # Re-guess the mime type from the server-suggested filename
            new_mimetype, encoding = mimetypes.guess_type(self.filename)
            if new_mimetype is not None:
                logger.info('Using content-disposition mimetype: %s',
                        new_mimetype)
                self.__episode.set_mimetype(new_mimetype, commit=True)

        # Download finished - move the temp file to its final location
        shutil.move(self.tempname, self.filename)

        # Model- and database-related updates after a download has finished
        self.__episode.on_downloaded(self.filename)
    except DownloadCancelledException:
        logger.info('Download has been cancelled/paused: %s', self)
        # Only remove the partial file on cancel; on pause the temp
        # file is kept so the download can be resumed later
        if self.status == DownloadTask.CANCELLED:
            util.delete_file(self.tempname)
            self.progress = 0.0
            self.speed = 0.0
    except urllib.ContentTooShortError, ctse:
        # Server closed the connection before Content-Length was reached
        self.status = DownloadTask.FAILED
        self.error_message = _('Missing content from server')
    except IOError, ioe:
        logger.error('%s while downloading "%s": %s', ioe.strerror,
                self.__episode.title, ioe.filename, exc_info=True)
        self.status = DownloadTask.FAILED
        d = {'error': ioe.strerror, 'filename': ioe.filename}
        self.error_message = _('I/O Error: %(error)s: %(filename)s') % d
    except gPodderDownloadHTTPError, gdhe:
        logger.error('HTTP %s while downloading "%s": %s',
                gdhe.error_code, self.__episode.title, gdhe.error_message,
                exc_info=True)
        self.status = DownloadTask.FAILED
        d = {'code': gdhe.error_code, 'message': gdhe.error_message}
        self.error_message = _('HTTP Error %(code)s: %(message)s') % d
    except Exception, e:
        # Catch-all boundary: any unexpected error fails the task
        # (logged with traceback) instead of killing the worker
        self.status = DownloadTask.FAILED
        logger.error('Download failed: %s', str(e), exc_info=True)
        self.error_message = _('Error: %s') % (str(e),)

    # Still DOWNLOADING here means no exception changed the status
    if self.status == DownloadTask.DOWNLOADING:
        # Everything went well - we're done
        self.status = DownloadTask.DONE
        if self.total_size <= 0:
            # Server did not report a size - use the on-disk size
            self.total_size = util.calculate_size(self.filename)
            logger.info('Total size updated to %d', self.total_size)
        self.progress = 1.0
        # Give user hooks a chance to react to the finished download
        if gpodder.user_hooks is not None:
            gpodder.user_hooks.on_episode_downloaded(self.__episode)
        return True

    self.speed = 0.0

    # We finished, but not successfully (at least not really)
    return False
|
2007-09-18 20:25:25 +02:00
|
|
|
|