2007-09-18 20:25:25 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
#
|
|
|
|
# gPodder - A media aggregator and podcast client
|
2018-01-28 19:39:53 +01:00
|
|
|
# Copyright (c) 2005-2018 The gPodder Team
|
2007-09-18 20:25:25 +02:00
|
|
|
#
|
|
|
|
# gPodder is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation; either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# gPodder is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
#
|
|
|
|
|
|
|
|
|
|
|
|
#
|
2009-08-24 16:17:32 +02:00
|
|
|
# download.py -- Download queue management
|
2007-09-18 20:25:25 +02:00
|
|
|
# Thomas Perl <thp@perli.net> 2007-09-15
|
|
|
|
#
|
|
|
|
# Based on libwget.py (2005-10-29)
|
|
|
|
#
|
|
|
|
|
2018-07-24 11:08:10 +02:00
|
|
|
import collections
import email
import email.utils
import logging
import mimetypes
import os
import os.path
import shutil
import socket
import threading
import time
import urllib.error
import urllib.parse
import urllib.request
import urllib.response
from email.header import decode_header

import gpodder
from gpodder import registry, util
|
2018-07-24 11:08:10 +02:00
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
2009-05-07 16:26:07 +02:00
|
|
|
_ = gpodder.gettext
|
2009-05-05 10:22:12 +02:00
|
|
|
|
2018-02-11 00:22:00 +01:00
|
|
|
|
2019-08-17 16:25:00 +02:00
|
|
|
class CustomDownload:
|
|
|
|
""" abstract class for custom downloads. DownloadTask call retrieve_resume() on it """
|
|
|
|
|
|
|
|
def retrieve_resume(self, unused_tempname, reporthook):
|
|
|
|
"""
|
|
|
|
:param str tempname: temporary filename for the download
|
|
|
|
:param func(number, number, number) reporthook: callback for download progress (count, blockSize, totalSize)
|
|
|
|
:return dict(str, str), str: (headers, real_url)
|
|
|
|
"""
|
|
|
|
return {}, None
|
|
|
|
|
|
|
|
|
|
|
|
class CustomDownloader:
|
|
|
|
"""
|
|
|
|
abstract class for custom downloaders.
|
|
|
|
|
|
|
|
DownloadTask calls custom_downloader to get a CustomDownload
|
|
|
|
"""
|
|
|
|
|
|
|
|
def custom_downloader(self, config, episode):
|
|
|
|
"""
|
|
|
|
if this custom downloader has a custom download method (e.g. youtube-dl),
|
|
|
|
return a CustomDownload. Else return None
|
|
|
|
:param config: gpodder config (e.g. to get preferred video format)
|
|
|
|
:param model.PodcastEpisode episode: episode to download
|
|
|
|
:return CustomDownload: object used to download the episode
|
|
|
|
"""
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
2009-09-06 16:38:40 +02:00
|
|
|
def get_header_param(headers, param, header_name):
|
|
|
|
"""Extract a HTTP header parameter from a dict
|
|
|
|
|
|
|
|
Uses the "email" module to retrieve parameters
|
|
|
|
from HTTP headers. This can be used to get the
|
|
|
|
"filename" parameter of the "content-disposition"
|
|
|
|
header for downloads to pick a good filename.
|
|
|
|
|
|
|
|
Returns None if the filename cannot be retrieved.
|
|
|
|
"""
|
2012-09-14 21:36:46 +02:00
|
|
|
value = None
|
2009-09-06 16:38:40 +02:00
|
|
|
try:
|
2018-03-18 01:00:02 +01:00
|
|
|
headers_string = ['%s:%s' % (k, v) for k, v in list(headers.items())]
|
2009-09-06 16:38:40 +02:00
|
|
|
msg = email.message_from_string('\n'.join(headers_string))
|
|
|
|
if header_name in msg:
|
2012-09-14 21:36:46 +02:00
|
|
|
raw_value = msg.get_param(param, header=header_name)
|
2012-12-27 16:26:50 +01:00
|
|
|
if raw_value is not None:
|
|
|
|
value = email.utils.collapse_rfc2231_value(raw_value)
|
2016-11-21 23:13:46 +01:00
|
|
|
except Exception as e:
|
2011-07-15 16:32:06 +02:00
|
|
|
logger.error('Cannot get %s from %s', param, header_name, exc_info=True)
|
2009-09-06 16:38:40 +02:00
|
|
|
|
2012-09-14 21:36:46 +02:00
|
|
|
return value
|
2009-09-06 16:38:40 +02:00
|
|
|
|
2018-02-11 00:22:00 +01:00
|
|
|
|
2009-05-05 10:22:12 +02:00
|
|
|
class ContentRange(object):
|
|
|
|
# Based on:
|
|
|
|
# http://svn.pythonpaste.org/Paste/WebOb/trunk/webob/byterange.py
|
|
|
|
#
|
|
|
|
# Copyright (c) 2007 Ian Bicking and Contributors
|
|
|
|
#
|
|
|
|
# Permission is hereby granted, free of charge, to any person obtaining
|
|
|
|
# a copy of this software and associated documentation files (the
|
|
|
|
# "Software"), to deal in the Software without restriction, including
|
|
|
|
# without limitation the rights to use, copy, modify, merge, publish,
|
|
|
|
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
|
|
# permit persons to whom the Software is furnished to do so, subject to
|
|
|
|
# the following conditions:
|
|
|
|
#
|
|
|
|
# The above copyright notice and this permission notice shall be
|
|
|
|
# included in all copies or substantial portions of the Software.
|
|
|
|
#
|
|
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
|
|
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
|
|
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
|
|
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
|
|
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
|
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
"""
|
|
|
|
Represents the Content-Range header
|
|
|
|
|
|
|
|
This header is ``start-stop/length``, where stop and length can be
|
|
|
|
``*`` (represented as None in the attributes).
|
|
|
|
"""
|
|
|
|
|
|
|
|
def __init__(self, start, stop, length):
|
|
|
|
assert start >= 0, "Bad start: %r" % start
|
|
|
|
assert stop is None or (stop >= 0 and stop >= start), (
|
|
|
|
"Bad stop: %r" % stop)
|
|
|
|
self.start = start
|
|
|
|
self.stop = stop
|
|
|
|
self.length = length
|
|
|
|
|
|
|
|
def __repr__(self):
|
|
|
|
return '<%s %s>' % (
|
|
|
|
self.__class__.__name__,
|
|
|
|
self)
|
|
|
|
|
|
|
|
def __str__(self):
|
|
|
|
if self.stop is None:
|
|
|
|
stop = '*'
|
|
|
|
else:
|
|
|
|
stop = self.stop + 1
|
|
|
|
if self.length is None:
|
|
|
|
length = '*'
|
|
|
|
else:
|
|
|
|
length = self.length
|
|
|
|
return 'bytes %s-%s/%s' % (self.start, stop, length)
|
|
|
|
|
|
|
|
def __iter__(self):
|
|
|
|
"""
|
|
|
|
Mostly so you can unpack this, like:
|
|
|
|
|
|
|
|
start, stop, length = res.content_range
|
|
|
|
"""
|
|
|
|
return iter([self.start, self.stop, self.length])
|
|
|
|
|
|
|
|
@classmethod
|
|
|
|
def parse(cls, value):
|
|
|
|
"""
|
|
|
|
Parse the header. May return None if it cannot parse.
|
|
|
|
"""
|
|
|
|
if value is None:
|
|
|
|
return None
|
|
|
|
value = value.strip()
|
|
|
|
if not value.startswith('bytes '):
|
|
|
|
# Unparseable
|
|
|
|
return None
|
|
|
|
value = value[len('bytes '):].strip()
|
|
|
|
if '/' not in value:
|
|
|
|
# Invalid, no length given
|
|
|
|
return None
|
|
|
|
range, length = value.split('/', 1)
|
|
|
|
if '-' not in range:
|
|
|
|
# Invalid, no range
|
|
|
|
return None
|
|
|
|
start, end = range.split('-', 1)
|
|
|
|
try:
|
|
|
|
start = int(start)
|
|
|
|
if end == '*':
|
|
|
|
end = None
|
|
|
|
else:
|
|
|
|
end = int(end)
|
|
|
|
if length == '*':
|
|
|
|
length = None
|
|
|
|
else:
|
|
|
|
length = int(length)
|
|
|
|
except ValueError:
|
|
|
|
# Parse problem
|
|
|
|
return None
|
|
|
|
if end is None:
|
|
|
|
return cls(start, None, length)
|
|
|
|
else:
|
2018-03-27 21:40:36 +02:00
|
|
|
return cls(start, end - 1, length)
|
2009-05-05 10:22:12 +02:00
|
|
|
|
|
|
|
|
2007-09-18 20:25:25 +02:00
|
|
|
class DownloadCancelledException(Exception): pass
|
2018-02-11 00:22:00 +01:00
|
|
|
|
|
|
|
|
2009-08-24 13:04:11 +02:00
|
|
|
class AuthenticationError(Exception): pass
|
2007-09-18 20:25:25 +02:00
|
|
|
|
2018-02-11 00:22:00 +01:00
|
|
|
|
2008-08-10 14:38:20 +02:00
|
|
|
class gPodderDownloadHTTPError(Exception):
|
|
|
|
def __init__(self, url, error_code, error_message):
|
|
|
|
self.url = url
|
|
|
|
self.error_code = error_code
|
|
|
|
self.error_message = error_message
|
2007-09-18 20:25:25 +02:00
|
|
|
|
2018-02-11 00:22:00 +01:00
|
|
|
|
2016-11-21 23:13:46 +01:00
|
|
|
class DownloadURLOpener(urllib.request.FancyURLopener):
|
2007-09-18 20:25:25 +02:00
|
|
|
version = gpodder.user_agent
|
|
|
|
|
2009-09-09 18:53:45 +02:00
|
|
|
# Sometimes URLs are not escaped correctly - try to fix them
|
|
|
|
# (see RFC2396; Section 2.4.3. Excluded US-ASCII Characters)
|
|
|
|
# FYI: The omission of "%" in the list is to avoid double escaping!
|
2018-03-18 01:00:02 +01:00
|
|
|
ESCAPE_CHARS = dict((ord(c), '%%%x' % ord(c)) for c in ' <>#"{}|\\^[]`')
|
2009-09-09 18:53:45 +02:00
|
|
|
|
2018-05-27 20:09:40 +02:00
|
|
|
def __init__(self, channel):
|
2007-09-18 20:25:25 +02:00
|
|
|
self.channel = channel
|
2009-08-24 13:04:11 +02:00
|
|
|
self._auth_retry_counter = 0
|
2018-12-15 16:18:47 +01:00
|
|
|
super().__init__()
|
|
|
|
|
|
|
|
def http_error(self, url, fp, errcode, errmsg, headers, data=None):
|
|
|
|
"""Handle http errors.
|
|
|
|
Overriden to give retry=True to http_error_40{1,7}.
|
|
|
|
See https://github.com/python/cpython/commit/80f1b059714aeb1c6fc9f6ce1173bc8a51af7dd9
|
|
|
|
See python issue https://bugs.python.org/issue1368368
|
|
|
|
"""
|
|
|
|
result = False
|
|
|
|
if errcode == 401:
|
|
|
|
result = self.http_error_401(url, fp, errcode, errmsg, headers, data=data, retry=True)
|
|
|
|
elif errcode == 407:
|
|
|
|
result = self.http_error_407(url, fp, errcode, errmsg, headers, data=data, retry=True)
|
|
|
|
if result:
|
|
|
|
return result
|
|
|
|
return super().http_error(url, fp, errcode, errmsg, headers, data=data)
|
2007-09-18 20:25:25 +02:00
|
|
|
|
2008-08-10 14:38:20 +02:00
|
|
|
def http_error_default(self, url, fp, errcode, errmsg, headers):
|
|
|
|
"""
|
|
|
|
FancyURLopener by default does not raise an exception when
|
|
|
|
there is some unknown HTTP error code. We want to override
|
|
|
|
this and provide a function to log the error and raise an
|
|
|
|
exception, so we don't download the HTTP error page here.
|
|
|
|
"""
|
|
|
|
# The following two lines are copied from urllib.URLopener's
|
|
|
|
# implementation of http_error_default
|
|
|
|
void = fp.read()
|
|
|
|
fp.close()
|
|
|
|
raise gPodderDownloadHTTPError(url, errcode, errmsg)
|
2018-01-30 14:04:28 +01:00
|
|
|
|
2009-06-03 19:32:15 +02:00
|
|
|
def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
|
|
|
|
""" This is the exact same function that's included with urllib
|
|
|
|
except with "void = fp.read()" commented out. """
|
2018-01-30 14:04:28 +01:00
|
|
|
|
2009-06-03 19:32:15 +02:00
|
|
|
if 'location' in headers:
|
|
|
|
newurl = headers['location']
|
|
|
|
elif 'uri' in headers:
|
|
|
|
newurl = headers['uri']
|
|
|
|
else:
|
|
|
|
return
|
2018-01-30 14:04:28 +01:00
|
|
|
|
2009-06-03 19:32:15 +02:00
|
|
|
# This blocks forever(?) with certain servers (see bug #465)
|
2018-05-17 08:39:56 +02:00
|
|
|
# void = fp.read()
|
2009-06-03 19:32:15 +02:00
|
|
|
fp.close()
|
2018-01-30 14:04:28 +01:00
|
|
|
|
2009-06-03 19:32:15 +02:00
|
|
|
# In case the server sent a relative URL, join with original:
|
2016-11-21 23:13:46 +01:00
|
|
|
newurl = urllib.parse.urljoin(self.type + ":" + url, newurl)
|
2009-06-03 19:32:15 +02:00
|
|
|
return self.open(newurl)
|
2018-01-30 14:04:28 +01:00
|
|
|
|
2009-02-09 23:26:47 +01:00
|
|
|
# The following is based on Python's urllib.py "URLopener.retrieve"
|
|
|
|
# Also based on http://mail.python.org/pipermail/python-list/2001-October/110069.html
|
|
|
|
|
|
|
|
def http_error_206(self, url, fp, errcode, errmsg, headers, data=None):
|
|
|
|
# The next line is taken from urllib's URLopener.open_http
|
|
|
|
# method, at the end after the line "if errcode == 200:"
|
|
|
|
return urllib.addinfourl(fp, headers, 'http:' + url)
|
|
|
|
|
|
|
|
def retrieve_resume(self, url, filename, reporthook=None, data=None):
|
2009-09-06 16:38:40 +02:00
|
|
|
"""Download files from an URL; return (headers, real_url)
|
2009-02-09 23:26:47 +01:00
|
|
|
|
2009-09-06 16:38:40 +02:00
|
|
|
Resumes a download if the local filename exists and
|
|
|
|
the server supports download resuming.
|
|
|
|
"""
|
2009-02-09 23:26:47 +01:00
|
|
|
|
|
|
|
current_size = 0
|
|
|
|
tfp = None
|
|
|
|
if os.path.exists(filename):
|
|
|
|
try:
|
|
|
|
current_size = os.path.getsize(filename)
|
|
|
|
tfp = open(filename, 'ab')
|
2018-05-17 08:39:56 +02:00
|
|
|
# If the file exists, then only download the remainder
|
2009-06-10 01:57:35 +02:00
|
|
|
if current_size > 0:
|
|
|
|
self.addheader('Range', 'bytes=%s-' % (current_size))
|
2009-02-09 23:26:47 +01:00
|
|
|
except:
|
2011-07-15 16:32:06 +02:00
|
|
|
logger.warn('Cannot resume download: %s', filename, exc_info=True)
|
2009-02-09 23:26:47 +01:00
|
|
|
tfp = None
|
|
|
|
current_size = 0
|
|
|
|
|
|
|
|
if tfp is None:
|
|
|
|
tfp = open(filename, 'wb')
|
|
|
|
|
2009-09-09 18:53:45 +02:00
|
|
|
# Fix a problem with bad URLs that are not encoded correctly (bug 549)
|
|
|
|
url = url.translate(self.ESCAPE_CHARS)
|
|
|
|
|
2009-02-09 23:26:47 +01:00
|
|
|
fp = self.open(url, data)
|
|
|
|
headers = fp.info()
|
|
|
|
|
2009-05-05 10:22:12 +02:00
|
|
|
if current_size > 0:
|
|
|
|
# We told the server to resume - see if she agrees
|
|
|
|
# See RFC2616 (206 Partial Content + Section 14.16)
|
|
|
|
# XXX check status code here, too...
|
|
|
|
range = ContentRange.parse(headers.get('content-range', ''))
|
|
|
|
if range is None or range.start != current_size:
|
|
|
|
# Ok, that did not work. Reset the download
|
|
|
|
# TODO: seek and truncate if content-range differs from request
|
|
|
|
tfp.close()
|
|
|
|
tfp = open(filename, 'wb')
|
|
|
|
current_size = 0
|
2011-07-15 16:32:06 +02:00
|
|
|
logger.warn('Cannot resume: Invalid Content-Range (RFC2616).')
|
2009-05-05 10:22:12 +02:00
|
|
|
|
2009-09-06 16:38:40 +02:00
|
|
|
result = headers, fp.geturl()
|
2018-03-27 21:40:36 +02:00
|
|
|
bs = 1024 * 8
|
2009-02-09 23:26:47 +01:00
|
|
|
size = -1
|
|
|
|
read = current_size
|
2018-03-27 21:40:36 +02:00
|
|
|
blocknum = current_size // bs
|
2009-02-09 23:26:47 +01:00
|
|
|
if reporthook:
|
|
|
|
if "content-length" in headers:
|
2018-03-18 00:47:54 +01:00
|
|
|
size = int(headers['Content-Length']) + current_size
|
2009-02-09 23:26:47 +01:00
|
|
|
reporthook(blocknum, bs, size)
|
2009-09-01 14:37:11 +02:00
|
|
|
while read < size or size == -1:
|
2009-06-10 01:57:35 +02:00
|
|
|
if size == -1:
|
|
|
|
block = fp.read(bs)
|
|
|
|
else:
|
2018-03-27 21:40:36 +02:00
|
|
|
block = fp.read(min(size - read, bs))
|
2018-12-11 09:59:14 +01:00
|
|
|
if len(block) == 0:
|
2009-02-09 23:26:47 +01:00
|
|
|
break
|
|
|
|
read += len(block)
|
|
|
|
tfp.write(block)
|
|
|
|
blocknum += 1
|
|
|
|
if reporthook:
|
|
|
|
reporthook(blocknum, bs, size)
|
|
|
|
fp.close()
|
|
|
|
tfp.close()
|
|
|
|
del fp
|
|
|
|
del tfp
|
|
|
|
|
|
|
|
# raise exception if actual size does not match content-length header
|
|
|
|
if size >= 0 and read < size:
|
2016-11-21 23:13:46 +01:00
|
|
|
raise urllib.error.ContentTooShortError("retrieval incomplete: got only %i out "
|
2009-02-09 23:26:47 +01:00
|
|
|
"of %i bytes" % (read, size), result)
|
|
|
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
# end code based on urllib.py
|
|
|
|
|
2018-05-27 20:09:40 +02:00
|
|
|
def prompt_user_passwd(self, host, realm):
|
2009-08-24 13:04:11 +02:00
|
|
|
# Keep track of authentication attempts, fail after the third one
|
|
|
|
self._auth_retry_counter += 1
|
|
|
|
if self._auth_retry_counter > 3:
|
|
|
|
raise AuthenticationError(_('Wrong username/password'))
|
|
|
|
|
2010-12-20 14:35:46 +01:00
|
|
|
if self.channel.auth_username or self.channel.auth_password:
|
2011-07-15 16:32:06 +02:00
|
|
|
logger.debug('Authenticating as "%s" to "%s" for realm "%s".',
|
|
|
|
self.channel.auth_username, host, realm)
|
2018-05-27 20:09:40 +02:00
|
|
|
return (self.channel.auth_username, self.channel.auth_password)
|
2007-09-18 20:25:25 +02:00
|
|
|
|
2009-08-24 13:04:11 +02:00
|
|
|
return (None, None)
|
2007-09-18 20:25:25 +02:00
|
|
|
|
|
|
|
|
2012-07-10 13:52:34 +02:00
|
|
|
class DownloadQueueWorker(object):
|
2016-11-15 04:54:21 +01:00
|
|
|
def __init__(self, queue, exit_callback, continue_check_callback):
|
2009-04-01 01:12:17 +02:00
|
|
|
self.queue = queue
|
|
|
|
self.exit_callback = exit_callback
|
2009-12-17 00:37:45 +01:00
|
|
|
self.continue_check_callback = continue_check_callback
|
2007-09-18 20:25:25 +02:00
|
|
|
|
2012-07-10 13:52:34 +02:00
|
|
|
def __repr__(self):
|
|
|
|
return threading.current_thread().getName()
|
|
|
|
|
2009-04-01 01:12:17 +02:00
|
|
|
def run(self):
|
2012-07-10 13:52:34 +02:00
|
|
|
logger.info('Starting new thread: %s', self)
|
2009-12-17 00:37:45 +01:00
|
|
|
while True:
|
2016-11-15 04:54:21 +01:00
|
|
|
if not self.continue_check_callback(self):
|
2009-12-17 00:37:45 +01:00
|
|
|
return
|
|
|
|
|
2009-04-01 01:12:17 +02:00
|
|
|
try:
|
2016-11-15 04:54:21 +01:00
|
|
|
task = self.queue.get_next()
|
2012-07-10 13:52:34 +02:00
|
|
|
logger.info('%s is processing: %s', self, task)
|
2009-04-01 01:12:17 +02:00
|
|
|
task.run()
|
2011-07-16 18:04:07 +02:00
|
|
|
task.recycle()
|
2016-11-21 23:13:46 +01:00
|
|
|
except StopIteration as e:
|
2012-07-10 13:52:34 +02:00
|
|
|
logger.info('No more tasks for %s to carry out.', self)
|
2009-04-01 01:12:17 +02:00
|
|
|
break
|
|
|
|
self.exit_callback(self)
|
|
|
|
|
|
|
|
|
2016-11-15 04:54:21 +01:00
|
|
|
class ForceDownloadWorker(object):
|
|
|
|
def __init__(self, task):
|
|
|
|
self.task = task
|
|
|
|
|
|
|
|
def __repr__(self):
|
|
|
|
return threading.current_thread().getName()
|
|
|
|
|
|
|
|
def run(self):
|
|
|
|
logger.info('Starting new thread: %s', self)
|
|
|
|
logger.info('%s is processing: %s', self, self.task)
|
|
|
|
self.task.run()
|
|
|
|
|
|
|
|
|
2009-04-01 01:12:17 +02:00
|
|
|
class DownloadQueueManager(object):
|
2016-11-15 04:54:21 +01:00
|
|
|
def __init__(self, config, queue):
|
2009-08-11 00:09:02 +02:00
|
|
|
self._config = config
|
2016-11-15 04:54:21 +01:00
|
|
|
self.tasks = queue
|
2009-04-01 01:12:17 +02:00
|
|
|
|
|
|
|
self.worker_threads_access = threading.RLock()
|
|
|
|
self.worker_threads = []
|
|
|
|
|
|
|
|
def __exit_callback(self, worker_thread):
|
|
|
|
with self.worker_threads_access:
|
|
|
|
self.worker_threads.remove(worker_thread)
|
2008-12-13 13:29:45 +01:00
|
|
|
|
2009-12-17 00:37:45 +01:00
|
|
|
def __continue_check_callback(self, worker_thread):
|
2009-04-01 01:12:17 +02:00
|
|
|
with self.worker_threads_access:
|
2009-08-11 00:09:02 +02:00
|
|
|
if len(self.worker_threads) > self._config.max_downloads and \
|
|
|
|
self._config.max_downloads_enabled:
|
2009-12-17 00:37:45 +01:00
|
|
|
self.worker_threads.remove(worker_thread)
|
|
|
|
return False
|
|
|
|
else:
|
|
|
|
return True
|
|
|
|
|
2016-11-15 04:54:21 +01:00
|
|
|
def __spawn_threads(self):
|
2010-03-07 20:15:36 +01:00
|
|
|
"""Spawn new worker threads if necessary
|
|
|
|
"""
|
2009-12-17 00:37:45 +01:00
|
|
|
with self.worker_threads_access:
|
2018-04-29 15:15:18 +02:00
|
|
|
work_count = self.tasks.available_work_count()
|
|
|
|
if self._config.max_downloads_enabled:
|
|
|
|
# always allow at least 1 download
|
|
|
|
max_downloads = max(int(self._config.max_downloads), 1)
|
|
|
|
spawn_limit = max_downloads - len(self.worker_threads)
|
|
|
|
else:
|
2018-04-29 15:59:22 +02:00
|
|
|
spawn_limit = self._config.limit.downloads.concurrent_max
|
2018-04-29 15:15:18 +02:00
|
|
|
logger.info('%r tasks to do, can start at most %r threads', work_count, spawn_limit)
|
|
|
|
for i in range(0, min(work_count, spawn_limit)):
|
2009-04-01 01:12:17 +02:00
|
|
|
# We have to create a new thread here, there's work to do
|
2011-07-15 16:32:06 +02:00
|
|
|
logger.info('Starting new worker thread.')
|
2010-03-07 20:15:36 +01:00
|
|
|
|
2012-07-10 13:52:34 +02:00
|
|
|
worker = DownloadQueueWorker(self.tasks, self.__exit_callback,
|
2016-11-15 04:54:21 +01:00
|
|
|
self.__continue_check_callback)
|
2009-04-01 01:12:17 +02:00
|
|
|
self.worker_threads.append(worker)
|
2012-07-10 13:52:34 +02:00
|
|
|
util.run_in_background(worker.run)
|
2007-09-18 20:25:25 +02:00
|
|
|
|
2016-11-15 04:54:21 +01:00
|
|
|
def update_max_downloads(self):
|
|
|
|
self.__spawn_threads()
|
2009-08-17 21:46:17 +02:00
|
|
|
|
2016-11-15 04:54:21 +01:00
|
|
|
def force_start_task(self, task):
|
|
|
|
if self.tasks.set_downloading(task):
|
|
|
|
worker = ForceDownloadWorker(task)
|
|
|
|
util.run_in_background(worker.run)
|
2010-03-07 20:15:36 +01:00
|
|
|
|
2016-11-15 04:54:21 +01:00
|
|
|
def queue_task(self, task):
|
|
|
|
"""Marks a task as queued
|
2010-03-07 20:15:36 +01:00
|
|
|
"""
|
2009-04-01 01:12:17 +02:00
|
|
|
task.status = DownloadTask.QUEUED
|
2016-11-15 04:54:21 +01:00
|
|
|
self.__spawn_threads()
|
2007-09-18 20:25:25 +02:00
|
|
|
|
2008-04-17 17:59:38 +02:00
|
|
|
|
2009-04-01 01:12:17 +02:00
|
|
|
class DownloadTask(object):
|
|
|
|
"""An object representing the download task of an episode
|
|
|
|
|
|
|
|
You can create a new download task like this:
|
|
|
|
|
2009-08-11 00:09:02 +02:00
|
|
|
task = DownloadTask(episode, gpodder.config.Config(CONFIGFILE))
|
2018-02-15 21:13:05 +01:00
|
|
|
task.status = DownloadTask.DOWNLOADING
|
2009-04-01 01:12:17 +02:00
|
|
|
task.run()
|
|
|
|
|
|
|
|
While the download is in progress, you can access its properties:
|
|
|
|
|
|
|
|
task.total_size # in bytes
|
|
|
|
task.progress # from 0.0 to 1.0
|
|
|
|
task.speed # in bytes per second
|
|
|
|
str(task) # name of the episode
|
|
|
|
task.status # current status
|
2009-05-12 11:03:59 +02:00
|
|
|
task.status_changed # True if the status has been changed (see below)
|
|
|
|
task.url # URL of the episode being downloaded
|
|
|
|
task.podcast_url # URL of the podcast this download belongs to
|
2012-01-21 09:55:11 +01:00
|
|
|
task.episode # Episode object of this task
|
2009-04-01 01:12:17 +02:00
|
|
|
|
|
|
|
You can cancel a running download task by setting its status:
|
|
|
|
|
|
|
|
task.status = DownloadTask.CANCELLED
|
|
|
|
|
|
|
|
The task will then abort as soon as possible (due to the nature
|
|
|
|
of downloading data, this can take a while when the Internet is
|
|
|
|
busy).
|
|
|
|
|
2009-04-02 00:02:07 +02:00
|
|
|
    The "status_changed" attribute gets set to True every time the
|
|
|
|
"status" attribute changes its value. After you get the value of
|
|
|
|
the "status_changed" attribute, it is always reset to False:
|
|
|
|
|
|
|
|
if task.status_changed:
|
|
|
|
new_status = task.status
|
|
|
|
# .. update the UI accordingly ..
|
|
|
|
|
|
|
|
Obviously, this also means that you must have at most *one*
|
|
|
|
place in your UI code where you check for status changes and
|
|
|
|
broadcast the status updates from there.
|
|
|
|
|
2009-04-01 01:12:17 +02:00
|
|
|
While the download is taking place and after the .run() method
|
|
|
|
has finished, you can get the final status to check if the download
|
|
|
|
was successful:
|
|
|
|
|
|
|
|
if task.status == DownloadTask.DONE:
|
|
|
|
# .. everything ok ..
|
|
|
|
elif task.status == DownloadTask.FAILED:
|
|
|
|
# .. an error happened, and the
|
|
|
|
# error_message attribute is set ..
|
|
|
|
        print(task.error_message)
|
|
|
|
elif task.status == DownloadTask.PAUSED:
|
|
|
|
# .. user paused the download ..
|
|
|
|
elif task.status == DownloadTask.CANCELLED:
|
|
|
|
# .. user cancelled the download ..
|
|
|
|
|
|
|
|
The difference between cancelling and pausing a DownloadTask is
|
|
|
|
that the temporary file gets deleted when cancelling, but does
|
|
|
|
not get deleted when pausing.
|
|
|
|
|
|
|
|
Be sure to call .removed_from_list() on this task when removing
|
|
|
|
it from the UI, so that it can carry out any pending clean-up
|
|
|
|
actions (e.g. removing the temporary file when the task has not
|
|
|
|
finished successfully; i.e. task.status != DownloadTask.DONE).
|
2010-12-18 14:50:43 +01:00
|
|
|
|
|
|
|
The UI can call the method "notify_as_finished()" to determine if
|
|
|
|
    this episode still has to be shown as a "finished" download
|
|
|
|
in a notification window. This will return True only the first time
|
|
|
|
it is called when the status is DONE. After returning True once,
|
|
|
|
it will always return False afterwards.
|
|
|
|
|
|
|
|
The same thing works for failed downloads ("notify_as_failed()").
|
2009-04-01 01:12:17 +02:00
|
|
|
"""
|
|
|
|
# Possible states this download task can be in
|
|
|
|
STATUS_MESSAGE = (_('Added'), _('Queued'), _('Downloading'),
|
|
|
|
_('Finished'), _('Failed'), _('Cancelled'), _('Paused'))
|
2016-11-21 23:13:46 +01:00
|
|
|
(INIT, QUEUED, DOWNLOADING, DONE, FAILED, CANCELLED, PAUSED) = list(range(7))
|
2012-07-09 21:08:40 +02:00
|
|
|
|
|
|
|
    # Whether this task represents a file download or a device sync operation
|
2016-11-21 23:13:46 +01:00
|
|
|
ACTIVITY_DOWNLOAD, ACTIVITY_SYNCHRONIZE = list(range(2))
|
2012-07-09 21:08:40 +02:00
|
|
|
|
2012-10-23 13:10:27 +02:00
|
|
|
# Minimum time between progress updates (in seconds)
|
|
|
|
MIN_TIME_BETWEEN_UPDATES = 1.
|
2009-04-01 01:12:17 +02:00
|
|
|
|
|
|
|
def __str__(self):
|
|
|
|
return self.__episode.title
|
|
|
|
|
|
|
|
def __get_status(self):
|
|
|
|
return self.__status
|
|
|
|
|
|
|
|
def __set_status(self, status):
|
2009-04-02 00:02:07 +02:00
|
|
|
if status != self.__status:
|
|
|
|
self.__status_changed = True
|
|
|
|
self.__status = status
|
2009-04-01 01:12:17 +02:00
|
|
|
|
|
|
|
status = property(fget=__get_status, fset=__set_status)
|
|
|
|
|
2009-04-02 00:02:07 +02:00
|
|
|
def __get_status_changed(self):
|
|
|
|
if self.__status_changed:
|
|
|
|
self.__status_changed = False
|
|
|
|
return True
|
|
|
|
else:
|
|
|
|
return False
|
|
|
|
|
|
|
|
status_changed = property(fget=__get_status_changed)
|
|
|
|
|
2012-07-07 23:41:56 +02:00
|
|
|
def __get_activity(self):
|
|
|
|
return self.__activity
|
|
|
|
|
|
|
|
def __set_activity(self, activity):
|
|
|
|
self.__activity = activity
|
|
|
|
|
|
|
|
activity = property(fget=__get_activity, fset=__set_activity)
|
|
|
|
|
2009-04-01 01:12:17 +02:00
|
|
|
def __get_url(self):
|
|
|
|
return self.__episode.url
|
|
|
|
|
|
|
|
url = property(fget=__get_url)
|
|
|
|
|
2009-05-12 11:03:59 +02:00
|
|
|
def __get_podcast_url(self):
|
|
|
|
return self.__episode.channel.url
|
|
|
|
|
|
|
|
podcast_url = property(fget=__get_podcast_url)
|
|
|
|
|
2009-04-19 08:09:36 +02:00
|
|
|
def __get_episode(self):
|
|
|
|
return self.__episode
|
|
|
|
|
|
|
|
episode = property(fget=__get_episode)
|
|
|
|
|
2011-07-16 18:04:07 +02:00
|
|
|
def cancel(self):
|
|
|
|
if self.status in (self.DOWNLOADING, self.QUEUED):
|
|
|
|
self.status = self.CANCELLED
|
|
|
|
|
2009-04-01 01:12:17 +02:00
|
|
|
def removed_from_list(self):
|
|
|
|
if self.status != self.DONE:
|
|
|
|
util.delete_file(self.tempname)
|
|
|
|
|
2009-08-11 00:09:02 +02:00
|
|
|
def __init__(self, episode, config):
|
2011-07-16 18:04:07 +02:00
|
|
|
assert episode.download_task is None
|
2009-04-01 01:12:17 +02:00
|
|
|
self.__status = DownloadTask.INIT
|
2012-07-09 21:08:40 +02:00
|
|
|
self.__activity = DownloadTask.ACTIVITY_DOWNLOAD
|
2009-04-02 00:02:07 +02:00
|
|
|
self.__status_changed = True
|
2009-04-01 01:12:17 +02:00
|
|
|
self.__episode = episode
|
2009-08-11 00:09:02 +02:00
|
|
|
self._config = config
|
2009-04-01 01:12:17 +02:00
|
|
|
|
|
|
|
# Create the target filename and save it in the database
|
|
|
|
self.filename = self.__episode.local_filename(create=True)
|
|
|
|
self.tempname = self.filename + '.partial'
|
|
|
|
|
2010-12-20 14:35:46 +01:00
|
|
|
self.total_size = self.__episode.file_size
|
2009-04-01 01:12:17 +02:00
|
|
|
self.speed = 0.0
|
2007-09-18 20:25:25 +02:00
|
|
|
self.progress = 0.0
|
2009-04-01 01:12:17 +02:00
|
|
|
self.error_message = None
|
|
|
|
|
2010-12-18 14:50:43 +01:00
|
|
|
# Have we already shown this task in a notification?
|
|
|
|
self._notification_shown = False
|
|
|
|
|
2009-04-01 01:12:17 +02:00
|
|
|
# Variables for speed limit and speed calculation
|
|
|
|
self.__start_time = 0
|
|
|
|
self.__start_blocks = 0
|
2009-08-11 00:09:02 +02:00
|
|
|
self.__limit_rate_value = self._config.limit_rate_value
|
|
|
|
self.__limit_rate = self._config.limit_rate
|
2009-02-18 13:32:39 +01:00
|
|
|
|
2012-10-23 13:10:27 +02:00
|
|
|
# Progress update functions
|
|
|
|
self._progress_updated = None
|
|
|
|
self._last_progress_updated = 0.
|
2010-10-10 22:44:22 +02:00
|
|
|
|
2009-04-01 01:12:17 +02:00
|
|
|
# If the tempname already exists, set progress accordingly
|
|
|
|
if os.path.exists(self.tempname):
|
2009-02-18 13:32:39 +01:00
|
|
|
try:
|
2009-04-01 01:12:17 +02:00
|
|
|
already_downloaded = os.path.getsize(self.tempname)
|
|
|
|
if self.total_size > 0:
|
2018-03-27 21:40:36 +02:00
|
|
|
self.progress = max(0.0, min(1.0, already_downloaded / self.total_size))
|
2016-11-21 23:13:46 +01:00
|
|
|
except OSError as os_error:
|
2011-07-15 16:32:06 +02:00
|
|
|
logger.error('Cannot get size for %s', os_error)
|
2007-09-18 20:25:25 +02:00
|
|
|
else:
|
2009-04-01 01:12:17 +02:00
|
|
|
# "touch self.tempname", so we also get partial
|
|
|
|
# files for resuming when the file is queued
|
|
|
|
open(self.tempname, 'w').close()
|
|
|
|
|
2011-07-16 18:04:07 +02:00
|
|
|
# Store a reference to this task in the episode
|
|
|
|
episode.download_task = self
|
|
|
|
|
2010-12-18 14:50:43 +01:00
|
|
|
def notify_as_finished(self):
|
|
|
|
if self.status == DownloadTask.DONE:
|
|
|
|
if self._notification_shown:
|
|
|
|
return False
|
|
|
|
else:
|
|
|
|
self._notification_shown = True
|
|
|
|
return True
|
|
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
def notify_as_failed(self):
|
|
|
|
if self.status == DownloadTask.FAILED:
|
|
|
|
if self._notification_shown:
|
|
|
|
return False
|
|
|
|
else:
|
|
|
|
self._notification_shown = True
|
|
|
|
return True
|
|
|
|
|
|
|
|
return False
|
|
|
|
|
2010-10-10 22:44:22 +02:00
|
|
|
def add_progress_callback(self, callback):
|
|
|
|
self._progress_updated = callback
|
|
|
|
|
2009-04-01 01:12:17 +02:00
|
|
|
def status_updated(self, count, blockSize, totalSize):
|
|
|
|
# We see a different "total size" while downloading,
|
|
|
|
# so correct the total size variable in the thread
|
|
|
|
if totalSize != self.total_size and totalSize > 0:
|
|
|
|
self.total_size = float(totalSize)
|
2011-07-16 17:25:44 +02:00
|
|
|
if self.__episode.file_size != self.total_size:
|
|
|
|
logger.debug('Updating file size of %s to %s',
|
|
|
|
self.filename, self.total_size)
|
|
|
|
self.__episode.file_size = self.total_size
|
|
|
|
self.__episode.save()
|
2009-04-01 01:12:17 +02:00
|
|
|
|
|
|
|
if self.total_size > 0:
|
2018-03-27 21:40:36 +02:00
|
|
|
self.progress = max(0.0, min(1.0, count * blockSize / self.total_size))
|
2012-10-23 13:10:27 +02:00
|
|
|
if self._progress_updated is not None:
|
|
|
|
diff = time.time() - self._last_progress_updated
|
|
|
|
if diff > self.MIN_TIME_BETWEEN_UPDATES or self.progress == 1.:
|
|
|
|
self._progress_updated(self.progress)
|
|
|
|
self._last_progress_updated = time.time()
|
2009-04-01 01:12:17 +02:00
|
|
|
|
|
|
|
self.calculate_speed(count, blockSize)
|
|
|
|
|
|
|
|
if self.status == DownloadTask.CANCELLED:
|
2007-09-18 20:25:25 +02:00
|
|
|
raise DownloadCancelledException()
|
|
|
|
|
2009-04-01 01:12:17 +02:00
|
|
|
if self.status == DownloadTask.PAUSED:
|
|
|
|
raise DownloadCancelledException()
|
|
|
|
|
|
|
|
    def calculate_speed(self, count, blockSize):
        """Update self.speed and enforce the configured download rate limit.

        Only does real work every fifth block to keep per-callback overhead
        low.  count and blockSize are the reporthook values passed on from
        status_updated().  May sleep (up to 10 seconds) to throttle the
        download when rate limiting is enabled.
        """
        if count % 5 == 0:
            now = time.time()
            if self.__start_time > 0:
                # Has rate limiting been enabled or disabled?
                if self.__limit_rate != self._config.limit_rate:
                    # If it has been enabled then reset base time and block count
                    if self._config.limit_rate:
                        self.__start_time = now
                        self.__start_blocks = count
                    self.__limit_rate = self._config.limit_rate

                # Has the rate been changed and are we currently limiting?
                if self.__limit_rate_value != self._config.limit_rate_value and self.__limit_rate:
                    self.__start_time = now
                    self.__start_blocks = count
                    self.__limit_rate_value = self._config.limit_rate_value

                passed = now - self.__start_time
                if passed > 0:
                    # Average speed in bytes/second since the measurement base
                    speed = ((count - self.__start_blocks) * blockSize) / passed
                else:
                    speed = 0
            else:
                # First measurement: establish the base time and block count
                self.__start_time = now
                self.__start_blocks = count
                # NOTE: passed is always 0.0 here (start time was just set);
                # it is still needed for the limit check further down
                passed = now - self.__start_time
                speed = count * blockSize

            self.speed = float(speed)

            # NOTE(review): speed is in bytes/s while limit_rate_value is
            # treated as KiB/s below (the * 1024.0 divisor) -- this
            # comparison mixes units, so the limiter effectively always
            # engages once limiting is on; presumably accepted upstream
            # behavior, but worth confirming
            if self._config.limit_rate and speed > self._config.limit_rate_value:
                # calculate the time that should have passed to reach
                # the desired download rate and wait if necessary
                should_have_passed = (count - self.__start_blocks) * blockSize / (self._config.limit_rate_value * 1024.0)
                if should_have_passed > passed:
                    # sleep a maximum of 10 seconds to not cause time-outs
                    delay = min(10.0, float(should_have_passed - passed))
                    time.sleep(delay)
def recycle(self):
|
2011-07-24 20:37:58 +02:00
|
|
|
self.episode.download_task = None
|
2011-07-16 18:04:07 +02:00
|
|
|
|
2009-04-01 01:12:17 +02:00
|
|
|
    def run(self):
        """Execute the download synchronously.

        Returns True if the episode was downloaded successfully, False
        otherwise (cancelled, paused, skipped or failed).  Updates the
        task's status/progress/speed attributes and error_message, and
        updates the episode's metadata (mime type, file name, file size)
        as a side effect.
        """
        # Speed calculation (re-)starts here
        self.__start_time = 0
        self.__start_blocks = 0

        # If the download has already been cancelled, skip it
        if self.status == DownloadTask.CANCELLED:
            util.delete_file(self.tempname)
            self.progress = 0.0
            self.speed = 0.0
            return False

        # We only start this download if its status is "downloading"
        if self.status != DownloadTask.DOWNLOADING:
            return False

        # We are downloading this file right now
        self.status = DownloadTask.DOWNLOADING
        self._notification_shown = False

        # Restore a reference to this task in the episode
        # when running a recycled task following a pause or failed
        # see #649
        if not self.episode.download_task:
            self.episode.download_task = self

        try:
            # An extension may provide its own downloader for this episode
            custom_downloader = registry.custom_downloader.resolve(self._config, None, self.episode)
            url = self.__episode.url
            if custom_downloader:
                logger.info('Downloading %s with %s', url, custom_downloader)
                headers, real_url = custom_downloader.retrieve_resume(
                    self.tempname, reporthook=self.status_updated)
            else:
                # Resolve URL and start downloading the episode
                res = registry.download_url.resolve(self._config, None, self.episode)
                if res:
                    url = res
                if url == self.__episode.url:
                    # don't modify custom urls (#635 - vimeo breaks if * is unescaped)
                    url = url.strip()
                    url = util.iri_to_url(url)

                logger.info("Downloading %s", url)
                downloader = DownloadURLOpener(self.__episode.channel)

                # HTTP Status codes for which we retry the download
                retry_codes = (408, 418, 504, 598, 599)
                max_retries = max(0, self._config.auto.retries)

                # Retry the download on timeout (bug 1013)
                for retry in range(max_retries + 1):
                    if retry > 0:
                        logger.info('Retrying download of %s (%d)', url, retry)
                        time.sleep(1)

                    try:
                        headers, real_url = downloader.retrieve_resume(url,
                                self.tempname, reporthook=self.status_updated)
                        # If we arrive here, the download was successful
                        break
                    except urllib.error.ContentTooShortError as ctse:
                        # Server sent fewer bytes than announced - retryable
                        if retry < max_retries:
                            logger.info('Content too short: %s - will retry.',
                                    url)
                            continue
                        raise
                    except socket.timeout as tmout:
                        if retry < max_retries:
                            logger.info('Socket timeout: %s - will retry.', url)
                            continue
                        raise
                    except gPodderDownloadHTTPError as http:
                        # Only retry for status codes that suggest a
                        # transient server-side problem (see retry_codes)
                        if retry < max_retries and http.error_code in retry_codes:
                            logger.info('HTTP error %d: %s - will retry.',
                                    http.error_code, url)
                            continue
                        raise

            # The server may report a different mime type than the feed did
            new_mimetype = headers.get('content-type', self.__episode.mime_type)
            old_mimetype = self.__episode.mime_type
            _basename, ext = os.path.splitext(self.filename)
            if new_mimetype != old_mimetype or util.wrong_extension(ext):
                logger.info('Updating mime type: %s => %s', old_mimetype, new_mimetype)
                old_extension = self.__episode.extension()
                self.__episode.mime_type = new_mimetype
                new_extension = self.__episode.extension()

                # If the desired filename extension changed due to the new
                # mimetype, we force an update of the local filename to fix the
                # extension.
                if old_extension != new_extension or util.wrong_extension(ext):
                    self.filename = self.__episode.local_filename(create=True, force_update=True)

            # In some cases, the redirect of a URL causes the real filename to
            # be revealed in the final URL (e.g. http://gpodder.org/bug/1423)
            if real_url != url and not util.is_known_redirecter(real_url):
                realname, realext = util.filename_from_url(real_url)

                # Only update from redirect if the redirected-to filename has
                # a proper extension (this is needed for e.g. YouTube)
                if not util.wrong_extension(realext):
                    real_filename = ''.join((realname, realext))
                    self.filename = self.__episode.local_filename(create=True,
                            force_update=True, template=real_filename)
                    logger.info('Download was redirected (%s). New filename: %s',
                            real_url, os.path.basename(self.filename))

            # Look at the Content-disposition header; use it if available
            disposition_filename = get_header_param(headers, 'filename', 'content-disposition')

            # Some servers do send the content-disposition header, but provide
            # an empty filename, resulting in an empty string here (bug 1440)
            if disposition_filename is not None and disposition_filename != '':
                # The server specifies a download filename - try to use it
                # filename_from_url to remove query string; see #591
                fn, ext = util.filename_from_url(disposition_filename)
                logger.debug("converting disposition filename '%s' to local filename '%s%s'", disposition_filename, fn, ext)
                disposition_filename = fn + ext
                self.filename = self.__episode.local_filename(create=True,
                        force_update=True, template=disposition_filename)
                # Re-guess the mime type from the disposition filename
                new_mimetype, encoding = mimetypes.guess_type(self.filename)
                if new_mimetype is not None:
                    logger.info('Using content-disposition mimetype: %s',
                            new_mimetype)
                    self.__episode.mime_type = new_mimetype

            # Re-evaluate filename and tempname to take care of podcast renames
            # while downloads are running (which will change both file names)
            self.filename = self.__episode.local_filename(create=False)
            self.tempname = os.path.join(os.path.dirname(self.filename),
                    os.path.basename(self.tempname))
            # Download is complete: move the temp file into place
            shutil.move(self.tempname, self.filename)

            # Model- and database-related updates after a download has finished
            self.__episode.on_downloaded(self.filename)
        except DownloadCancelledException:
            logger.info('Download has been cancelled/paused: %s', self)
            # Only delete the partial file on a real cancel; a pause keeps
            # the temp file so the download can be resumed later
            if self.status == DownloadTask.CANCELLED:
                util.delete_file(self.tempname)
                self.progress = 0.0
                self.speed = 0.0
        except urllib.error.ContentTooShortError as ctse:
            self.status = DownloadTask.FAILED
            self.error_message = _('Missing content from server')
        except IOError as ioe:
            logger.error('%s while downloading "%s": %s', ioe.strerror,
                    self.__episode.title, ioe.filename, exc_info=True)
            self.status = DownloadTask.FAILED
            d = {'error': ioe.strerror, 'filename': ioe.filename}
            self.error_message = _('I/O Error: %(error)s: %(filename)s') % d
        except gPodderDownloadHTTPError as gdhe:
            logger.error('HTTP %s while downloading "%s": %s',
                    gdhe.error_code, self.__episode.title, gdhe.error_message,
                    exc_info=True)
            self.status = DownloadTask.FAILED
            d = {'code': gdhe.error_code, 'message': gdhe.error_message}
            self.error_message = _('HTTP Error %(code)s: %(message)s') % d
        except Exception as e:
            # Catch-all boundary: mark the task as failed and record why
            self.status = DownloadTask.FAILED
            logger.error('Download failed: %s', str(e), exc_info=True)
            self.error_message = _('Error: %s') % (str(e),)

        if self.status == DownloadTask.DOWNLOADING:
            # Everything went well - we're done
            self.status = DownloadTask.DONE
            if self.total_size <= 0:
                # Server never reported a size; measure the downloaded file
                self.total_size = util.calculate_size(self.filename)
                logger.info('Total size updated to %d', self.total_size)
            self.progress = 1.0
            gpodder.user_extensions.on_episode_downloaded(self.__episode)
            return True

        self.speed = 0.0

        # We finished, but not successfully (at least not really)
        return False