Support for "Content-disposition" header (RFC 2183)
Add support for renaming files according to the Content-disposition header after the download has completed, and for correcting the mimetype based on the new name.
This commit is contained in:
parent
4dd0964ad3
commit
c45da9e33a
|
@ -41,10 +41,43 @@ import os
|
|||
import time
|
||||
import collections
|
||||
|
||||
import mimetypes
|
||||
import email
|
||||
import email.Header
|
||||
|
||||
from xml.sax import saxutils
|
||||
|
||||
_ = gpodder.gettext
|
||||
|
||||
def get_header_param(headers, param, header_name):
|
||||
"""Extract a HTTP header parameter from a dict
|
||||
|
||||
Uses the "email" module to retrieve parameters
|
||||
from HTTP headers. This can be used to get the
|
||||
"filename" parameter of the "content-disposition"
|
||||
header for downloads to pick a good filename.
|
||||
|
||||
Returns None if the filename cannot be retrieved.
|
||||
"""
|
||||
try:
|
||||
headers_string = ['%s:%s'%(k,v) for k,v in headers.items()]
|
||||
msg = email.message_from_string('\n'.join(headers_string))
|
||||
if header_name in msg:
|
||||
value = msg.get_param(param, header=header_name)
|
||||
decoded_list = email.Header.decode_header(value)
|
||||
value = []
|
||||
for part, encoding in decoded_list:
|
||||
if encoding:
|
||||
value.append(part.decode(encoding))
|
||||
else:
|
||||
value.append(unicode(part))
|
||||
return u''.join(value)
|
||||
except Exception, e:
|
||||
log('Error trying to get %s from %s: %s', \
|
||||
param, header_name, str(e), traceback=True)
|
||||
|
||||
return None
|
||||
|
||||
class ContentRange(object):
|
||||
# Based on:
|
||||
# http://svn.pythonpaste.org/Paste/WebOb/trunk/webob/byterange.py
|
||||
|
@ -205,12 +238,11 @@ class DownloadURLOpener(urllib.FancyURLopener):
|
|||
return urllib.addinfourl(fp, headers, 'http:' + url)
|
||||
|
||||
def retrieve_resume(self, url, filename, reporthook=None, data=None):
|
||||
"""retrieve_resume(url) returns (filename, headers) for a local object
|
||||
or (tempfilename, headers) for a remote object.
|
||||
"""Download files from an URL; return (headers, real_url)
|
||||
|
||||
The filename argument is REQUIRED (no tempfile creation code here!)
|
||||
|
||||
Additionally resumes a download if the local filename exists"""
|
||||
Resumes a download if the local filename exists and
|
||||
the server supports download resuming.
|
||||
"""
|
||||
|
||||
current_size = 0
|
||||
tfp = None
|
||||
|
@ -246,12 +278,7 @@ class DownloadURLOpener(urllib.FancyURLopener):
|
|||
current_size = 0
|
||||
log('Cannot resume. Missing or wrong Content-Range header (RFC2616)', sender=self)
|
||||
|
||||
|
||||
# gPodder TODO: we can get the real url via fp.geturl() here
|
||||
# (if anybody wants to fix filenames in the future)
|
||||
# Maybe also utilize the name in the "Content-disposition" header
|
||||
|
||||
result = filename, headers
|
||||
result = headers, fp.geturl()
|
||||
bs = 1024*8
|
||||
size = -1
|
||||
read = current_size
|
||||
|
@ -595,7 +622,7 @@ class DownloadTask(object):
|
|||
# Resolve URL and start downloading the episode
|
||||
url = youtube.get_real_download_url(self.__episode.url)
|
||||
downloader = DownloadURLOpener(self.__episode.channel)
|
||||
(unused, headers) = downloader.retrieve_resume(url,
|
||||
headers, real_url = downloader.retrieve_resume(url, \
|
||||
self.tempname, reporthook=self.status_updated)
|
||||
|
||||
new_mimetype = headers.get('content-type', self.__episode.mimetype)
|
||||
|
@ -611,6 +638,24 @@ class DownloadTask(object):
|
|||
if old_extension != new_extension:
|
||||
self.filename = self.__episode.local_filename(create=True, force_update=True)
|
||||
|
||||
# TODO: Check if "real_url" is different from "url" and if it is,
|
||||
# see if we can get a better episode filename out of it
|
||||
|
||||
# Look at the Content-disposition header; use it if available
|
||||
disposition_filename = get_header_param(headers, \
|
||||
'filename', 'content-disposition')
|
||||
|
||||
if disposition_filename is not None:
|
||||
# The server specifies a download filename - try to use it
|
||||
disposition_filename = os.path.basename(disposition_filename)
|
||||
self.filename = self.__episode.local_filename(create=True, \
|
||||
force_update=True, template=disposition_filename)
|
||||
new_mimetype, encoding = mimetypes.guess_type(self.filename)
|
||||
if new_mimetype is not None:
|
||||
log('Using content-disposition mimetype: %s',
|
||||
new_mimetype, sender=self)
|
||||
self.__episode.set_mimetype(new_mimetype, commit=True)
|
||||
|
||||
shutil.move(self.tempname, self.filename)
|
||||
|
||||
# Get the _real_ filesize once we actually have the file
|
||||
|
|
|
@ -763,7 +763,8 @@ class PodcastEpisode(PodcastModelObject):
|
|||
|
||||
return current_try
|
||||
|
||||
def local_filename(self, create, force_update=False, check_only=False):
|
||||
def local_filename(self, create, force_update=False, check_only=False,
|
||||
template=None):
|
||||
"""Get (and possibly generate) the local saving filename
|
||||
|
||||
Pass create=True if you want this function to generate a
|
||||
|
@ -786,6 +787,9 @@ class PodcastEpisode(PodcastModelObject):
|
|||
to rename the file, even if it would be a good idea. Use this if you
|
||||
only want to check if a file exists.
|
||||
|
||||
If "template" is specified, it should be a filename that is to
|
||||
be used as a template for generating the "real" filename.
|
||||
|
||||
The generated filename is stored in the database for future access.
|
||||
"""
|
||||
ext = self.extension().encode('utf-8', 'ignore')
|
||||
|
@ -816,10 +820,14 @@ class PodcastEpisode(PodcastModelObject):
|
|||
|
||||
if self.filename is None or force_update or (self.auto_filename and self.filename == urldigest+ext):
|
||||
# Try to find a new filename for the current file
|
||||
(episode_filename, extension_UNUSED) = util.filename_from_url(self.url)
|
||||
if template is not None:
|
||||
# If template is specified, trust the template's extension
|
||||
episode_filename, ext = os.path.splitext(template)
|
||||
else:
|
||||
episode_filename, extension_UNUSED = util.filename_from_url(self.url)
|
||||
fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)
|
||||
|
||||
if 'redirect' in fn_template:
|
||||
if 'redirect' in fn_template and template is None:
|
||||
# This looks like a redirection URL - force URL resolving!
|
||||
log('Looks like a redirection to me: %s', self.url, sender=self)
|
||||
url = util.get_real_url(self.channel.authenticate_url(self.url))
|
||||
|
@ -830,7 +838,7 @@ class PodcastEpisode(PodcastModelObject):
|
|||
# Use the video title for YouTube downloads
|
||||
for yt_url in ('http://youtube.com/', 'http://www.youtube.com/'):
|
||||
if self.url.startswith(yt_url):
|
||||
fn_template = self.title
|
||||
fn_template = os.path.basename(self.title)
|
||||
|
||||
# If the basename is empty, use the md5 hexdigest of the URL
|
||||
if len(fn_template) == 0 or fn_template.startswith('redirect.'):
|
||||
|
@ -866,6 +874,12 @@ class PodcastEpisode(PodcastModelObject):
|
|||
|
||||
return os.path.join(self.channel.save_dir, self.filename)
|
||||
|
||||
def set_mimetype(self, mimetype, commit=False):
    """Store a new mimetype on this episode

    If "commit" is true, self.db.commit() is called
    right after the attribute has been updated.
    """
    self.mimetype = mimetype
    if not commit:
        return
    self.db.commit()
|
||||
|
||||
def extension( self):
|
||||
( filename, ext ) = util.filename_from_url(self.url)
|
||||
# if we can't detect the extension from the url fallback on the mimetype
|
||||
|
|
Loading…
Reference in New Issue