Support for "Content-disposition" header (RFC 2183)

Add support for renaming files according to the
Content-disposition header after the download has
completed, and correct the mimetype based on the new name.
This commit is contained in:
Thomas Perl 2009-09-06 16:38:40 +02:00
parent 4dd0964ad3
commit c45da9e33a
2 changed files with 75 additions and 16 deletions

View File

@ -41,10 +41,43 @@ import os
import time
import collections
import mimetypes
import email
import email.Header
from xml.sax import saxutils
_ = gpodder.gettext
def get_header_param(headers, param, header_name):
"""Extract a HTTP header parameter from a dict
Uses the "email" module to retrieve parameters
from HTTP headers. This can be used to get the
"filename" parameter of the "content-disposition"
header for downloads to pick a good filename.
Returns None if the filename cannot be retrieved.
"""
try:
headers_string = ['%s:%s'%(k,v) for k,v in headers.items()]
msg = email.message_from_string('\n'.join(headers_string))
if header_name in msg:
value = msg.get_param(param, header=header_name)
decoded_list = email.Header.decode_header(value)
value = []
for part, encoding in decoded_list:
if encoding:
value.append(part.decode(encoding))
else:
value.append(unicode(part))
return u''.join(value)
except Exception, e:
log('Error trying to get %s from %s: %s', \
param, header_name, str(e), traceback=True)
return None
class ContentRange(object):
# Based on:
# http://svn.pythonpaste.org/Paste/WebOb/trunk/webob/byterange.py
@ -205,12 +238,11 @@ class DownloadURLOpener(urllib.FancyURLopener):
return urllib.addinfourl(fp, headers, 'http:' + url)
def retrieve_resume(self, url, filename, reporthook=None, data=None):
"""retrieve_resume(url) returns (filename, headers) for a local object
or (tempfilename, headers) for a remote object.
"""Download files from an URL; return (headers, real_url)
The filename argument is REQUIRED (no tempfile creation code here!)
Additionally resumes a download if the local filename exists"""
Resumes a download if the local filename exists and
the server supports download resuming.
"""
current_size = 0
tfp = None
@ -246,12 +278,7 @@ class DownloadURLOpener(urllib.FancyURLopener):
current_size = 0
log('Cannot resume. Missing or wrong Content-Range header (RFC2616)', sender=self)
# gPodder TODO: we can get the real url via fp.geturl() here
# (if anybody wants to fix filenames in the future)
# Maybe also utilize the name in the "Content-disposition" header
result = filename, headers
result = headers, fp.geturl()
bs = 1024*8
size = -1
read = current_size
@ -595,7 +622,7 @@ class DownloadTask(object):
# Resolve URL and start downloading the episode
url = youtube.get_real_download_url(self.__episode.url)
downloader = DownloadURLOpener(self.__episode.channel)
(unused, headers) = downloader.retrieve_resume(url,
headers, real_url = downloader.retrieve_resume(url, \
self.tempname, reporthook=self.status_updated)
new_mimetype = headers.get('content-type', self.__episode.mimetype)
@ -611,6 +638,24 @@ class DownloadTask(object):
if old_extension != new_extension:
self.filename = self.__episode.local_filename(create=True, force_update=True)
# TODO: Check if "real_url" is different from "url" and if it is,
# see if we can get a better episode filename out of it
# Look at the Content-disposition header; use it if available
disposition_filename = get_header_param(headers, \
'filename', 'content-disposition')
if disposition_filename is not None:
# The server specifies a download filename - try to use it
disposition_filename = os.path.basename(disposition_filename)
self.filename = self.__episode.local_filename(create=True, \
force_update=True, template=disposition_filename)
new_mimetype, encoding = mimetypes.guess_type(self.filename)
if new_mimetype is not None:
log('Using content-disposition mimetype: %s',
new_mimetype, sender=self)
self.__episode.set_mimetype(new_mimetype, commit=True)
shutil.move(self.tempname, self.filename)
# Get the _real_ filesize once we actually have the file

View File

@ -763,7 +763,8 @@ class PodcastEpisode(PodcastModelObject):
return current_try
def local_filename(self, create, force_update=False, check_only=False):
def local_filename(self, create, force_update=False, check_only=False,
template=None):
"""Get (and possibly generate) the local saving filename
Pass create=True if you want this function to generate a
@ -786,6 +787,9 @@ class PodcastEpisode(PodcastModelObject):
to rename the file, even if it would be a good idea. Use this if you
only want to check if a file exists.
If "template" is specified, it should be a filename that is to
be used as a template for generating the "real" filename.
The generated filename is stored in the database for future access.
"""
ext = self.extension().encode('utf-8', 'ignore')
@ -816,10 +820,14 @@ class PodcastEpisode(PodcastModelObject):
if self.filename is None or force_update or (self.auto_filename and self.filename == urldigest+ext):
# Try to find a new filename for the current file
(episode_filename, extension_UNUSED) = util.filename_from_url(self.url)
if template is not None:
# If template is specified, trust the template's extension
episode_filename, ext = os.path.splitext(template)
else:
episode_filename, extension_UNUSED = util.filename_from_url(self.url)
fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)
if 'redirect' in fn_template:
if 'redirect' in fn_template and template is None:
# This looks like a redirection URL - force URL resolving!
log('Looks like a redirection to me: %s', self.url, sender=self)
url = util.get_real_url(self.channel.authenticate_url(self.url))
@ -830,7 +838,7 @@ class PodcastEpisode(PodcastModelObject):
# Use the video title for YouTube downloads
for yt_url in ('http://youtube.com/', 'http://www.youtube.com/'):
if self.url.startswith(yt_url):
fn_template = self.title
fn_template = os.path.basename(self.title)
# If the basename is empty, use the md5 hexdigest of the URL
if len(fn_template) == 0 or fn_template.startswith('redirect.'):
@ -866,6 +874,12 @@ class PodcastEpisode(PodcastModelObject):
return os.path.join(self.channel.save_dir, self.filename)
def set_mimetype(self, mimetype, commit=False):
    """Store a new mimetype on this episode

    The value is assigned to the "mimetype" attribute. When
    "commit" is true, self.db.commit() is called afterwards.
    """
    self.mimetype = mimetype
    if not commit:
        return
    self.db.commit()
def extension( self):
( filename, ext ) = util.filename_from_url(self.url)
# if we can't detect the extension from the url fallback on the mimetype