move download's get_header_param() to util, to use in soundcloud plugin

This commit is contained in:
Eric Le Lay 2020-07-12 17:01:24 +02:00
parent a56651ad79
commit d40bc49e0e
3 changed files with 28 additions and 48 deletions

View File

@ -78,30 +78,6 @@ class CustomDownloader:
return None
def get_header_param(headers, param, header_name):
    """Look up a single parameter of an HTTP header.

    The headers mapping is serialized to "name:value" lines and
    parsed with the "email" module, which knows how to split a
    header value into its parameters. A typical use is reading
    the "filename" parameter of the "content-disposition" header
    to pick a good filename for a download.

    headers     -- mapping of header names to header values
    param       -- name of the parameter to extract
    header_name -- name of the header carrying the parameter

    Returns the parameter value (RFC 2231 encoded values are
    collapsed into a plain string), or None when the header or
    the parameter is missing or when anything goes wrong while
    parsing (the error is logged, not raised).
    """
    try:
        serialized = '\n'.join(
            '%s:%s' % (name, content) for name, content in headers.items())
        parsed = email.message_from_string(serialized)
        if header_name not in parsed:
            return None

        raw_param = parsed.get_param(param, header=header_name)
        if raw_param is None:
            return None

        # get_param() may hand back a (charset, language, value) tuple
        # for RFC 2231 encoded parameters; collapse it to a string.
        return email.utils.collapse_rfc2231_value(raw_param)
    except Exception:
        # Best effort only: report the problem and fall back to None
        logger.error('Cannot get %s from %s', param, header_name, exc_info=True)
        return None
class ContentRange(object):
# Based on:
# http://svn.pythonpaste.org/Paste/WebOb/trunk/webob/byterange.py
@ -892,7 +868,7 @@ class DownloadTask(object):
real_url, os.path.basename(self.filename))
# Look at the Content-disposition header; use it if available
disposition_filename = get_header_param(headers, 'filename', 'content-disposition')
disposition_filename = util.get_header_param(headers, 'filename', 'content-disposition')
# Some servers do send the content-disposition header, but provide
# an empty filename, resulting in an empty string here (bug 1440)

View File

@ -54,28 +54,6 @@ def soundcloud_parsedate(s):
return time.mktime(tuple([int(x) for x in m.groups()] + [0, 0, -1]))
def get_param(s, param='filename', header='content-disposition'):
    """Get a parameter from a string of headers

    By default, this gets the "filename" parameter of
    the content-disposition header. This works fine
    for downloads from Soundcloud.

    s      -- header block as text, one "name:value" pair per line
    param  -- name of the parameter to extract
    header -- name of the header carrying the parameter

    Returns the decoded parameter value as a string, or None
    if the header is absent or does not carry the parameter.
    """
    # Local import so the helper used below is in scope no matter
    # what the surrounding module imports.
    from email.utils import collapse_rfc2231_value

    msg = email.message_from_string(s)
    if header not in msg:
        return None

    value = msg.get_param(param, header=header)
    if value is None:
        # Header present but without the requested parameter;
        # decode_header() cannot cope with None.
        return None

    if isinstance(value, tuple):
        # RFC 2231 encoded parameter (e.g. filename*=UTF-8''...) comes
        # back as a (charset, language, value) tuple.
        return collapse_rfc2231_value(value)

    pieces = []
    for part, encoding in email.header.decode_header(value):
        if isinstance(part, bytes):
            # When any RFC 2047 encoded word is present, decode_header()
            # yields bytes for every chunk; chunks without a charset must
            # be decoded too, not rendered through str() as "b'...'".
            try:
                pieces.append(part.decode(encoding or 'ascii', errors='replace'))
            except LookupError:
                # Unknown charset name announced by the server
                pieces.append(part.decode('ascii', errors='replace'))
        else:
            pieces.append(part)
    return ''.join(pieces)
def get_metadata(url):
"""Get file download metadata
@ -87,7 +65,8 @@ def get_metadata(url):
filesize = track_response.headers['content-length'] or '0'
filetype = track_response.headers['content-type'] or 'application/octet-stream'
headers_s = '\n'.join('%s:%s' % (k, v) for k, v in list(track_response.headers.items()))
filename = get_param(headers_s) or os.path.basename(os.path.dirname(url))
filename = util.get_header_param(track_response.headers, 'filename', 'content-disposition') \
or os.path.basename(os.path.dirname(url))
track_fp.close()
return filesize, filetype, filename

View File

@ -31,6 +31,7 @@ are not tied to any specific part of gPodder.
"""
import collections
import datetime
import email
import glob
import gzip
import http.client
@ -2163,3 +2164,27 @@ def parse_mimetype(mimetype):
except MIMETypeException as e:
print(e)
return (None, None, {})
def get_header_param(headers, param, header_name):
    """Return one parameter of one HTTP header, or None.

    The given header mapping is turned into "name:value" lines
    and fed to the "email" parser, which takes care of splitting
    a header value into parameters. This is handy for fetching
    the "filename" parameter of a "content-disposition" header
    when choosing a filename for a download.

    headers     -- mapping of header names to header values
    param       -- parameter to look for
    header_name -- header in which to look for the parameter

    None is returned if the header or the parameter is absent,
    and also if parsing fails for any reason (failures are
    logged rather than raised).
    """
    result = None
    try:
        lines = ('%s:%s' % (key, val) for key, val in headers.items())
        message = email.message_from_string('\n'.join(lines))
        raw = (message.get_param(param, header=header_name)
               if header_name in message else None)
        if raw is not None:
            # RFC 2231 encoded parameters arrive as a tuple of
            # (charset, language, value); flatten to a plain string.
            result = email.utils.collapse_rfc2231_value(raw)
    except Exception:
        # Deliberately best effort: log the failure, keep returning None
        logger.error('Cannot get %s from %s', param, header_name, exc_info=True)
    return result