move download's get_header_param() to util, to use in soundcloud plugin

This commit is contained in:
Eric Le Lay 2020-07-12 17:01:24 +02:00
parent a56651ad79
commit d40bc49e0e
3 changed files with 28 additions and 48 deletions

View file

@ -78,30 +78,6 @@ class CustomDownloader:
return None return None
def get_header_param(headers, param, header_name):
    """Extract a HTTP header parameter from a dict

    Uses the "email" module to retrieve parameters
    from HTTP headers. This can be used to get the
    "filename" parameter of the "content-disposition"
    header for downloads to pick a good filename.

    headers is a mapping of header names to values (anything
    providing .items()), param is the name of the parameter to
    look up and header_name is the header that carries it.

    Returns the parameter value as a string (RFC 2231 encoded
    values are collapsed into plain text), or None if the header
    or the parameter is missing or cannot be parsed.
    """
    # Load the submodule explicitly instead of relying on it being
    # pulled in as a side effect of another import.
    import email.utils

    value = None
    try:
        # Serialize the mapping to "name:value" lines so that the email
        # parser can do the parameter splitting and unquoting for us.
        header_lines = '\n'.join('%s:%s' % (k, v) for k, v in headers.items())
        msg = email.message_from_string(header_lines)
        if header_name in msg:
            raw_value = msg.get_param(param, header=header_name)
            if raw_value is not None:
                # get_param() may return a (charset, language, value)
                # tuple; collapse it to a single string.
                value = email.utils.collapse_rfc2231_value(raw_value)
    except Exception:
        # Best effort only: a bad header must not abort the caller.
        logger.error('Cannot get %s from %s', param, header_name, exc_info=True)
    return value
class ContentRange(object): class ContentRange(object):
# Based on: # Based on:
# http://svn.pythonpaste.org/Paste/WebOb/trunk/webob/byterange.py # http://svn.pythonpaste.org/Paste/WebOb/trunk/webob/byterange.py
@ -892,7 +868,7 @@ class DownloadTask(object):
real_url, os.path.basename(self.filename)) real_url, os.path.basename(self.filename))
# Look at the Content-disposition header; use it if available # Look at the Content-disposition header; use it if available
disposition_filename = get_header_param(headers, 'filename', 'content-disposition') disposition_filename = util.get_header_param(headers, 'filename', 'content-disposition')
# Some servers do send the content-disposition header, but provide # Some servers do send the content-disposition header, but provide
# an empty filename, resulting in an empty string here (bug 1440) # an empty filename, resulting in an empty string here (bug 1440)

View file

@ -54,28 +54,6 @@ def soundcloud_parsedate(s):
return time.mktime(tuple([int(x) for x in m.groups()] + [0, 0, -1])) return time.mktime(tuple([int(x) for x in m.groups()] + [0, 0, -1]))
def get_param(s, param='filename', header='content-disposition'):
    """Get a parameter from a string of headers

    By default, this gets the "filename" parameter of
    the content-disposition header. This works fine
    for downloads from Soundcloud.

    s is a string of "name:value" header lines separated by
    newlines. Returns the decoded parameter value as a string,
    or None if the header or the parameter is not present.
    """
    import email.header
    import email.utils

    msg = email.message_from_string(s)
    if header not in msg:
        return None

    value = msg.get_param(param, header=header)
    if value is None:
        # Header is there, but without the requested parameter;
        # decode_header() cannot handle None.
        return None

    if isinstance(value, tuple):
        # RFC 2231 encoded parameter: (charset, language, value)
        return email.utils.collapse_rfc2231_value(value)

    decoded = []
    for part, encoding in email.header.decode_header(value):
        if isinstance(part, bytes):
            # decode_header() yields bytes as soon as the value contains
            # an encoded word; parts without a charset are decoded as
            # latin-1, which cannot fail.
            try:
                part = part.decode(encoding or 'latin-1', 'replace')
            except LookupError:
                # Charset name unknown to Python
                part = part.decode('latin-1')
        decoded.append(part)
    return ''.join(decoded)
def get_metadata(url): def get_metadata(url):
"""Get file download metadata """Get file download metadata
@ -87,7 +65,8 @@ def get_metadata(url):
filesize = track_response.headers['content-length'] or '0' filesize = track_response.headers['content-length'] or '0'
filetype = track_response.headers['content-type'] or 'application/octet-stream' filetype = track_response.headers['content-type'] or 'application/octet-stream'
headers_s = '\n'.join('%s:%s' % (k, v) for k, v in list(track_response.headers.items())) headers_s = '\n'.join('%s:%s' % (k, v) for k, v in list(track_response.headers.items()))
filename = get_param(headers_s) or os.path.basename(os.path.dirname(url)) filename = util.get_header_param(track_response.headers, 'filename', 'content-disposition') \
or os.path.basename(os.path.dirname(url))
track_fp.close() track_fp.close()
return filesize, filetype, filename return filesize, filetype, filename

View file

@ -31,6 +31,7 @@ are not tied to any specific part of gPodder.
""" """
import collections import collections
import datetime import datetime
import email
import glob import glob
import gzip import gzip
import http.client import http.client
@ -2163,3 +2164,27 @@ def parse_mimetype(mimetype):
except MIMETypeException as e: except MIMETypeException as e:
print(e) print(e)
return (None, None, {}) return (None, None, {})
def get_header_param(headers, param, header_name):
    """Extract a HTTP header parameter from a dict

    Uses the "email" module to retrieve parameters
    from HTTP headers. This can be used to get the
    "filename" parameter of the "content-disposition"
    header for downloads to pick a good filename.

    headers is a mapping of header names to values (anything
    providing .items()), param is the name of the parameter to
    look up and header_name is the header that carries it.

    Returns the parameter value as a string (RFC 2231 encoded
    values are collapsed into plain text), or None if the header
    or the parameter is missing or cannot be parsed.
    """
    # Load the submodule explicitly instead of relying on it being
    # pulled in as a side effect of another import.
    import email.utils

    value = None
    try:
        # Serialize the mapping to "name:value" lines so that the email
        # parser can do the parameter splitting and unquoting for us.
        header_lines = '\n'.join('%s:%s' % (k, v) for k, v in headers.items())
        msg = email.message_from_string(header_lines)
        if header_name in msg:
            raw_value = msg.get_param(param, header=header_name)
            if raw_value is not None:
                # get_param() may return a (charset, language, value)
                # tuple; collapse it to a single string.
                value = email.utils.collapse_rfc2231_value(raw_value)
    except Exception:
        # Best effort only: a bad header must not abort the caller.
        logger.error('Cannot get %s from %s', param, header_name, exc_info=True)
    return value