More cautious use of redirect URLs for YT and SC

For YouTube and Soundcloud feeds (among others), we try
to avoid renaming downloads even for HTTP redirects,
because the initial filename that we have is usually okay.
This commit is contained in:
Thomas Perl 2011-09-18 22:04:33 +02:00
parent 3bfab38220
commit 69b38badb4
2 changed files with 31 additions and 6 deletions

View File

@ -756,12 +756,17 @@ class DownloadTask(object):
# In some cases, the redirect of a URL causes the real filename to
# be revealed in the final URL (e.g. http://gpodder.org/bug/1423)
if real_url != url:
real_filename = ''.join(util.filename_from_url(real_url))
self.filename = self.__episode.local_filename(create=True,
force_update=True, template=real_filename)
logger.info('Download was redirected (%s). New filename: %s',
real_url, os.path.basename(self.filename))
if real_url != url and not util.is_known_redirecter(real_url):
realname, realext = util.filename_from_url(real_url)
# Only update from redirect if the redirected-to filename has
# a proper extension (this is needed for e.g. YouTube)
if not util.wrong_extension(realext):
real_filename = ''.join((realname, realext))
self.filename = self.__episode.local_filename(create=True,
force_update=True, template=real_filename)
logger.info('Download was redirected (%s). New filename: %s',
real_url, os.path.basename(self.filename))
# Look at the Content-disposition header; use it if available
disposition_filename = get_header_param(headers, \

View File

@ -1364,3 +1364,23 @@ def generate_names(filename):
else:
yield filename
def is_known_redirecter(url):
    """Check if a URL redirect is expected, and no filenames should be updated

    We usually honor URL redirects, and update filenames accordingly.
    In some cases (e.g. Soundcloud) this results in a worse filename,
    so we hardcode and detect these cases here to avoid renaming files
    for which we know that a "known good default" exists.

    The problem here is that by comparing the currently-assigned filename
    with the new filename determined by the URL, we cannot really determine
    which one is the "better" filename (e.g. "n5rMSpXrqmR9.128.mp3" for
    Soundcloud).

    url -- the URL to check (as a string)

    Returns True if the URL starts with one of the hardcoded prefixes
    below (its filename should then not be used), False otherwise.
    """
    # Soundcloud-hosted media downloads (we take the track name as filename).
    # Both schemes are listed so that the same media host is recognized
    # regardless of whether the redirect ends up on HTTP or HTTPS. The
    # trailing slash keeps look-alike hosts (e.g. "...soundcloud.com.evil")
    # from matching.
    known_prefixes = (
        'http://ak-media.soundcloud.com/',
        'https://ak-media.soundcloud.com/',
    )

    return url.startswith(known_prefixes)