Better detection for wrong file extensions

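This fixes an issue where previously downloaded
episodes would not get their file extension corrected:
the extension was only updated when the mime type
changed, so an episode whose mime type stayed the same
kept its wrong filename. The extension itself is now
checked as well.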
This commit is contained in:
Thomas Perl 2010-04-09 02:28:11 +02:00
parent 23992c42f9
commit ccf590328e
2 changed files with 52 additions and 4 deletions

View File

@ -687,15 +687,17 @@ class DownloadTask(object):
new_mimetype = headers.get('content-type', self.__episode.mimetype)
old_mimetype = self.__episode.mimetype
if new_mimetype != old_mimetype:
_basename, ext = os.path.splitext(self.filename)
if new_mimetype != old_mimetype or util.wrong_extension(ext):
log('Correcting mime type: %s => %s', old_mimetype, new_mimetype, sender=self)
old_extension = self.__episode.extension()
self.__episode.mimetype = new_mimetype
new_extension = self.__episode.extension()
# If the desired filename extension changed due to the new mimetype,
# we force an update of the local filename to fix the extension
if old_extension != new_extension:
# If the desired filename extension changed due to the new
# mimetype, we force an update of the local filename to fix the
# extension.
if old_extension != new_extension or util.wrong_extension(ext):
self.filename = self.__episode.local_filename(create=True, force_update=True)
# TODO: Check if "real_url" is different from "url" and if it is,

View File

@ -529,6 +529,52 @@ def remove_html_tags(html):
return result.strip()
def wrong_extension(extension):
    """
    Determine if a given extension looks like it's
    wrong (e.g. empty, extremely long or spaces)

    The extension is expected to include its leading
    dot (e.g. '.mp3'); anything without one is treated
    as wrong.

    Returns True if the extension most likely is a
    wrong one and should be replaced.

    >>> wrong_extension('.mp3')
    False
    >>> wrong_extension('.divx')
    False
    >>> wrong_extension('mp3')
    True
    >>> wrong_extension('')
    True
    >>> wrong_extension('.12 - Everybody')
    True
    >>> wrong_extension('.mp3 ')
    True
    >>> wrong_extension('.')
    True
    >>> wrong_extension('.42')
    True
    """
    # Empty (or None) means there is no extension at all
    if not extension:
        return True

    # The length includes the leading dot, so at most four
    # characters are accepted after it (e.g. '.divx')
    if len(extension) > 5:
        return True

    if ' ' in extension:
        return True

    # A lone dot carries no extension
    if extension == '.':
        return True

    if not extension.startswith('.'):
        return True

    # ".<number>" is an invalid extension (usually the result of
    # splitting a title such as "Episode 1.42" at its last dot)
    try:
        float(extension)
    except ValueError:
        # Not parseable as a number, so it looks like a real extension
        return False

    return True
def extension_from_mimetype(mimetype):
"""
Simply guesses what the file extension should be from the mimetype