Tue, 01 Jul 2008 22:46:52 -0400 <me@nikosapi.org>

Better file extension detection using feed items' mimetypes

	* src/gpodder/gui.py: Use episode.file_type() instead of 
	util.file_type_by_extension(util.file_extension_from_url(url))
	* src/gpodder/libgpodder.py: Use episode.file_type() instead of          
        util.file_type_by_extension(util.file_extension_from_url(url))
	* src/gpodder/libpodcasts.py: Add new column to the channel list
	gtk.ListStore for holding the file extension. Add extension()
	function to podcastItem which makes use of the mimetype in the event that
	filename_from_url()[1] returns None
	* src/gpodder/util.py: file_extension_from_url is renamed to 
	filename_from_url and returns (filename, extension)
	Added extension_from_mimetype(mimetype)



git-svn-id: svn://svn.berlios.de/gpodder/trunk@752 b0d088ad-0a06-0410-aad2-9ed5178a7e87
This commit is contained in:
Nick 2008-07-02 23:36:39 +00:00
parent 2560aac326
commit d4ea5b78b2
5 changed files with 60 additions and 39 deletions

View File

@ -1,3 +1,18 @@
Tue, 01 Jul 2008 22:46:52 -0400 <me@nikosapi.org>
Better file extension detection using feed items' mimetypes
* src/gpodder/gui.py: Use episode.file_type() instead of
util.file_type_by_extension(util.file_extension_from_url(url))
* src/gpodder/libgpodder.py: Use episode.file_type() instead of
util.file_type_by_extension(util.file_extension_from_url(url))
* src/gpodder/libpodcasts.py: Add new column to the channel list
gtk.ListStore for holding the file extension. Add extension()
function to podcastItem which makes use of the mimetype in the event that
filename_from_url()[1] returns None
* src/gpodder/util.py: file_extension_from_url is renamed to
filename_from_url and returns (filename, extension)
Added extension_from_mimetype(mimetype)
Mon, 30 Jun 2008 17:39:36 +0200 <thp@perli.net>
Do not update podcast list when closing channel window (patch by Justin Forest)

View File

@ -1056,7 +1056,7 @@ class gPodder(GladeWidget):
else:
can_download = True
if util.file_type_by_extension(util.file_extension_from_url(url)) == 'torrent':
if self.active_channel.find_episode(url).file_type() == 'torrent':
can_download = can_download or gl.config.use_gnome_bittorrent
can_download = can_download and not can_cancel

View File

@ -321,7 +321,7 @@ class gPodderLib(object):
return (True, service)
# Determine the file type and set the player accordingly.
file_type = util.file_type_by_extension(util.file_extension_from_url(episode.url))
file_type = episode.file_type()
if file_type == 'video':
player = self.config.videoplayer

View File

@ -338,7 +338,7 @@ class podcastChannel(object):
if missing:
log('Episode missing: %s (before drawing an icon)', episode.url, sender=self)
file_type = util.file_type_by_extension( util.file_extension_from_url(url))
file_type = util.file_type_by_extension( model.get_value( iter, 9))
if file_type == 'audio':
status_icon = util.get_tree_icon(ICON_AUDIO_FILE, not episode.is_played, episode.is_locked, not episode.file_exists(), self.icon_cache, icon_size)
elif file_type == 'video':
@ -359,7 +359,9 @@ class podcastChannel(object):
"""
Return a gtk.ListStore containing episodes for this channel
"""
new_model = gtk.ListStore( gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_BOOLEAN, gtk.gdk.Pixbuf, gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_STRING)
new_model = gtk.ListStore( gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_STRING,
gobject.TYPE_BOOLEAN, gtk.gdk.Pixbuf, gobject.TYPE_STRING, gobject.TYPE_STRING,
gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_STRING )
for item in self.get_all_episodes():
if gl.config.episode_list_descriptions:
@ -372,7 +374,9 @@ class podcastChannel(object):
else:
filelength = None
new_iter = new_model.append((item.url, item.title, filelength, True, None, item.cute_pubdate(), description, item.description, item.local_filename()))
new_iter = new_model.append((item.url, item.title, filelength,
True, None, item.cute_pubdate(), description, item.description,
item.local_filename(), item.extension()))
self.iter_set_downloading_columns( new_model, new_iter)
self.update_save_dir_size()
@ -456,7 +460,9 @@ class podcastItem(object):
break
episode.url = util.normalize_feed_url( enclosure.get( 'href', ''))
elif hasattr(entry, 'link'):
extension = util.file_extension_from_url(entry.link)
(filename, extension) = util.filename_from_url(entry.link)
if extension == '' and hasattr( entry, 'type'):
extension = util.extension_from_mimetype(e.type)
file_type = util.file_type_by_extension(extension)
if file_type is not None:
log('Adding episode with link to file type "%s".', file_type, sender=episode)
@ -558,8 +564,8 @@ class podcastItem(object):
log('Cannot delete episode from disk: %s', self.title, traceback=True, sender=self)
def local_filename( self):
ext = util.file_extension_from_url(self.url)
ext = self.extension()
# For compatibility with already-downloaded episodes,
# we accept md5 filenames if they are downloaded now.
md5_filename = os.path.join(self.channel.save_dir, md5.new(self.url).hexdigest()+ext)
@ -567,8 +573,8 @@ class podcastItem(object):
return md5_filename
# If the md5 filename does not exist,
episode = util.file_extension_from_url(self.url, complete_filename=True)
episode = util.sanitize_filename(episode)
( episode, e ) = util.filename_from_url(self.url)
episode = util.sanitize_filename(episode) + ext
# If the episode filename looks suspicious,
# we still return the md5 filename to be on
@ -578,6 +584,14 @@ class podcastItem(object):
filename = os.path.join(self.channel.save_dir, episode)
return filename
def extension( self):
    """
    Return the file extension (with dot) for this episode.

    The extension is taken from the episode URL. If the URL yields
    no extension, or one that file_type_by_extension() does not
    recognize, the extension is guessed from the episode's mimetype.
    """
    ( filename, ext ) = util.filename_from_url(self.url)
    # if we can't detect the extension from the url fallback on the mimetype
    if ext == '' or util.file_type_by_extension(ext) is None:
        ext = util.extension_from_mimetype(self.mimetype)
        # Report the mimetype under the "mimetype" label (the guessed
        # extension used to be printed there) and hand the values to
        # log() as arguments, like the other log() calls do.
        log('Getting extension from mimetype for: %s (mimetype: %s)', self.title, self.mimetype, sender=self)
    return ext
def mark_new(self):
self.state = db.STATE_NORMAL
self.is_played = False
@ -604,7 +618,7 @@ class podcastItem(object):
return self.title
def file_type( self):
return util.file_type_by_extension( util.file_extension_from_url( self.url))
return util.file_type_by_extension( self.extension() )
@property
def basename( self):

View File

@ -54,6 +54,7 @@ import urllib
import urllib2
import httplib
import webbrowser
import mimetypes
import feedparser
@ -415,12 +416,17 @@ def torrent_filename( filename):
except:
return None
def file_extension_from_url(url, complete_filename=False):
def extension_from_mimetype(mimetype):
"""
Extracts the (lowercase) file name extension (with dot)
Simply guesses what the file extension should be from the mimetype
"""
return mimetypes.guess_extension(mimetype) or ''
def filename_from_url(url):
"""
Extracts the filename and (lowercase) extension (with dot)
from a URL, e.g. http://server.com/file.MP3?download=yes
will result in the string ".mp3" being returned.
will result in the tuple ("file", ".mp3") being returned.
This function will also try to best-guess the "real"
extension for a media file (audio, video, torrent) by
@ -428,43 +434,29 @@ def file_extension_from_url(url, complete_filename=False):
into the query string to find better matches, if the
original extension does not resolve to a known type.
If the optional parameter "complete_filename" is set to
True, this will not return the extension, but the
complete filename (basename) of the found media file.
http://my.net/redirect.php?my.net/file.ogg => ".ogg"
http://server/get.jsp?file=/episode0815.MOV => ".mov"
http://s/redirect.mp4?http://serv2/test.mp4 => ".mp4"
http://my.net/redirect.php?my.net/file.ogg => ("file", ".ogg")
http://server/get.jsp?file=/episode0815.MOV => ("episode0815", ".mov")
http://s/redirect.mp4?http://serv2/test.mp4 => ("test", ".mp4")
"""
(scheme, netloc, path, para, query, fragid) = urlparse.urlparse(url)
filename = os.path.basename( urllib.unquote(path))
(tmp, extension) = os.path.splitext(filename)
(filename, extension) = os.path.splitext(os.path.basename( urllib.unquote(path)))
if file_type_by_extension(extension) is not None and not \
query.startswith(scheme+'://'):
# We have found a valid extension (audio, video, torrent)
# and the query string doesn't look like a URL
if complete_filename:
return filename
else:
return extension.lower()
return ( filename, extension.lower() )
# If the query string looks like a possible URL, try that first
if len(query.strip()) > 0 and query.find('/') != -1:
query_url = '://'.join((scheme, urllib.unquote(query)))
query_extension = file_extension_from_url(query_url)
(query_filename, query_extension) = filename_from_url(query_url)
if file_type_by_extension(query_extension) is not None:
if complete_filename:
return os.path.basename(query_url)
else:
return query_extension
return os.path.splitext(os.path.basename(query_url))
# No exact match found, simply return the original extension
if complete_filename:
return filename
else:
return extension.lower()
# No exact match found, simply return the original filename & extension
return ( filename, extension.lower() )
def file_type_by_extension( extension):