Better file extension guessing for URLs
git-svn-id: svn://svn.berlios.de/gpodder/trunk@461 b0d088ad-0a06-0410-aad2-9ed5178a7e87
This commit is contained in:
parent
5da37feed7
commit
9069d10158
|
@ -1,3 +1,12 @@
|
|||
Sun, 11 Nov 2007 14:24:17 +0100 <thp@perli.net>
|
||||
Better file extension guessing for URLs
|
||||
|
||||
* src/gpodder/util.py: Improve file_extension_from_url() by adding
|
||||
additional checks for known good extensions and recurse into the query
|
||||
string if it looks like a URL; this should fix compatibility problems
|
||||
for feeds with strange URLs; should provide more reliable guessing;
|
||||
thanks to Nicolas Quienot <niqooo@gmail.com> for the bug report
|
||||
|
||||
Fri, 09 Nov 2007 10:05:36 +0100 <thp@perli.net>
|
||||
Fix format_filesize() usage in episodes selector and podcastItem
|
||||
|
||||
|
|
|
@ -245,10 +245,34 @@ def file_extension_from_url( url):
|
|||
Extracts the (lowercase) file name extension (with dot)
|
||||
from a URL, e.g. http://server.com/file.MP3?download=yes
|
||||
will result in the string ".mp3" being returned.
|
||||
|
||||
This function will also try to best-guess the "real"
|
||||
extension for a media file (audio, video, torrent) by
|
||||
trying to match an extension to these types and recurse
|
||||
into the query string to find better matches, if the
|
||||
original extension does not resolve to a known type.
|
||||
|
||||
http://my.net/redirect.php?my.net/file.ogg => ".ogg"
|
||||
http://server/get.jsp?file=/episode0815.MOV => ".mov"
|
||||
"""
|
||||
path = urlparse.urlparse( url)[2]
|
||||
filename = urllib.unquote( os.path.basename( path))
|
||||
return os.path.splitext( filename)[1].lower()
|
||||
(scheme, netloc, path, para, query, fragid) = urlparse.urlparse(url)
|
||||
filename = os.path.basename( urllib.unquote(path))
|
||||
(filename, extension) = os.path.splitext(filename)
|
||||
|
||||
if file_type_by_extension(extension) != None:
|
||||
# We have found a valid extension (audio, video, torrent)
|
||||
return extension.lower()
|
||||
|
||||
# If the query string looks like a possible URL, try that first
|
||||
if len(query.strip()) > 0 and query.find('/') != -1:
|
||||
query_url = '://'.join((scheme, urllib.unquote(query)))
|
||||
query_extension = file_extension_from_url(query_url)
|
||||
|
||||
if file_type_by_extension(query_extension) != None:
|
||||
return query_extension
|
||||
|
||||
# No exact match found, simply return the original extension
|
||||
return extension.lower()
|
||||
|
||||
|
||||
def file_type_by_extension( extension):
|
||||
|
@ -270,6 +294,8 @@ def file_type_by_extension( extension):
|
|||
if extension[0] == '.':
|
||||
extension = extension[1:]
|
||||
|
||||
extension = extension.lower()
|
||||
|
||||
for type in types:
|
||||
if extension in types[type]:
|
||||
return type
|
||||
|
|
Loading…
Reference in New Issue