gpodder/src/gpodder/util.py



#
# gPodder (a media aggregator / podcast client)
# Copyright (C) 2005-2007 Thomas Perl <thp at perli.net>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 
# MA  02110-1301, USA.
#

#
#  util.py -- Misc utility functions
#  Thomas Perl <thp@perli.net> 2007-08-04
#

"""Miscellaneous helper functions for gPodder

This module provides helper and utility functions for gPodder that 
are not tied to any specific part of gPodder.

"""


from gpodder.liblogger import log

import os
import os.path

import re
import htmlentitydefs

import urlparse
import urllib


def make_directory( path):
    """
    Tries to create a directory if it does not exist already.
    Returns True if the directory exists after the function 
    call, False otherwise.
    """
    if os.path.isdir( path):
        return True

    try:
        os.makedirs( path)
    except:
        log( 'Could not create directory: %s', path)
        return False

    return True


def normalize_feed_url( url):
    """
    Converts any URL to http:// or ftp:// so that it can be 
    used with "wget". If the URL cannot be converted (invalid
    or unknown scheme), "None" is returned.
    """
    if not url or len( url) < 8:
        return None
    
    if url.startswith( 'http://') or url.startswith( 'ftp://'):
        return url

    if url.startswith( 'feed://'):
        return 'http://' + url[7:]

    return None


def username_password_from_url( url):
    """
    Returns a tuple (username,password) containing authentication
    data from the specified URL or (None,None) if no authentication
    data can be found in the URL.
    """
    (username, password) = (None, None)

    (scheme, netloc, path, params, query, fragment) = urlparse.urlparse( url)

    if '@' in netloc:
        (username, password) = netloc.split( '@', 1)[0].split( ':', 1)
        username = urllib.unquote( username)
        password = urllib.unquote( password)

    return (username, password)


def directory_is_writable( path):
    """
    Returns True if the specified directory exists and is writable
    by the current user.
    """
    return os.path.isdir( path) and os.access( path, os.W_OK)


def calculate_size( path):
    """
    Tries to calculate the size of a directory, including any 
    subdirectories found. The returned value might not be 
    correct if the user doesn't have appropriate permissions 
    to list all subdirectories of the given path.
    """
    if os.path.dirname( path) == '/':
        return 0L

    if os.path.isfile( path):
        return os.path.getsize( path)

    if os.path.isdir( path):
        sum = os.path.getsize( path)

        for item in os.listdir( path):
            try:
                sum += calculate_size( os.path.join( path, item))
            except:
                pass

        return sum

    return 0L


def format_filesize( bytesize, method = None):
    """
    Formats the given size in bytes to be human-readable, 
    either the most appropriate form (B, KB, MB, GB) or 
    a form specified as optional second parameter (e.g. "MB").
    """
    methods = {
        'GB': 1024.0 * 1024.0 * 1024.0,
        'MB': 1024.0 * 1024.0,
        'KB': 1024.0,
        'B':  1.0
    }

    bytesize = float( bytesize)

    if method not in methods:
        method = 'B'
        for trying in ( 'KB', 'MB', 'GB' ):
            if bytesize >= methods[trying]:
                method = trying

    return '%.2f %s' % ( bytesize / methods[method], method, )


def delete_file( path):
    """
    Tries to delete the given filename and silently 
    ignores deletion errors (if the file doesn't exist).
    Also deletes extracted cover files if they exist.
    """
    log( 'Trying to delete: %s', path)
    try:
        os.unlink( path)
        # if libipodsync extracted the cover file, remove it here
        cover_path = path + '.cover.jpg'
        if os.path.isfile( cover_path):
            os.unlink( cover_path)
    except:
        pass


def remove_html_tags( html):
    """
    Remove HTML tags from a string and replace numeric and
    named entities with the corresponding character, so the 
    HTML text can be displayed in a simple text view.
    """
    # strips html from a string (fix for <description> tags containing html)
    rexp = re.compile( "<[^>]*>")
    stripstr = rexp.sub( '', html)
    # replaces numeric entities with entity names
    dict = htmlentitydefs.codepoint2name
    for key in dict.keys():
        stripstr = stripstr.replace( '&#'+str(key)+';', '&'+unicode( dict[key], 'iso-8859-1')+';')
    # strips html entities
    dict = htmlentitydefs.entitydefs
    for key in dict.keys():
        stripstr = stripstr.replace( '&'+unicode(key,'iso-8859-1')+';', unicode(dict[key], 'iso-8859-1'))
    return stripstr


def torrent_filename( filename):
    """
    Checks if a file is a ".torrent" file by examining its 
    contents and searching for the file name of the file 
    to be downloaded.

    Returns the name of the file the ".torrent" will download 
    or None if no filename is found (the file is no ".torrent")
    """
    if not os.path.exists( filename):
        return None

    header = open( filename).readline()
    try:
        header.index( '6:pieces')
        name_length_pos = header.index('4:name') + 6

        colon_pos = header.find( ':', name_length_pos)
        name_length = int(header[name_length_pos:colon_pos]) + 1
        name = header[(colon_pos + 1):(colon_pos + name_length)]
        return name
    except:
        return None


def file_extension_from_url( url):
    """
    Extracts the (lowercase) file name extension (with dot)
    from a URL, e.g. http://server.com/file.MP3?download=yes
    will result in the string ".mp3" being returned.
    """
    path = urlparse.urlparse( url)[2]
    filename = urllib.unquote( os.path.basename( path))
    return os.path.splitext( filename)[1].lower()


def file_type_by_extension( extension):
    """
    Tries to guess the file type by looking up the filename 
    extension from a table of known file types. Will return 
    the type as string ("audio", "video" or "torrent") or 
    None if the file type cannot be determined.
    """
    types = {
            'audio': [ 'mp3', 'ogg', 'wav', 'wma', 'aac', 'm4a' ],
            'video': [ 'mp4', 'avi', 'mpg', 'mpeg', 'm4v', 'mov', 'divx' ],
            'torrent': [ 'torrent' ],
    }

    if extension[0] == '.':
        extension = extension[1:]

    for type in types:
        if extension in types[type]:
            return type
    
    return None
gpodder.gui module; gpodder.util module (mostly reorganization stuff) git-svn-id: svn://svn.berlios.de/gpodder/trunk@363 b0d088ad-0a06-0410-aad2-9ed5178a7e87 2007-08-07 20:11:31 +02:00

			`#`
			`# gPodder (a media aggregator / podcast client)`
			`# Copyright (C) 2005-2007 Thomas Perl <thp at perli.net>`
			`#`
			`# This program is free software; you can redistribute it and/or`
			`# modify it under the terms of the GNU General Public License`
			`# as published by the Free Software Foundation; either version 2`
			`# of the License, or (at your option) any later version.`
			`#`
			`# This program is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU General Public License for more details.`
			`#`
			`# You should have received a copy of the GNU General Public License`
			`# along with this program; if not, write to the Free Software`
			`# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,`
			`# MA 02110-1301, USA.`
			`#`

			`#`
			`# util.py -- Misc utility functions`
			`# Thomas Perl <thp@perli.net> 2007-08-04`
			`#`

			`"""Miscellaneous helper functions for gPodder`

			`This module provides helper and utility functions for gPodder that`
			`are not tied to any specific part of gPodder.`

			`"""`


			`from gpodder.liblogger import log`

			`import os`
			`import os.path`

			`import re`
			`import htmlentitydefs`

			`import urlparse`
			`import urllib`


			`def make_directory( path):`
			`"""`
			`Tries to create a directory if it does not exist already.`
			`Returns True if the directory exists after the function`
			`call, False otherwise.`
			`"""`
			`if os.path.isdir( path):`
			`return True`

			`try:`
			`os.makedirs( path)`
			`except:`
			`log( 'Could not create directory: %s', path)`
			`return False`

			`return True`


			`def normalize_feed_url( url):`
			`"""`
			`Converts any URL to http:// or ftp:// so that it can be`
			`used with "wget". If the URL cannot be converted (invalid`
			`or unknown scheme), "None" is returned.`
			`"""`
			`if not url or len( url) < 8:`
			`return None`

			`if url.startswith( 'http://') or url.startswith( 'ftp://'):`
			`return url`

			`if url.startswith( 'feed://'):`
			`return 'http://' + url[7:]`

			`return None`


			`def username_password_from_url( url):`
			`"""`
			`Returns a tuple (username,password) containing authentication`
			`data from the specified URL or (None,None) if no authentication`
			`data can be found in the URL.`
			`"""`
			`(username, password) = (None, None)`

			`(scheme, netloc, path, params, query, fragment) = urlparse.urlparse( url)`

			`if '@' in netloc:`
			`(username, password) = netloc.split( '@', 1)[0].split( ':', 1)`
			`username = urllib.unquote( username)`
			`password = urllib.unquote( password)`

			`return (username, password)`


			`def directory_is_writable( path):`
			`"""`
			`Returns True if the specified directory exists and is writable`
			`by the current user.`
			`"""`
			`return os.path.isdir( path) and os.access( path, os.W_OK)`


			`def calculate_size( path):`
			`"""`
			`Tries to calculate the size of a directory, including any`
			`subdirectories found. The returned value might not be`
			`correct if the user doesn't have appropriate permissions`
			`to list all subdirectories of the given path.`
			`"""`
			`if os.path.dirname( path) == '/':`
			`return 0L`

			`if os.path.isfile( path):`
			`return os.path.getsize( path)`

			`if os.path.isdir( path):`
			`sum = os.path.getsize( path)`

			`for item in os.listdir( path):`
			`try:`
			`sum += calculate_size( os.path.join( path, item))`
			`except:`
			`pass`

			`return sum`

			`return 0L`


			`def format_filesize( bytesize, method = None):`
			`"""`
			`Formats the given size in bytes to be human-readable,`
			`either the most appropriate form (B, KB, MB, GB) or`
			`a form specified as optional second parameter (e.g. "MB").`
			`"""`
			`methods = {`
			`'GB': 1024.0 * 1024.0 * 1024.0,`
			`'MB': 1024.0 * 1024.0,`
			`'KB': 1024.0,`
			`'B': 1.0`
			`}`

			`bytesize = float( bytesize)`

			`if method not in methods:`
			`method = 'B'`
			`for trying in ( 'KB', 'MB', 'GB' ):`
			`if bytesize >= methods[trying]:`
			`method = trying`

			`return '%.2f %s' % ( bytesize / methods[method], method, )`


			`def delete_file( path):`
			`"""`
			`Tries to delete the given filename and silently`
			`ignores deletion errors (if the file doesn't exist).`
			`Also deletes extracted cover files if they exist.`
			`"""`
			`log( 'Trying to delete: %s', path)`
			`try:`
			`os.unlink( path)`
			`# if libipodsync extracted the cover file, remove it here`
			`cover_path = path + '.cover.jpg'`
			`if os.path.isfile( cover_path):`
			`os.unlink( cover_path)`
			`except:`
			`pass`


			`def remove_html_tags( html):`
			`"""`
			`Remove HTML tags from a string and replace numeric and`
			`named entities with the corresponding character, so the`
			`HTML text can be displayed in a simple text view.`
			`"""`
			`# strips html from a string (fix for <description> tags containing html)`
			`rexp = re.compile( "<[^>]*>")`
			`stripstr = rexp.sub( '', html)`
			`# replaces numeric entities with entity names`
			`dict = htmlentitydefs.codepoint2name`
			`for key in dict.keys():`
			`stripstr = stripstr.replace( '&#'+str(key)+';', '&'+unicode( dict[key], 'iso-8859-1')+';')`
			`# strips html entities`
			`dict = htmlentitydefs.entitydefs`
			`for key in dict.keys():`
			`stripstr = stripstr.replace( '&'+unicode(key,'iso-8859-1')+';', unicode(dict[key], 'iso-8859-1'))`
			`return stripstr`


libpodcasts restructuring, clean-up git-svn-id: svn://svn.berlios.de/gpodder/branches/gpodder-thp-200708@376 b0d088ad-0a06-0410-aad2-9ed5178a7e87 2007-08-22 01:00:49 +02:00			`def torrent_filename( filename):`
			`"""`
			`Checks if a file is a ".torrent" file by examining its`
			`contents and searching for the file name of the file`
			`to be downloaded.`

			`Returns the name of the file the ".torrent" will download`
			`or None if no filename is found (the file is no ".torrent")`
			`"""`
			`if not os.path.exists( filename):`
			`return None`

			`header = open( filename).readline()`
			`try:`
			`header.index( '6:pieces')`
			`name_length_pos = header.index('4:name') + 6`

			`colon_pos = header.find( ':', name_length_pos)`
			`name_length = int(header[name_length_pos:colon_pos]) + 1`
			`name = header[(colon_pos + 1):(colon_pos + name_length)]`
			`return name`
			`except:`
			`return None`


			`def file_extension_from_url( url):`
			`"""`
			`Extracts the (lowercase) file name extension (with dot)`
			`from a URL, e.g. http://server.com/file.MP3?download=yes`
			`will result in the string ".mp3" being returned.`
			`"""`
			`path = urlparse.urlparse( url)[2]`
			`filename = urllib.unquote( os.path.basename( path))`
			`return os.path.splitext( filename)[1].lower()`


			`def file_type_by_extension( extension):`
			`"""`
			`Tries to guess the file type by looking up the filename`
			`extension from a table of known file types. Will return`
			`the type as string ("audio", "video" or "torrent") or`
			`None if the file type cannot be determined.`
			`"""`
			`types = {`
			`'audio': [ 'mp3', 'ogg', 'wav', 'wma', 'aac', 'm4a' ],`
			`'video': [ 'mp4', 'avi', 'mpg', 'mpeg', 'm4v', 'mov', 'divx' ],`
			`'torrent': [ 'torrent' ],`
			`}`

			`if extension[0] == '.':`
			`extension = extension[1:]`

			`for type in types:`
			`if extension in types[type]:`
			`return type`

			`return None`