2007-08-29 20:30:26 +02:00
# -*- coding: utf-8 -*-
2007-08-07 20:11:31 +02:00
#
2007-08-29 20:30:26 +02:00
# gPodder - A media aggregator and podcast client
2018-01-28 19:39:53 +01:00
# Copyright (c) 2005-2018 The gPodder Team
2011-10-19 14:00:38 +02:00
# Copyright (c) 2011 Neal H. Walfield
2007-08-07 20:11:31 +02:00
#
2007-08-29 20:30:26 +02:00
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
2007-08-07 20:11:31 +02:00
#
2007-08-29 20:30:26 +02:00
# gPodder is distributed in the hope that it will be useful,
2007-08-07 20:11:31 +02:00
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
2007-08-29 20:30:26 +02:00
# along with this program. If not, see <http://www.gnu.org/licenses/>.
2007-08-07 20:11:31 +02:00
#
#
# util.py -- Misc utility functions
# Thomas Perl <thp@perli.net> 2007-08-04
#
""" Miscellaneous helper functions for gPodder
2018-02-01 07:59:22 +01:00
This module provides helper and utility functions for gPodder that
2007-08-07 20:11:31 +02:00
are not tied to any specific part of gPodder .
"""
2017-11-27 22:09:32 +01:00
import json
2012-02-05 17:16:20 +01:00
2008-01-15 14:54:22 +01:00
import gpodder
2011-07-15 16:32:06 +02:00
import logging
logger = logging . getLogger ( __name__ )
2007-08-07 20:11:31 +02:00
import os
import os . path
2009-05-09 15:21:04 +02:00
import platform
2007-11-05 00:24:36 +01:00
import glob
2007-12-10 09:41:17 +01:00
import stat
2009-07-13 15:32:46 +02:00
import shlex
2012-01-03 23:59:19 +01:00
import shutil
2010-01-28 17:39:10 +01:00
import socket
2010-08-23 22:48:36 +02:00
import sys
2011-08-08 13:06:20 +02:00
import string
2007-08-07 20:11:31 +02:00
import re
2008-01-21 10:52:09 +01:00
import subprocess
2016-11-21 23:13:46 +01:00
from html . entities import entitydefs
2007-08-30 20:49:53 +02:00
import time
2007-12-06 09:31:09 +01:00
import gzip
2007-12-10 09:41:17 +01:00
import datetime
2008-03-03 20:38:22 +01:00
import threading
2007-08-07 20:11:31 +02:00
2016-11-21 23:13:46 +01:00
import http . client
2008-03-03 20:38:22 +01:00
import webbrowser
2008-07-03 01:36:39 +02:00
import mimetypes
2011-08-07 23:39:46 +02:00
import itertools
2018-05-05 23:50:37 +02:00
import urllib . error
import urllib . parse
import urllib . request
2008-03-02 13:56:16 +01:00
2016-11-21 23:13:46 +01:00
import io
2007-12-06 09:31:09 +01:00
import xml . dom . minidom
2007-08-07 20:11:31 +02:00
2016-08-08 19:51:15 +02:00
import collections
if sys . hexversion < 0x03000000 :
2016-11-21 23:13:46 +01:00
from html . parser import HTMLParser
from html . entities import name2codepoint
2016-08-08 19:51:15 +02:00
else :
from html . parser import HTMLParser
2016-08-08 20:00:28 +02:00
from html . entities import name2codepoint
2016-08-08 19:51:15 +02:00
2016-08-11 09:50:20 +02:00
try :
import html5lib
except ImportError :
logger . warn ( ' html5lib not found, falling back to HTMLParser ' )
html5lib = None
2012-12-17 15:34:08 +01:00
if gpodder . ui . win32 :
2012-12-07 20:02:21 +01:00
try :
2018-05-02 10:59:35 +02:00
import gpodder . utilwin32ctypes as win32file
2012-12-07 20:02:21 +01:00
except ImportError :
2018-05-02 10:59:35 +02:00
logger . warn ( ' Running on Win32 but utilwin32ctypes can \' t be loaded. ' )
2012-12-07 20:02:21 +01:00
win32file = None
2009-05-07 16:26:07 +02:00
_ = gpodder . gettext
2009-12-21 23:18:00 +01:00
N_ = gpodder . ngettext
2009-05-07 16:26:07 +02:00
2007-08-07 20:11:31 +02:00
2010-09-26 23:42:05 +02:00
import locale
2011-03-03 13:09:23 +01:00
try :
locale . setlocale ( locale . LC_ALL , ' ' )
2016-11-21 23:13:46 +01:00
except Exception as e :
2011-07-15 16:32:06 +02:00
logger . warn ( ' Cannot set locale ( %s ) ' , e , exc_info = True )
2010-09-26 23:42:05 +02:00
2010-08-23 22:48:36 +02:00
# Native filesystem encoding detection
encoding = sys.getfilesystemencoding()

if encoding is None:
    # Python could not determine the filesystem encoding; guess it from
    # the environment or fall back to a platform-specific default.
    if 'LANG' in os.environ and '.' in os.environ['LANG']:
        # $LANG looks like e.g. "de_DE.UTF-8" -> use the part after the dot
        lang = os.environ['LANG']
        (language, encoding) = lang.rsplit('.', 1)
        logger.info('Detected encoding: %s', encoding)
    elif gpodder.ui.win32:
        # To quote http://docs.python.org/howto/unicode.html:
        # ,,on Windows, Python uses the name "mbcs" to refer
        # to whatever the currently configured encoding is``
        encoding = 'mbcs'
    else:
        # NOTE(review): hard-coded Western European fallback -- assumes
        # a latin-ish locale when neither $LANG nor win32 applies
        encoding = 'iso-8859-15'
        logger.info('Assuming encoding: ISO-8859-15 ($LANG not set).')
2008-06-13 16:13:27 +02:00
2011-08-08 13:06:20 +02:00
# Filename / folder name sanitization
def _sanitize_char(c):
    """Map a single character to a filesystem-safe replacement (as bytes).

    Whitespace becomes a space, a small set of harmless punctuation is
    kept, all other punctuation/control/non-ASCII characters become '_'.
    """
    if c in string.whitespace:
        return b' '
    elif c in ',-.()':
        return c.encode('utf-8')
    elif c in string.punctuation or ord(c) <= 31 or ord(c) >= 127:
        return b'_'

    return c.encode('utf-8')


# 256-entry translation table: one replacement byte per input byte value.
# (Generator expression instead of the old nested list(map(...)) calls.)
SANITIZATION_TABLE = b''.join(_sanitize_char(chr(i)) for i in range(256))

del _sanitize_char
2012-02-04 21:43:37 +01:00
_MIME_TYPE_LIST = [
( ' .aac ' , ' audio/aac ' ) ,
( ' .axa ' , ' audio/annodex ' ) ,
( ' .flac ' , ' audio/flac ' ) ,
( ' .m4b ' , ' audio/m4b ' ) ,
( ' .m4a ' , ' audio/mp4 ' ) ,
( ' .mp3 ' , ' audio/mpeg ' ) ,
( ' .spx ' , ' audio/ogg ' ) ,
( ' .oga ' , ' audio/ogg ' ) ,
( ' .ogg ' , ' audio/ogg ' ) ,
( ' .wma ' , ' audio/x-ms-wma ' ) ,
( ' .3gp ' , ' video/3gpp ' ) ,
( ' .axv ' , ' video/annodex ' ) ,
( ' .divx ' , ' video/divx ' ) ,
( ' .m4v ' , ' video/m4v ' ) ,
( ' .mp4 ' , ' video/mp4 ' ) ,
( ' .ogv ' , ' video/ogg ' ) ,
( ' .mov ' , ' video/quicktime ' ) ,
( ' .flv ' , ' video/x-flv ' ) ,
( ' .mkv ' , ' video/x-matroska ' ) ,
( ' .wmv ' , ' video/x-ms-wmv ' ) ,
2012-12-29 09:49:03 +01:00
( ' .opus ' , ' audio/opus ' ) ,
2012-02-04 21:43:37 +01:00
]
_MIME_TYPES = dict ( ( k , v ) for v , k in _MIME_TYPE_LIST )
_MIME_TYPES_EXT = dict ( _MIME_TYPE_LIST )
2009-09-08 17:29:55 +02:00
2018-05-27 20:09:40 +02:00
def make_directory(path):
    """
    Tries to create a directory if it does not exist already.

    Returns True if the directory exists after the function
    call, False otherwise.
    """
    if os.path.isdir(path):
        return True

    try:
        # exist_ok guards against the race where another process creates
        # the directory between the isdir() check above and this call
        os.makedirs(path, exist_ok=True)
    except OSError:
        # Narrowed from a bare except: makedirs only raises OSError here
        logger.warning('Could not create directory: %s', path)
        return False

    return True
2008-12-23 20:17:52 +01:00
def normalize_feed_url(url):
    """
    Converts any URL to http:// or ftp:// so that it can be
    used with "wget". If the URL cannot be converted (invalid
    or unknown scheme), "None" is returned.

    This will also normalize feed:// and itpc:// to http://.

    >>> normalize_feed_url('itpc://example.org/podcast.rss')
    'http://example.org/podcast.rss'

    If no URL scheme is defined (e.g. "curry.com"), we will
    simply assume the user intends to add a http:// feed.

    >>> normalize_feed_url('curry.com')
    'http://curry.com/'

    There are even some more shortcuts for advanced users
    and lazy typists (see the source for details).

    >>> normalize_feed_url('fb:43FPodcast')
    'http://feeds.feedburner.com/43FPodcast'

    It will also take care of converting the domain name to
    all-lowercase (because domains are not case sensitive):

    >>> normalize_feed_url('http://Example.COM/')
    'http://example.com/'

    Some other minimalistic changes are also taken care of,
    e.g. a ? with an empty query is removed:

    >>> normalize_feed_url('http://example.org/test?')
    'http://example.org/test'

    Username and password in the URL must not be affected
    by URL normalization (see gPodder bug 1942):

    >>> normalize_feed_url('http://UserName:PassWord@Example.com/')
    'http://UserName:PassWord@example.com/'
    """
    if not url or len(url) < 8:
        return None

    # Shorthand prefixes to minimize the amount of typing needed
    # when subscribing to well-known feed hosters.
    shortcuts = {
        'fb:': 'http://feeds.feedburner.com/%s',
        'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
        'sc:': 'https://soundcloud.com/%s',
        # YouTube playlists. To get a list of playlists per-user, use:
        # https://gdata.youtube.com/feeds/api/users/<username>/playlists
        'ytpl:': 'http://gdata.youtube.com/feeds/api/playlists/%s',
    }

    for shorthand, template in shortcuts.items():
        if url.startswith(shorthand):
            url = template % (url[len(shorthand):],)
            break

    # Assume HTTP for URLs without scheme
    if '://' not in url:
        url = 'http://' + url

    scheme, netloc, path, query, fragment = urllib.parse.urlsplit(url)

    # Domain name is case insensitive, but username/password is not (bug 1942)
    if '@' in netloc:
        credentials, _, host = netloc.rpartition('@')
        netloc = '@'.join((credentials, host.lower()))
    else:
        netloc = netloc.lower()

    # Schemes are case insensitive as well
    scheme = scheme.lower()

    # Normalize empty paths to "/"
    if not path:
        path = '/'

    # feed://, itpc:// and itms:// are really http://
    if scheme in ('feed', 'itpc', 'itms'):
        scheme = 'http'

    if scheme not in ('http', 'https', 'ftp', 'file'):
        return None

    # urlunsplit might return "a slighty different, but equivalent URL"
    return urllib.parse.urlunsplit((scheme, netloc, path, query, fragment))
2007-08-07 20:11:31 +02:00
2009-03-24 20:44:37 +01:00
def username_password_from_url(url):
    r"""
    Returns a tuple (username, password) containing authentication
    data from the specified URL or (None, None) if no authentication
    data can be found in the URL.

    See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)

    >>> username_password_from_url('https://@host.com/')
    ('', None)
    >>> username_password_from_url('telnet://host.com/')
    (None, None)
    >>> username_password_from_url('ftp://foo:@host.com/')
    ('foo', '')
    >>> username_password_from_url('http://a:b@host.com/')
    ('a', 'b')
    >>> username_password_from_url(1)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string.
    >>> username_password_from_url(None)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string.
    >>> username_password_from_url('http://a@b:c@host.com/')
    ('a@b', 'c')
    >>> username_password_from_url('ftp://a:b:c@host.com/')
    ('a', 'b:c')
    >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
    ('i/o', 'P@ss:')
    >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
    ('österreich', None)
    >>> username_password_from_url('http://w%20x:y%20z@example.org/')
    ('w x', 'y z')
    >>> username_password_from_url('http://example.com/x@y:z@test.com/')
    (None, None)
    """
    if not isinstance(url, str):
        raise ValueError('URL has to be a string.')

    netloc = urllib.parse.urlparse(url).netloc

    if '@' not in netloc:
        return (None, None)

    # RFC1738 dictates that we should not allow ['/', '@', ':']
    # characters in the username and password field (Section 3.1):
    #
    # 1. The "/" can't be in there at this point because of the way
    #    urlparse (which we use above) works.
    # 2. Due to gPodder bug 1521, we allow "@" in the username and
    #    password field. Splitting at the *last* '@' makes sure that
    #    everything before it counts as credentials.
    # 3. The colon must be excluded (RFC2617, Section 2) in the
    #    username, but is apparently allowed in the password. Splitting
    #    at the *first* ':' pushes any extra ':'s into the password.
    credentials, _, _host = netloc.rpartition('@')
    unquote = urllib.parse.unquote

    if ':' in credentials:
        user, _, pwd = credentials.partition(':')
        return (unquote(user), unquote(pwd))

    return (unquote(credentials), None)
2018-02-11 00:22:00 +01:00
2012-07-02 05:53:33 +02:00
def directory_is_writable(path):
    """
    Returns True if the specified directory exists and is writable
    by the current user.
    """
    if not os.path.isdir(path):
        return False
    return os.access(path, os.W_OK)
2007-08-07 20:11:31 +02:00
2018-05-27 20:09:40 +02:00
def calculate_size(path):
    """
    Tries to calculate the size of a directory, including any
    subdirectories found. The returned value might not be
    correct if the user doesn't have appropriate permissions
    to list all subdirectories of the given path.
    """
    if path is None:
        return 0

    # Refuse to sum up a direct child of the filesystem root
    if os.path.dirname(path) == '/':
        return 0

    if os.path.isfile(path):
        return os.path.getsize(path)

    if os.path.isdir(path) and not os.path.islink(path):
        # 'total' replaces the old accumulator that shadowed builtins 'sum';
        # start with the size of the directory entry itself
        total = os.path.getsize(path)

        try:
            for entry in os.listdir(path):
                try:
                    total += calculate_size(os.path.join(path, entry))
                except OSError:
                    # Narrowed from a bare except: filesystem errors only
                    logger.warning('Cannot get size for %s', path, exc_info=True)
        except OSError:
            logger.warning('Cannot access %s', path, exc_info=True)

        return total

    return 0
2007-08-07 20:11:31 +02:00
2007-12-10 09:41:17 +01:00
def file_modification_datetime(filename):
    """
    Returns the modification date of the specified file
    as a datetime.datetime object or None if the modification
    date cannot be determined.
    """
    if filename is None:
        return None

    if not os.access(filename, os.R_OK):
        return None

    try:
        # st_mtime attribute instead of indexing the stat tuple
        timestamp = os.stat(filename).st_mtime
        return datetime.datetime.fromtimestamp(timestamp)
    except (OSError, OverflowError, ValueError):
        # Narrowed from a bare except: stat() raises OSError;
        # fromtimestamp() can raise Overflow/ValueError for odd mtimes
        logger.warning('Cannot get mtime for %s', filename, exc_info=True)
        return None
2008-01-28 12:38:53 +01:00
def file_age_in_days(filename):
    """
    Returns the age of the specified filename in days or
    zero if the modification date cannot be determined.
    """
    modified = file_modification_datetime(filename)
    if modified is None:
        return 0
    return (datetime.datetime.now() - modified).days
2008-01-28 12:38:53 +01:00
2018-02-11 00:22:00 +01:00
2012-07-02 05:53:33 +02:00
def file_modification_timestamp(filename):
    """
    Returns the modification date of the specified file as a number
    or -1 if the modification date cannot be determined.
    """
    if filename is None:
        return -1
    try:
        # Keep the integer ST_MTIME index (st_mtime would return a float)
        return os.stat(filename)[stat.ST_MTIME]
    except OSError:
        # Narrowed from a bare except: stat() raises OSError on failure
        logger.warning('Cannot get modification timestamp for %s', filename)
        return -1
2008-01-28 12:38:53 +01:00
def file_age_to_string(days):
    """
    Converts a "number of days" value to a string that
    can be used in the UI to display the file age.

    >>> file_age_to_string(0)
    ''
    >>> file_age_to_string(1)
    '1 day ago'
    >>> file_age_to_string(2)
    '2 days ago'
    """
    if days < 1:
        # Below one day there is nothing sensible to display
        return ''
    return N_('%(count)d day ago', '%(count)d days ago', days) % {'count': days}
2008-01-28 12:38:53 +01:00
2012-12-07 20:02:21 +01:00
def is_system_file(filename):
    """
    Checks to see if the given file is a system file.

    Only meaningful on Windows (via the win32file helper); on all
    other platforms this always returns False.
    """
    if gpodder.ui.win32 and win32file is not None:
        result = win32file.GetFileAttributes(filename)
        # -1 / 0xffffffff is returned by GetFileAttributes when an error occurs
        # 0x4 is the FILE_ATTRIBUTE_SYSTEM constant
        return result != -1 and result != 0xffffffff and result & 0x4 != 0
    else:
        # Non-Windows platforms have no "system file" attribute
        return False
2009-07-12 16:14:15 +02:00
def get_free_disk_space_win32(path):
    """
    Win32-specific code to determine the free disk space remaining
    for a given path. Uses code from:

    http://mail.python.org/pipermail/python-list/2003-May/203223.html

    Returns -1 when the win32file helper is not available.
    """
    if win32file is None:
        # Cannot determine free disk space
        return -1

    drive, tail = os.path.splitdrive(path)
    # GetDiskFreeSpaceEx returns (free-to-user, total, free-on-disk);
    # the quota-aware per-user value is the relevant one here
    userFree, userTotal, freeOnDisk = win32file.GetDiskFreeSpaceEx(drive)
    return userFree
2009-07-12 16:14:15 +02:00
2007-11-15 11:10:21 +01:00
def get_free_disk_space(path):
    """
    Calculates the free disk space available to the current user
    on the file system that contains the given path.

    If the path does not exist, this function returns -1.
    """
    if not os.path.exists(path):
        return -1

    if gpodder.ui.win32:
        # os.statvfs does not exist on Windows; use the win32 helper
        return get_free_disk_space_win32(path)

    s = os.statvfs(path)
    # Blocks available to unprivileged users times the block size
    return s.f_bavail * s.f_bsize
2008-04-19 19:01:09 +02:00
def format_date(timestamp):
    """
    Converts a UNIX timestamp to a date representation. This
    function returns "Today", "Yesterday", a weekday name or
    the date in %x format, which (according to the Python docs)
    is the "Locale's appropriate date representation".

    Returns None if there has been an error converting the
    timestamp to a string representation.
    """
    if timestamp is None:
        return None

    seconds_in_a_day = 60 * 60 * 24

    # Compare only (year, month, day) tuples
    today = time.localtime()[:3]
    yesterday = time.localtime(time.time() - seconds_in_a_day)[:3]
    try:
        timestamp_date = time.localtime(timestamp)[:3]
    except (ValueError, TypeError):
        # Merged the two previously-duplicated identical except blocks
        logger.warning('Cannot convert timestamp', exc_info=True)
        return None

    if timestamp_date == today:
        return _('Today')
    elif timestamp_date == yesterday:
        return _('Yesterday')

    try:
        diff = int((time.time() - timestamp) / seconds_in_a_day)
    except (TypeError, ValueError, OverflowError):
        # Narrowed from a bare except: arithmetic/conversion errors only
        logger.warning('Cannot convert "%s" to date.', timestamp, exc_info=True)
        return None

    try:
        timestamp = datetime.datetime.fromtimestamp(timestamp)
    except (OSError, OverflowError, ValueError):
        # Narrowed from a bare except: fromtimestamp failure modes
        return None

    if diff < 7:
        # Weekday name
        return timestamp.strftime('%A')
    else:
        # Locale's appropriate date representation
        return timestamp.strftime('%x')
2008-02-06 10:29:56 +01:00
def format_filesize(bytesize, use_si_units=False, digits=2):
    """
    Formats the given size in bytes to be human-readable,

    Returns a localized "(unknown)" string when the bytesize
    has a negative value.
    """
    si_units = (
        ('kB', 10 ** 3),
        ('MB', 10 ** 6),
        ('GB', 10 ** 9),
    )
    binary_units = (
        ('KiB', 2 ** 10),
        ('MiB', 2 ** 20),
        ('GiB', 2 ** 30),
    )

    try:
        bytesize = float(bytesize)
    except (TypeError, ValueError):
        # Narrowed from a bare except: float() raises only these two
        return _('(unknown)')

    if bytesize < 0:
        return _('(unknown)')

    units = si_units if use_si_units else binary_units

    # Start with plain bytes; upgrade to the largest unit that fits
    (used_unit, used_value) = ('B', bytesize)

    for (unit, value) in units:
        if bytesize >= value:
            used_value = bytesize / float(value)
            used_unit = unit

    return ('%.' + str(digits) + 'f %s') % (used_value, used_unit)
2007-08-07 20:11:31 +02:00
2010-04-30 13:22:50 +02:00
def delete_file(filename):
    """Delete a file from the filesystem

    Errors (permissions errors or file not found)
    are silently ignored.
    """
    try:
        os.remove(filename)
    except (OSError, TypeError):
        # Best-effort delete: OSError covers missing files and permission
        # problems; TypeError keeps the old tolerance for a None filename
        pass
2017-04-17 19:19:25 +02:00
def is_html(text):
    """Heuristically tell if text is HTML

    By looking for an open tag (more or less:)
    >>> is_html('<h1>HELLO</h1>')
    True
    >>> is_html('a < b < c')
    False
    """
    open_tag = re.compile(r'<[a-z][a-z0-9]*(?:\s.*?>|\/?>)', re.IGNORECASE | re.DOTALL)
    return open_tag.search(text) is not None
2017-04-17 19:19:25 +02:00
2008-03-20 11:08:58 +01:00
def remove_html_tags(html):
    """
    Remove HTML tags from a string and replace numeric and
    named entities with the corresponding character, so the
    HTML text can be displayed in a simple text view.
    """
    if html is None:
        return None

    # Compiled patterns (could be hoisted to module level for speed)
    strip_tags = re.compile('<[^>]*>')
    numeric_entities = re.compile(r'&#(\d{2,4});')
    named_entities = re.compile('&(.{2,8});')
    newline_tags = re.compile('(<br[^>]*>|<[/]?ul[^>]*>|</li>)', re.I)
    listing_tags = re.compile('<li[^>]*>', re.I)

    # Convert common HTML elements to their text equivalent
    text = newline_tags.sub('\n', html)
    text = listing_tags.sub('\n * ', text)
    text = re.sub('<[Pp]>', '\n\n', text)

    # Remove all HTML/XML tags from the string
    text = strip_tags.sub('', text)

    # Convert numeric XML entities to their unicode character
    text = numeric_entities.sub(lambda m: chr(int(m.group(1))), text)

    # Convert named HTML entities to their unicode character
    text = named_entities.sub(lambda m: entitydefs.get(m.group(1), ''), text)

    # Convert more than two newlines to two newlines
    text = re.sub('([\r\n]{2})([\r\n])+', '\\1', text)

    return text.strip()
2007-08-07 20:11:31 +02:00
2016-08-11 09:47:30 +02:00
class HyperlinkExtracter(object):
    """Accumulate (link target, text) fragments from SAX-like HTML events.

    Fed through the handle_* callbacks by a parser/tree walker;
    get_result() returns the collected text grouped by hyperlink target.
    """

    def __init__(self):
        # Ordered list of (target, text) fragments as emitted
        self.parts = []
        # Innermost <a href=...> target; the base None means "not a link"
        self.target_stack = [None]

    def get_result(self):
        # Group together multiple consecutive parts with same link target,
        # and remove excessive newlines.
        group_it = itertools.groupby(self.parts, key=lambda x: x[0])
        result = []
        for target, parts in group_it:
            t = ''.join(text for _, text in parts if text is not None)
            # Remove trailing spaces
            t = re.sub(' +\n', '\n', t)
            # Convert more than two newlines to two newlines
            t = t.replace('\r', '')
            t = re.sub(r'\n\n\n+', '\n\n', t)
            result.append((target, t))
        # Strip leading and trailing whitespace
        # NOTE(review): assumes at least one part was collected --
        # result[0] raises IndexError on empty input; confirm callers
        result[0] = (result[0][0], result[0][1].lstrip())
        result[-1] = (result[-1][0], result[-1][1].rstrip())
        return result

    def htmlws(self, s):
        # Replace whitespaces with a single space per HTML spec.
        if s is not None:
            return re.sub(r'[ \t\n\r]+', ' ', s)

    def handle_starttag(self, tag_name, attrs):
        # Dispatch to handle_start_<tag> if such a handler exists
        try:
            handler = getattr(self, 'handle_start_' + tag_name)
        except AttributeError:
            pass
        else:
            handler(collections.OrderedDict(attrs))

    def handle_endtag(self, tag_name):
        # Dispatch to handle_end_<tag> if such a handler exists
        try:
            handler = getattr(self, 'handle_end_' + tag_name)
        except AttributeError:
            pass
        else:
            handler()

    def handle_start_a(self, attrs):
        self.target_stack.append(attrs.get('href'))

    def handle_end_a(self):
        # Never pop the base None entry (tolerates a stray </a>)
        if len(self.target_stack) > 1:
            self.target_stack.pop()

    def output(self, text):
        # Tag the fragment with the currently active link target
        self.parts.append((self.target_stack[-1], text))

    def handle_data(self, data):
        self.output(self.htmlws(data))

    def handle_entityref(self, name):
        # Named entity (e.g. &amp;) -> corresponding character
        c = chr(name2codepoint[name])
        self.output(c)

    def handle_charref(self, name):
        # Numeric character reference: hex (&#x..;) or decimal (&#..;)
        if name.startswith('x'):
            c = chr(int(name[1:], 16))
        else:
            c = chr(int(name))
        self.output(c)

    def output_newline(self, attrs=None):
        self.output('\n')

    def output_double_newline(self, attrs=None):
        # NOTE(review): emits a single '\n' despite the name; newline runs
        # are collapsed later in get_result() -- confirm this is intended
        self.output('\n')

    def handle_start_img(self, attrs):
        # Represent images by their alt text (empty string if absent)
        self.output(self.htmlws(attrs.get('alt', '')))

    def handle_start_li(self, attrs):
        self.output('\n * ')

    handle_end_li = handle_end_ul = handle_start_br = output_newline
    handle_start_p = handle_end_p = output_double_newline
2016-08-11 09:47:30 +02:00
class ExtractHyperlinkedText(object):
    """Walk an html5lib document tree and extract hyperlinked text.

    Instances are callable: pass the parsed document, receive the
    (target, text) list produced by HyperlinkExtracter.
    """

    def __call__(self, document):
        self.extracter = HyperlinkExtracter()
        self.visit(document)
        return self.extracter.get_result()

    def visit(self, element):
        # html5lib qualifies tag names with the XHTML namespace; strip it
        NS = '{http://www.w3.org/1999/xhtml}'
        tag_name = (element.tag[len(NS):] if element.tag.startswith(NS) else element.tag).lower()
        self.extracter.handle_starttag(tag_name, list(element.items()))
        if element.text is not None:
            self.extracter.handle_data(element.text)
        for child in element:
            self.visit(child)
            # Text following a child's closing tag belongs to this element
            if child.tail is not None:
                self.extracter.handle_data(child.tail)
        self.extracter.handle_endtag(tag_name)
2016-08-21 22:57:59 +02:00
class ExtractHyperlinkedTextHTMLParser(HTMLParser):
    """Fallback extractor based on html.parser (used when html5lib is
    unavailable); forwards all parser events to a HyperlinkExtracter.
    """

    def __call__(self, html):
        self.extracter = HyperlinkExtracter()
        self.target_stack = [None]
        self.feed(html)
        self.close()
        return self.extracter.get_result()

    def handle_starttag(self, tag, attrs):
        self.extracter.handle_starttag(tag, attrs)

    def handle_endtag(self, tag):
        self.extracter.handle_endtag(tag)

    def handle_data(self, data):
        self.extracter.handle_data(data)

    def handle_entityref(self, name):
        self.extracter.handle_entityref(name)

    def handle_charref(self, name):
        self.extracter.handle_charref(name)
def extract_hyperlinked_text(html):
    """
    Convert HTML to hyperlinked text.

    The output is a list of (target, text) tuples, where target is either a URL
    or None, and text is a piece of plain text for rendering in a TextView.
    """
    if '<' not in html:
        # Probably plain text. We would remove all the newlines
        # if we treated it as HTML, so just pass it back as-is.
        return [(None, html)]

    # Prefer the more robust html5lib parser when it was importable
    if html5lib is not None:
        return ExtractHyperlinkedText()(html5lib.parseFragment(html))
    else:
        return ExtractHyperlinkedTextHTMLParser()(html)
2016-08-08 19:50:51 +02:00
2010-04-09 02:28:11 +02:00
def wrong_extension(extension):
    """
    Determine if a given extension looks like it's
    wrong (e.g. empty, extremely long or spaces)

    Returns True if the extension most likely is a
    wrong one and should be replaced.

    >>> wrong_extension('.mp3')
    False
    >>> wrong_extension('.divx')
    False
    >>> wrong_extension('mp3')
    True
    >>> wrong_extension('')
    True
    >>> wrong_extension('.12 - Everybody')
    True
    >>> wrong_extension('.')
    True
    >>> wrong_extension('.42')
    True
    """
    if not extension:
        return True
    elif len(extension) > 5:
        return True
    elif ' ' in extension:
        return True
    elif extension == '.':
        return True
    elif not extension.startswith('.'):
        return True
    else:
        try:
            # ".<number>" is an invalid extension
            float(extension)
            return True
        except ValueError:
            # Not numeric -> looks like a real extension.
            # (Was a bare "except:", which would also swallow
            # KeyboardInterrupt/SystemExit.)
            pass

    return False
2008-07-03 01:36:39 +02:00
def extension_from_mimetype(mimetype):
    """
    Simply guesses what the file extension should be from the mimetype

    >>> extension_from_mimetype('audio/mp4')
    '.m4a'
    >>> extension_from_mimetype('audio/ogg')
    '.ogg'
    >>> extension_from_mimetype('audio/mpeg')
    '.mp3'
    >>> extension_from_mimetype('video/x-matroska')
    '.mkv'
    >>> extension_from_mimetype('wrong-mimetype')
    ''
    """
    try:
        # Prefer our curated mapping over the mimetypes module
        return _MIME_TYPES[mimetype]
    except KeyError:
        return mimetypes.guess_extension(mimetype) or ''
2007-08-22 01:00:49 +02:00
2009-02-25 14:23:03 +01:00
2012-02-04 21:43:37 +01:00
def mimetype_from_extension(extension):
    """
    Simply guesses what the mimetype should be from the file extension

    >>> mimetype_from_extension('.m4a')
    'audio/mp4'
    >>> mimetype_from_extension('.ogg')
    'audio/ogg'
    >>> mimetype_from_extension('.mp3')
    'audio/mpeg'
    >>> mimetype_from_extension('.mkv')
    'video/x-matroska'
    >>> mimetype_from_extension('._invalid_file_extension_')
    ''
    """
    try:
        # Prefer our curated mapping over the mimetypes module
        return _MIME_TYPES_EXT[extension]
    except KeyError:
        pass

    # Prepend a dummy name to the extension, so guess_type works
    mime, _encoding = mimetypes.guess_type('file' + extension)
    return mime or ''
2009-02-25 14:23:03 +01:00
def extension_correct_for_mimetype(extension, mimetype):
    """
    Check if the given filename extension (e.g. ".ogg") is a possible
    extension for a given mimetype (e.g. "application/ogg") and return
    a boolean value (True if it's possible, False if not).

    >>> extension_correct_for_mimetype('.ogg', 'application/ogg')
    True
    >>> extension_correct_for_mimetype('.ogv', 'video/ogg')
    True
    >>> extension_correct_for_mimetype('.ogg', 'audio/mpeg')
    False
    >>> extension_correct_for_mimetype('.m4a', 'audio/mp4')
    True
    >>> extension_correct_for_mimetype('mp3', 'audio/mpeg')
    Traceback (most recent call last):
      ...
    ValueError: "mp3" is not an extension (missing .)
    >>> extension_correct_for_mimetype('.mp3', 'audio mpeg')
    Traceback (most recent call last):
      ...
    ValueError: "audio mpeg" is not a mimetype (missing /)
    """
    if '/' not in mimetype:
        raise ValueError('"%s" is not a mimetype (missing /)' % mimetype)
    if not extension.startswith('.'):
        raise ValueError('"%s" is not an extension (missing .)' % extension)

    # Curated list of known-good (extension, mimetype) pairs
    if (extension, mimetype) in _MIME_TYPE_LIST:
        return True

    # A "default" extension derived from the mimetype's subtype, e.g.
    # "application/ogg" becomes ".ogg", "audio/mpeg" becomes ".mpeg"
    subtype_ext = '.' + mimetype.split('/')[-1]

    return extension in [subtype_ext] + mimetypes.guess_all_extensions(mimetype)
2009-02-25 14:23:03 +01:00
2008-07-03 01:36:39 +02:00
def filename_from_url(url):
    """
    Extracts the filename and (lowercase) extension (with dot)
    from a URL, e.g. http://server.com/file.MP3?download=yes
    will result in the string ("file", ".mp3") being returned.

    This function will also try to best-guess the "real"
    extension for a media file (audio, video) by
    trying to match an extension to these types and recurse
    into the query string to find better matches, if the
    original extension does not resolve to a known type.

    http://my.net/redirect.php?my.net/file.ogg => ("file", ".ogg")
    http://server/get.jsp?file=/episode0815.MOV => ("episode0815", ".mov")
    http://s/redirect.mp4?http://serv2/test.mp4 => ("test", ".mp4")
    """
    scheme, netloc, path, para, query, fragid = urllib.parse.urlparse(url)
    basename = os.path.basename(urllib.parse.unquote(path))
    filename, extension = os.path.splitext(basename)

    if file_type_by_extension(extension) is not None and not \
            query.startswith(scheme + '://'):
        # We have found a valid extension (audio, video)
        # and the query string doesn't look like a URL
        return (filename, extension.lower())

    # If the query string looks like a possible URL, try that first
    if len(query.strip()) > 0 and query.find('/') != -1:
        query_url = '://'.join((scheme, urllib.parse.unquote(query)))
        query_filename, query_extension = filename_from_url(query_url)

        if file_type_by_extension(query_extension) is not None:
            return os.path.splitext(os.path.basename(query_url))

    # No exact match found, simply return the original filename & extension
    return (filename, extension.lower())
2007-08-22 01:00:49 +02:00
2009-09-08 17:29:55 +02:00
def file_type_by_extension(extension):
    """
    Tries to guess the file type by looking up the filename
    extension from a table of known file types. Will return
    "audio", "video" or None.

    >>> file_type_by_extension('.aif')
    'audio'
    >>> file_type_by_extension('.3GP')
    'video'
    >>> file_type_by_extension('.m4a')
    'audio'
    >>> file_type_by_extension('.txt') is None
    True
    >>> file_type_by_extension(None) is None
    True
    >>> file_type_by_extension('ogg')
    Traceback (most recent call last):
      ...
    ValueError: Extension does not start with a dot: ogg
    """
    if not extension:
        return None

    if not extension.startswith('.'):
        raise ValueError('Extension does not start with a dot: %s' % extension)

    extension = extension.lower()

    known = _MIME_TYPES_EXT.get(extension)
    if known is not None:
        # The top-level media type ("audio", "video", ...) of the mimetype
        return known.split('/')[0]

    # Need to prepend something to the extension, so guess_type works
    mime, _encoding = mimetypes.guess_type('file' + extension)

    if mime is not None and '/' in mime:
        category, _subtype = mime.split('/', 1)
        if category in ('audio', 'video', 'image'):
            return category

    return None
2007-08-25 08:11:19 +02:00
2018-05-27 20:09:40 +02:00
def get_first_line(s):
    """
    Returns only the first line of a string, stripped so
    that it doesn't have whitespace before or after.
    """
    first, _, _rest = s.strip().partition('\n')
    return first.strip()
2007-08-30 20:49:53 +02:00
2012-07-30 21:58:04 +02:00
def object_string_formatter(s, **kwargs):
    """
    Makes attributes of object passed in as keyword
    arguments available as {OBJECTNAME.ATTRNAME} in
    the passed-in string and returns a string with
    the above arguments replaced with the attribute
    values of the corresponding object.

    >>> class x: pass
    >>> a = x()
    >>> a.title = 'Hello world'
    >>> object_string_formatter('{episode.title}', episode=a)
    'Hello world'

    >>> class x: pass
    >>> a = x()
    >>> a.published = 123
    >>> object_string_formatter('Hi {episode.published} 456', episode=a)
    'Hi 123 456'
    """
    result = s
    for key, o in kwargs.items():
        # All "{key.attr}" placeholders referencing this object
        matches = re.findall(r'\{%s\.([^\}]+)\}' % key, s)
        for attr in matches:
            if hasattr(o, attr):
                try:
                    from_s = '{%s.%s}' % (key, attr)
                    to_s = str(getattr(o, attr))
                    result = result.replace(from_s, to_s)
                except Exception:
                    # Was a bare "except:"; logger.warn is deprecated
                    # in favor of logger.warning
                    logger.warning('Replace of "%s" failed for "%s".', attr, s)

    return result
2007-11-02 17:37:14 +01:00
2011-07-05 16:08:25 +02:00
def format_desktop_command(command, filenames, start_position=None):
    """
    Formats a command template from the "Exec=" line of a .desktop
    file to a string that can be invoked in a shell.

    Handled format strings: %U, %u, %F, %f and a fallback that
    appends the filename as first parameter of the command.

    Also handles non-standard %p which is replaced with the start_position
    (probably only makes sense if starting a single file). (see bug 1140)

    See http://standards.freedesktop.org/desktop-entry-spec/1.0/ar01s06.html

    Returns a list of commands to execute, either one for
    each filename if the application does not support multiple
    file names or one for all filenames (%U, %F or unknown).
    """
    # Replace backslashes with slashes to fix win32 issues
    # (even on win32, "/" works, but "\" does not)
    command = command.replace('\\', '/')

    if start_position is not None:
        command = command.replace('%p', str(start_position))

    args = shlex.split(command)

    before, after = args, []
    wants_all_files = True
    for code in ('%U', '%F', '%u', '%f'):
        if code in args:
            pos = args.index(code)
            before, after = args[:pos], args[pos + 1:]
            # %U and %F accept a list of files in a single invocation
            wants_all_files = code in ('%U', '%F')
            break

    if wants_all_files:
        return [before + filenames + after]

    # One command invocation per file
    return [before + [name] + after for name in filenames]
2007-11-02 17:37:14 +01:00
2018-02-11 00:22:00 +01:00
2009-08-24 13:04:11 +02:00
def url_strip_authentication(url):
    """
    Strips authentication data from an URL. Returns the URL with
    the authentication data removed from it.

    >>> url_strip_authentication('https://host.com/')
    'https://host.com/'
    >>> url_strip_authentication('telnet://foo:bar@host.com/')
    'telnet://host.com/'
    >>> url_strip_authentication('http://aa:bc@localhost/x')
    'http://localhost/x'
    >>> url_strip_authentication('http://x@x.com:s3cret@example.com/')
    'http://example.com/'
    """
    parts = list(urllib.parse.urlsplit(url))
    # parts[1] is the HOST part of the URL
    host = parts[1]
    if '@' in host:
        # Split on the LAST "@", since the credentials themselves
        # may contain "@" characters
        parts[1] = host.rsplit('@', 1)[1]

    return urllib.parse.urlunsplit(parts)
2009-08-24 13:04:11 +02:00
def url_add_authentication(url, username, password):
    """
    Adds authentication data (username, password) to a given
    URL in order to construct an authenticated URL.

    >>> url_add_authentication('https://host.com/', '', None)
    'https://host.com/'
    >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
    'telnet://foo:bar@host.com/'
    >>> url_add_authentication('ftp://example.org', 'billy', None)
    'ftp://billy@example.org'
    >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
    'http://aa:bc@localhost/x'
    >>> url_add_authentication('http://x.org/', 'a b', 'c d')
    'http://a%20b:c%20d@x.org/'
    """
    if username is None or username == '':
        return url

    # Relaxations of the strict quoting rules (bug 1521):
    # 1. Accept '@' in username and password
    # 2. Accept ':' in password only
    quoted_user = urllib.parse.quote(username, safe='@')

    if password is None:
        auth_string = quoted_user
    else:
        quoted_password = urllib.parse.quote(password, safe='@:')
        auth_string = ':'.join((quoted_user, quoted_password))

    # Drop any authentication that is already embedded in the URL
    url_parts = list(urllib.parse.urlsplit(url_strip_authentication(url)))
    # url_parts[1] is the HOST part of the URL
    url_parts[1] = '@'.join((auth_string, url_parts[1]))

    return urllib.parse.urlunsplit(url_parts)
2009-08-24 13:04:11 +02:00
2007-11-05 01:15:56 +01:00
2013-02-26 21:46:38 +01:00
def urlopen(url, headers=None, data=None, timeout=None):
    """
    An URL opener with the User-agent set to gPodder (with version)
    """
    username, password = username_password_from_url(url)
    if username is not None or password is not None:
        # Credentials embedded in the URL: strip them out and hand
        # them to a basic-auth handler instead
        url = url_strip_authentication(url)
        password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, url, username, password)
        auth_handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
        opener = urllib.request.build_opener(auth_handler)
    else:
        opener = urllib.request.build_opener()

    # Never mutate the caller's headers dict
    request_headers = {} if headers is None else dict(headers)
    request_headers['User-agent'] = gpodder.user_agent

    request = urllib.request.Request(url, data=data, headers=request_headers)
    if timeout is None:
        return opener.open(request)
    return opener.open(request, timeout=timeout)
2010-05-12 13:38:31 +02:00
2018-02-11 00:22:00 +01:00
2009-02-09 23:26:47 +01:00
def get_real_url(url):
    """
    Gets the real URL of a file and resolves all redirects.

    On any failure, logs the error and falls back to returning
    the original URL unchanged (best-effort behavior).
    """
    try:
        return urlopen(url).geturl()
    except Exception:
        # Was a bare "except:"; "except Exception:" keeps the same
        # best-effort fallback without swallowing KeyboardInterrupt
        logger.error('Getting real url for %s', url, exc_info=True)
        return url
2009-02-09 23:26:47 +01:00
2011-11-05 21:22:21 +01:00
def find_command(command):
    """
    Searches the system's PATH for a specific command that is
    executable by the user. Returns the first occurence of an
    executable binary in the PATH, or None if the command is
    not available.

    On Windows, this also looks for "<command>.bat" and
    "<command>.exe" files if "<command>" itself doesn't exist.
    """
    if 'PATH' not in os.environ:
        return None

    for directory in os.environ['PATH'].split(os.pathsep):
        candidate = os.path.join(directory, command)

        if gpodder.ui.win32 and not os.path.exists(candidate):
            # Try the well-known executable extensions on Windows
            for suffix in ('.bat', '.exe'):
                with_suffix = candidate + suffix
                if os.path.isfile(with_suffix):
                    candidate = with_suffix
                    break

        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            return candidate

    return None
2011-10-12 20:51:46 +02:00
2008-01-15 14:54:22 +01:00
def idle_add(func, *args):
    """Run a function in the main GUI thread

    This is a wrapper function that does the Right Thing depending on if we are
    running on Gtk+, Qt or CLI.

    You should use this function if you are calling from a Python thread and
    modify UI data, so that you make sure that the function is called as soon
    as possible from the main UI thread.
    """
    if not gpodder.ui.gtk:
        # No Gtk main loop running: just call synchronously
        func(*args)
        return

    from gi.repository import GObject
    GObject.idle_add(func, *args)
2008-01-21 10:52:09 +01:00
2008-11-19 20:50:38 +01:00
def bluetooth_available():
    """
    Returns True or False depending on the availability
    of bluetooth functionality on the system.
    """
    return any(find_command(cmd)
               for cmd in ('bluetooth-sendto', 'gnome-obex-send'))
2009-07-06 15:21:36 +02:00
def bluetooth_send_file(filename):
    """
    Sends a file via bluetooth.

    This function tries to use "bluetooth-sendto", and if
    it is not available, it also tries "gnome-obex-send".
    """
    for sender in ('bluetooth-sendto', 'gnome-obex-send'):
        if find_command(sender):
            # True iff the helper exited with status 0
            return (Popen([sender, filename], close_fds=True).wait() == 0)

    logger.error('Cannot send file. Please install "bluetooth-sendto" or "gnome-obex-send".')
    return False
2010-06-04 20:43:38 +02:00
def format_time(value):
    """Format a seconds value to a string

    >>> format_time(0)
    '00:00'
    >>> format_time(20)
    '00:20'
    >>> format_time(3600)
    '01:00:00'
    >>> format_time(10921)
    '03:02:01'
    """
    # Plain integer arithmetic instead of datetime.utcfromtimestamp
    # (deprecated since Python 3.12); also fixes silent wrap-around
    # for durations of 24 hours or more.
    hours, remainder = divmod(int(value), 3600)
    minutes, seconds = divmod(remainder, 60)
    if hours == 0:
        return '%02d:%02d' % (minutes, seconds)
    return '%02d:%02d:%02d' % (hours, minutes, seconds)
2018-02-11 00:22:00 +01:00
2010-06-04 20:43:38 +02:00
def parse_time(value):
    """Parse a time string into seconds

    >>> parse_time('00:00')
    0
    >>> parse_time('00:00:00')
    0
    >>> parse_time('00:20')
    20
    >>> parse_time('01:00:00')
    3600
    >>> parse_time('03:02:01')
    10921
    >>> parse_time('61:08')
    3668
    >>> parse_time('25:03:30')
    90210
    >>> parse_time('25:3:30')
    90210
    >>> parse_time('61.08')
    3668
    """
    if value == '':
        return 0

    if not value:
        raise ValueError('Invalid value: %s' % (str(value),))

    # "H:MM:SS" (also accepts "." as separator and 1-digit fields)
    match = re.match(r'(\d+)[:.](\d\d?)[:.](\d\d?)', value)
    if match:
        hours, minutes, seconds = (int(g) for g in match.groups())
        return (hours * 60 + minutes) * 60 + seconds

    # "M:SS" (also accepts "." as separator)
    match = re.match(r'(\d+)[:.](\d\d?)', value)
    if match:
        minutes, seconds = (int(g) for g in match.groups())
        return minutes * 60 + seconds

    # Plain number of seconds
    return int(value)
2008-02-20 13:46:51 +01:00
def format_seconds_to_hour_min_sec(seconds):
    """
    Take the number of seconds and format it into a
    human-readable string (duration).

    >>> format_seconds_to_hour_min_sec(3834)
    '1 hour, 3 minutes and 54 seconds'
    >>> format_seconds_to_hour_min_sec(3600)
    '1 hour'
    >>> format_seconds_to_hour_min_sec(62)
    '1 minute and 2 seconds'
    """
    if seconds < 1:
        return N_('%(count)d second', '%(count)d seconds', seconds) % {'count': seconds}

    seconds = int(seconds)
    hours, seconds = divmod(seconds, 3600)
    minutes, seconds = divmod(seconds, 60)

    parts = []
    if hours:
        parts.append(N_('%(count)d hour', '%(count)d hours', hours) % {'count': hours})
    if minutes:
        parts.append(N_('%(count)d minute', '%(count)d minutes', minutes) % {'count': minutes})
    if seconds:
        parts.append(N_('%(count)d second', '%(count)d seconds', seconds) % {'count': seconds})

    if len(parts) > 1:
        # "a, b and c" - all but the last joined by commas
        return (' ' + _('and') + ' ').join((', '.join(parts[:-1]), parts[-1]))

    return parts[0]
2008-01-21 10:52:09 +01:00
2018-02-11 00:22:00 +01:00
2009-12-17 13:08:55 +01:00
def http_request(url, method='HEAD'):
    """Issue a simple HTTP request and return the response object.

    NOTE(review): this always uses http.client.HTTPConnection, so
    https:// URLs are not fetched over TLS -- confirm callers only
    pass plain http URLs here.
    """
    scheme, netloc, _path, _params, _query, _fragid = urllib.parse.urlparse(url)
    conn = http.client.HTTPConnection(netloc)
    # The request target is everything after "scheme://netloc"
    conn.request(method, url[len(scheme) + len('://') + len(netloc):])
    return conn.getresponse()
2008-03-02 13:56:16 +01:00
2008-03-02 14:04:37 +01:00
def gui_open(filename):
    """
    Open a file or folder with the default application set
    by the Desktop environment. This uses "xdg-open" on all
    systems with a few exceptions:

       on Win32, os.startfile() is used

    Returns True if the file/folder could be opened, False otherwise.
    """
    try:
        if gpodder.ui.win32:
            os.startfile(filename)
        elif gpodder.ui.osx:
            Popen(['open', filename], close_fds=True)
        else:
            Popen(['xdg-open', filename], close_fds=True)
        return True
    except Exception:
        # Was a bare "except:"; keep the best-effort fallback but
        # don't swallow KeyboardInterrupt/SystemExit
        logger.error('Cannot open file/folder: "%s"', filename, exc_info=True)
        return False
2008-03-02 14:04:37 +01:00
2008-03-03 20:38:22 +01:00
def open_website(url):
    """
    Opens the specified URL using the default system web
    browser. This uses Python's "webbrowser" module, so
    make sure your system is set up correctly.
    """
    def _open_in_browser():
        webbrowser.open(url)

    # Don't block the caller while the browser starts up
    run_in_background(_open_in_browser)
2018-02-11 00:22:00 +01:00
2012-01-10 13:47:20 +01:00
def convert_bytes(d):
    """
    Convert byte strings to unicode strings

    This function will decode byte strings into unicode
    strings. Any other data types will be left alone.

    >>> convert_bytes(None)
    >>> convert_bytes(4711)
    4711
    >>> convert_bytes(True)
    True
    >>> convert_bytes(3.1415)
    3.1415
    >>> convert_bytes('Hello')
    'Hello'
    >>> type(convert_bytes(b'hoho'))
    <class 'bytes'>
    """
    if d is None:
        return d
    # bytes are passed through unchanged; the isinstance tuple had a
    # duplicated "int" (leftover from the removed Python 2 "long")
    if isinstance(d, (bytes, int, bool, float)):
        return d
    if not isinstance(d, str):
        # e.g. bytearray: decode it to a unicode string
        return d.decode('utf-8', 'ignore')
    return d
2009-03-30 20:30:19 +02:00
2016-11-21 23:42:14 +01:00
def sanitize_filename(filename, max_length=0):
    """
    Generate a sanitized version of a filename; trim filename
    if greater than max_length (0 = no limit).

    >>> sanitize_filename('https://www.host.name/feed')
    'https___www.host.name_feed'
    >>> sanitize_filename('Binärgewitter')
    'Binärgewitter'
    >>> sanitize_filename('Cool feed (ogg)')
    'Cool feed (ogg)'
    >>> sanitize_filename('Cool feed (ogg)', 1)
    'C'
    """
    if 0 < max_length < len(filename):
        logger.info('Limiting file/folder name "%s" to %d characters.', filename, max_length)
        filename = filename[:max_length]

    # see #361 - at least slash must be removed
    cleaned = re.sub(r"[\"*/:<>?\\|]", "_", filename)

    # No leading/trailing dots or whitespace in the result
    return cleaned.strip('.' + string.whitespace)
2008-03-10 16:50:12 +01:00
2008-03-29 16:22:01 +01:00
def find_mount_point(directory):
    """
    Try to find the mount point for a given directory.
    If the directory is itself a mount point, return
    it. If not, remove the last part of the path and
    re-check if it's a mount point. If the directory
    resides on your root filesystem, "/" is returned.

    >>> find_mount_point('/')
    '/'

    >>> find_mount_point(b'/something')
    Traceback (most recent call last):
      ...
    ValueError: Convert bytes objects to str first.

    >>> find_mount_point(None)
    Traceback (most recent call last):
      ...
    ValueError: Directory names should be of type str.

    >>> find_mount_point(42)
    Traceback (most recent call last):
      ...
    ValueError: Directory names should be of type str.

    >>> from minimock import mock, restore
    >>> mocked_mntpoints = ('/', '/home', '/media/usbdisk', '/media/cdrom')
    >>> mock('os.path.ismount', returns_func=lambda x: x in mocked_mntpoints)
    >>>
    >>> # For mocking os.getcwd(), we simply use a lambda to avoid the
    >>> # massive output of "Called os.getcwd()" lines in this doctest
    >>> os.getcwd = lambda: '/home/thp'
    >>>
    >>> find_mount_point('/media/usbdisk/')
    Called os.path.ismount('/media/usbdisk')
    '/media/usbdisk'
    >>> find_mount_point('/home/thp/Desktop')
    Called os.path.ismount('/home/thp/Desktop')
    Called os.path.ismount('/home/thp')
    Called os.path.ismount('/home')
    '/home'
    >>> find_mount_point('/media/cdrom/../usbdisk/blubb//')
    Called os.path.ismount('/media/usbdisk/blubb')
    Called os.path.ismount('/media/usbdisk')
    '/media/usbdisk'
    >>> restore()
    """
    if isinstance(directory, bytes):
        # We do not accept byte strings, because they could fail when
        # trying to be converted to some native encoding, so fail loudly
        # and leave it up to the callee to decode from the proper encoding.
        raise ValueError('Convert bytes objects to str first.')

    if not isinstance(directory, str):
        # The abspath/ismount/split functions of os.path only work
        # reliably with (unicode) str arguments.
        raise ValueError('Directory names should be of type str.')

    directory = os.path.abspath(directory)

    while directory != '/':
        if os.path.ismount(directory):
            return directory
        parent, _tail = os.path.split(directory)
        if parent == directory:
            # Reached a filesystem root that is not "/" (e.g. a drive
            # letter on Windows, where split('C:\\') returns itself);
            # bail out instead of looping forever.
            return directory
        directory = parent

    return '/'
2008-06-14 13:43:53 +02:00
2009-01-03 17:38:35 +01:00
# matches http:// and ftp:// and mailto://
protocolPattern = re . compile ( r ' ^ \ w+:// ' )
2018-02-11 00:22:00 +01:00
2009-01-03 17:38:35 +01:00
def isabs(string):
    """
    @return true if string is an absolute path or protocoladdress
    for addresses beginning in http:// or ftp:// or ldap:// -
    they are considered "absolute" paths.
    Source: http://code.activestate.com/recipes/208993/
    """
    # NOTE(review): the parameter name shadows the imported 'string'
    # module inside this function; kept for interface compatibility.
    if protocolPattern.match(string):
        # Return a proper bool instead of the magic int 1
        # (1 == True, so this stays backward-compatible)
        return True
    return os.path.isabs(string)
def commonpath(l1, l2, common=None):
    """
    helper function for relpath
    Source: http://code.activestate.com/recipes/208993/

    Returns a tuple (common, rest1, rest2): 'common' extended with the
    leading elements shared by l1 and l2, and the remaining (non-shared)
    tails of each list.
    """
    # Fix: the original used a mutable default argument (common=[]),
    # a classic Python pitfall.  Use a None sentinel and copy any
    # caller-supplied prefix so it is never mutated in place.
    common = [] if common is None else list(common)

    # Iterative version of the original recursion -- same results,
    # but no recursion-depth limit for very deep paths.
    while l1 and l2 and l1[0] == l2[0]:
        common.append(l1[0])
        l1 = l1[1:]
        l2 = l2[1:]
    return (common, l1, l2)
2009-01-03 17:38:35 +01:00
2018-02-11 00:22:00 +01:00
2009-01-03 17:38:35 +01:00
def relpath(p1, p2):
    """
    Finds relative path from p1 to p2
    Source: http://code.activestate.com/recipes/208993/
    """
    def pathsplit(s):
        return s.split(os.path.sep)

    (common, l1, l2) = commonpath(pathsplit(p1), pathsplit(p2))
    p = []
    if len(l1) > 0:
        # One '..' step for each path component unique to p1
        p = [('..' + os.sep) * len(l1)]
    p = p + l2
    # Bug fix: the original tested "len(p) is 0", an identity comparison
    # against an int literal (SyntaxWarning since Python 3.8 and not
    # guaranteed by the language).  Use truthiness instead.
    if not p:
        return "."
    return os.path.join(*p)
2009-04-01 12:53:13 +02:00
2010-01-28 17:39:10 +01:00
def get_hostname():
    """Return the hostname of this computer

    This can be implemented in a different way on each
    platform and should yield a unique-per-user device ID.
    """
    # Prefer platform.node(); it returns '' when the name cannot be
    # determined, in which case fall back to the socket API (which
    # might report "localhost" on some setups).
    return platform.node() or socket.gethostname()
2018-02-11 00:22:00 +01:00
2010-05-03 20:11:12 +02:00
def detect_device_type():
    """Device type detection for gpodder.net

    This function tries to detect on which
    kind of device gPodder is running on.

    Possible return values:
    desktop, laptop, mobile, server, other
    """
    # Linux: a battery entry under /proc/acpi suggests a laptop
    has_battery = bool(glob.glob('/proc/acpi/battery/*'))
    return 'laptop' if has_battery else 'desktop'
2010-07-18 20:55:08 +02:00
def write_m3u_playlist(m3u_filename, episodes, extm3u=True):
    """Create an M3U playlist from a episode list

    If the parameter "extm3u" is False, the list of
    episodes should be a list of filenames, and no
    extended information will be written into the
    M3U files (#EXTM3U / #EXTINF).

    If the parameter "extm3u" is True (default), then the
    list of episodes should be PodcastEpisode objects,
    as the extended metadata will be taken from them.
    """
    # Use a context manager so the playlist file is closed even if
    # an episode entry raises while the playlist is being written
    with open(m3u_filename, 'w') as f:
        if extm3u:
            # Mandatory header for extended playlists
            f.write('#EXTM3U\n')

        for episode in episodes:
            if not extm3u:
                # Episode objects are strings that contain file names
                f.write(episode + '\n')
                continue

            if episode.was_downloaded(and_exists=True):
                filename = episode.local_filename(create=False)
                assert filename is not None

                if os.path.dirname(filename).startswith(os.path.dirname(m3u_filename)):
                    # Strip the playlist's directory to get a relative path
                    filename = filename[len(os.path.dirname(m3u_filename) + os.sep):]
                f.write('#EXTINF:0,' + episode.playlist_title() + '\n')
                f.write(filename + '\n')
2011-08-07 23:39:46 +02:00
def generate_names(filename):
    """Yield 'filename' first, then numbered variants of it.

    Produces: filename, 'name (2).ext', 'name (3).ext', ...
    Useful for finding a free target name when saving files.
    """
    basename, ext = os.path.splitext(filename)
    yield filename
    for counter in itertools.count(2):
        yield '%s (%d)%s' % (basename, counter, ext)
2011-09-18 22:04:33 +02:00
def is_known_redirecter(url):
    """Check if a URL redirect is expected, and no filenames should be updated

    We usually honor URL redirects, and update filenames accordingly.
    In some cases (e.g. Soundcloud) this results in a worse filename,
    so we hardcode and detect these cases here to avoid renaming files
    for which we know that a "known good default" exists.

    The problem here is that by comparing the currently-assigned filename
    with the new filename determined by the URL, we cannot really determine
    which one is the "better" URL (e.g. "n5rMSpXrqmR9.128.mp3" for Soundcloud).
    """
    # Soundcloud-hosted media downloads (we take the track name as filename)
    return url.startswith('http://ak-media.soundcloud.com/')
2012-01-03 23:59:19 +01:00
def atomic_rename(old_name, new_name):
    """Atomically rename/move a (temporary) file

    This is usually used when updating a file safely by writing
    the new contents into a temporary file and then moving the
    temporary file over the original file to replace it.
    """
    # Win32 does not support atomic rename with os.rename, so use
    # shutil.move there instead
    rename = shutil.move if gpodder.ui.win32 else os.rename
    rename(old_name, new_name)
2012-02-20 23:55:36 +01:00
def check_command(self, cmd):
    """Check if a command line command/program exists"""
    # NOTE(review): 'self' is unused -- this is a module-level function,
    # so the parameter looks like a leftover from a method; confirm all
    # callers before removing it.
    # Prior to Python 2.7.3, this module (shlex) did not support Unicode input.
    # Only the program name (first token of the command line) is checked.
    program = shlex.split(cmd)[0]
    return (find_command(program) is not None)
2012-03-17 13:06:29 +01:00
def rename_episode_file(episode, filename):
    """Helper method to update a PodcastEpisode object

    Useful after renaming/converting its download file.
    """
    if not os.path.exists(filename):
        raise ValueError('Target filename does not exist.')

    # Derive the new metadata from the file on disk
    _basename, extension = os.path.splitext(filename)
    episode.download_filename = os.path.basename(filename)
    episode.file_size = os.path.getsize(filename)
    episode.mime_type = mimetype_from_extension(extension)

    # Persist the updated episode record
    episode.save()
    episode.db.commit()
2012-02-28 12:06:35 +01:00
2017-11-27 22:09:32 +01:00
def get_update_info():
    """
    Get up to date release information from gpodder.org.

    Returns a tuple: (up_to_date, latest_version, release_date, days_since)

    Example result (up to date version, 20 days after release):
        (True, '3.0.4', '2012-01-24', 20)

    Example result (outdated version, 10 days after release):
        (False, '3.0.5', '2012-02-29', 10)
    """
    url = 'https://api.github.com/repos/gpodder/gpodder/releases/latest'
    # Close the HTTP response explicitly instead of leaking the socket
    with urlopen(url) as response:
        info = json.loads(response.read().decode('utf-8'))

    # Tag names look like 'gpodder-3.x.y'; strip the prefix
    latest_version = info.get('tag_name', '').replace('gpodder-', '')
    release_date = info['published_at']

    release_parsed = datetime.datetime.strptime(release_date, '%Y-%m-%dT%H:%M:%SZ')
    days_since_release = (datetime.datetime.today() - release_parsed).days

    def convert(s):
        return tuple(int(x) for x in s.split('.'))

    up_to_date = (convert(gpodder.__version__) >= convert(latest_version))

    return up_to_date, latest_version, release_date, days_since_release
2012-07-10 13:52:34 +02:00
def run_in_background(function, daemon=False):
    """Run 'function' in a new thread and return the Thread object.

    If 'daemon' is True, the thread will not keep the
    interpreter alive on shutdown.
    """
    logger.debug('run_in_background: %s (%s)', function, str(daemon))
    # Thread.setDaemon() is a deprecated alias (removal planned);
    # pass the flag to the constructor instead
    thread = threading.Thread(target=function, daemon=daemon)
    thread.start()
    return thread
2012-08-19 14:30:36 +02:00
2012-10-13 13:52:14 +02:00
def linux_get_active_interfaces():
    """Get active network interfaces using 'ip link'

    Returns a list of active network interfaces or an
    empty list if the device is offline. The loopback
    interface is not included.
    """
    process = Popen(['ip', 'link'], close_fds=True, stdout=subprocess.PIPE)
    data, _ = process.communicate()
    output = data.decode(locale.getpreferredencoding())
    # Interfaces in state UP or UNKNOWN count as active; skip loopback
    for interface, _state in re.findall(r'\d+: ([^:]+):.*state (UP|UNKNOWN)', output):
        if interface != 'lo':
            yield interface
2012-10-13 13:52:14 +02:00
2012-10-23 12:41:47 +02:00
def osx_get_active_interfaces():
    """Get active network interfaces using 'ifconfig'

    Returns a list of active network interfaces or an
    empty list if the device is offline. The loopback
    interface is not included.
    """
    process = Popen(['ifconfig'], close_fds=True, stdout=subprocess.PIPE)
    stdout, _ = process.communicate()

    # Bug fix: re.MULTILINE was being passed as re.split()'s third
    # positional argument, which is 'maxsplit' (re.MULTILINE == 8),
    # silently truncating the output to the first 8 interface stanzas.
    # The flag is not needed for this pattern, so drop it entirely.
    for i in re.split('\n(?!\t)', stdout.decode('utf-8')):
        b = re.match('(\\w+):.*status: (active|associated)$', i, re.MULTILINE | re.DOTALL)
        if b:
            yield b.group(1)
2018-02-11 00:22:00 +01:00
2013-01-05 01:51:55 +01:00
def unix_get_active_interfaces():
    """Get active network interfaces using 'ifconfig'

    Returns a list of active network interfaces or an
    empty list if the device is offline. The loopback
    interface is not included.
    """
    process = Popen(['ifconfig'], close_fds=True, stdout=subprocess.PIPE)
    stdout, _ = process.communicate()

    # Bug fix: re.MULTILINE was being passed as re.split()'s third
    # positional argument, which is 'maxsplit' (re.MULTILINE == 8),
    # silently truncating the output to the first 8 interface stanzas.
    # The flag is not needed for this pattern, so drop it entirely.
    for i in re.split('\n(?!\t)', stdout.decode(locale.getpreferredencoding())):
        b = re.match('(\\w+):.*status: (active|associated)$', i, re.MULTILINE | re.DOTALL)
        if b:
            yield b.group(1)
2012-10-23 12:41:47 +02:00
2012-10-13 13:52:14 +02:00
def connection_available():
    """Check if an Internet connection is available

    Returns True if a connection is available (or if there
    is no way to determine the connection). Returns False
    if no network interfaces are up (i.e. no connectivity).
    """
    try:
        if gpodder.ui.win32:
            # FIXME: Implement for Windows
            return True
        elif gpodder.ui.osx:
            return len(list(osx_get_active_interfaces())) > 0
        else:
            # By default, we assume we're not offline (bug 1730)
            offline = False

            if find_command('ifconfig') is not None:
                # If ifconfig is available, and it says we don't have
                # any active interfaces, assume we're offline
                if len(list(unix_get_active_interfaces())) == 0:
                    offline = True

            # If we assume we're offline, try the "ip" command as fallback
            if offline and find_command('ip') is not None:
                if len(list(linux_get_active_interfaces())) == 0:
                    offline = True
                else:
                    offline = False

            return not offline
        # NOTE: the original had an unreachable "return False" here
        # (every branch above returns); it has been removed.
    except Exception as e:
        # logger.warn() is a deprecated alias of logger.warning()
        logger.warning('Cannot get connection status: %s', e, exc_info=True)
        # When we can't determine the connection status, act as if we're online (bug 1730)
        return True
2012-10-13 13:52:14 +02:00
def website_reachable(url):
    """
    Check if a specific website is available.

    Returns a tuple (reachable, response): 'response' is the open
    urllib response object when reachable, None otherwise.
    """
    if not connection_available():
        # No network interfaces up - assume website not reachable
        return (False, None)

    try:
        response = urllib.request.urlopen(url, timeout=1)
        return (True, response)
    except urllib.error.URLError:
        # Deliberate best-effort: any URL error means "not reachable"
        # (the unused 'as err' binding from the original was removed)
        pass

    return (False, None)
2018-02-11 00:22:00 +01:00
2013-11-12 03:43:24 +01:00
def delete_empty_folders(top):
    """Recursively remove empty directories below 'top'.

    Walks bottom-up so a directory that becomes empty after its
    children are removed is itself removed on a later iteration.
    """
    for parent, subdirs, _files in os.walk(top, topdown=False):
        for subdir in subdirs:
            path = os.path.join(parent, subdir)
            if not os.listdir(path):
                os.rmdir(path)
2013-11-12 03:43:24 +01:00
2018-02-06 16:13:21 +01:00
def guess_encoding(filename):
    """
    Read a source file's encoding as defined in PEP 263:
    - a UTF-8 BOM marker => 'utf-8'
    - a 'coding: xxx' comment in the first 2 lines => that coding
    - otherwise (or if the file is missing) => None
    """
    coding_comment = re.compile(b"^[ \t\v]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)")

    def encoding_of(line):
        # Return the declared coding on this line, or None
        match = coding_comment.match(line)
        return match.group(1).decode() if match else None

    if not filename or not os.path.exists(filename):
        return None

    with open(filename, "rb") as source:
        first_line = source.readline()
        if first_line.startswith(b"\xef\xbb\xbf"):
            return "utf-8"
        found = encoding_of(first_line)
        if not found:
            # PEP 263 allows the declaration on the second line, too
            found = encoding_of(source.readline())
    return found
2018-03-25 18:41:33 +02:00
def iri_to_url(url):
    """
    Properly escapes Unicode characters in the URL path section

    TODO: Explore if this should also handle the domain
    Based on: http://stackoverflow.com/a/18269491/1072626
    In response to issue: https://github.com/gpodder/gpodder/issues/232
    See https://github.com/gpodder/gpodder/issues/399
    """
    scheme, netloc, path, query, fragment = urllib.parse.urlsplit(url)
    # Unquote first so sequences that are already percent-escaped
    # do not get escaped a second time
    path = urllib.parse.unquote(path)
    # 'safe' covers every char allowed in a path segment, cf the pchar
    # rule in https://tools.ietf.org/html/rfc3986#appendix-A
    path = urllib.parse.quote(path, safe="/-._~!$&'()*+,;=:@")
    return urllib.parse.urlunsplit((scheme, netloc, path, query, fragment))
2018-05-28 21:13:29 +02:00
class Popen(subprocess.Popen):
    """A Popen process that tries not to leak file descriptors.

    This is a drop-in replacement for subprocess.Popen(), which takes the same
    arguments.

    'close_fds' will default to True, if omitted. This stops the process from
    inheriting ALL of gPodder's file descriptors, which would keep them
    'in-use'. That is of particular concern whenever the download queue is
    active and interacting with the filesystem in the background.

    On Windows however, redirection cannot coexist with 'close_fds=True'.
    Specifying both will raise a ValueError. A message will appear in the log.

    For communication with short-lived Windows commands, setting 'close_fds'
    to False may be a tolerable risk. Otherwise as a last resort, sending
    output to temp files to read afterward might work (probably involving
    'shell=True').

    See https://github.com/gpodder/gpodder/issues/420
    """
    def __init__(self, *args, **kwargs):
        # NOTE(review): set here but never read within this class body --
        # presumably used elsewhere; confirm before removing.
        self.__logged_returncode = False
        if 'close_fds' not in kwargs:
            # Default: do not inherit gPodder's open file descriptors
            kwargs['close_fds'] = True
        try:
            super(Popen, self).__init__(*args, **kwargs)  # Python 2 syntax
        except (ValueError) as e:
            # On Windows, close_fds=True cannot be combined with stream
            # redirection; explain the failure in the log, then re-raise.
            if gpodder.ui.win32 and kwargs['close_fds']:
                if [(k, v) for (k, v) in kwargs.items() if k in ('stdin', 'stdout', 'stderr') and v]:
                    logger = logging.getLogger(__name__)
                    logger.error('util.Popen(close_fds=True) is incompatible with stream redirection on Windows.')
                    logger.error('With close_fds=False, the process keeps all currently open files locked. It might be tolerable for short-lived commands. Or use temp files.')
            raise e

    @classmethod
    def testPopen():
        # NOTE(review): this classmethod is declared without a 'cls'
        # parameter, so calling Popen.testPopen() raises TypeError (the
        # descriptor passes the class as first argument). Confirm and add
        # 'cls' (or drop @classmethod).
        #
        # Manual smoke tests for the close_fds / redirection behavior.
        # Commands that will complain on stderr.
        if gpodder.ui.win32:
            cmd = ['findstr.exe', '/!']
            cmd_pipe = ['findstr', 'hello']
        else:
            cmd = ['cat', '--helpp']
            cmd_pipe = ['grep', 'hello']

        logger.info('Test #1: Implicit close_fds=True, with no redirection')
        logger.info('No race condition.')
        logger.info('Streams left in the console.')
        logger.info('Typical spawn and forget. Might as well wait().')
        p = Popen(cmd)
        out, err = p.communicate()
        print("-- stderr --\n{}\n------\n".format(err))

        logger.info('Test #2: Explicit close_fds=False, with redirection.')
        logger.info('This has a race condition, but communicate() always returns streams.')
        p = Popen(cmd, close_fds=False, stderr=subprocess.PIPE, universal_newlines=True)
        out, err = p.communicate()
        print("-- stderr --\n{}\n------\n".format(err))

        try:
            logger.info('Test #3: Implicit close_fds=True, with attempted redirection.')
            logger.info('No race condition.')
            logger.info('On Windows, this will raise ValueError.')
            logger.info('Other platforms will have readable streams returned.')
            p = Popen(cmd, stderr=subprocess.PIPE, universal_newlines=True)
            out, err = p.communicate()
            print("-- stderr --\n{}\n------\n".format(err))
        except (ValueError) as e:
            print("-- Caught --\n{}: {}\n------\n".format(e.__class__.__name__, e))

        try:
            logger.info('Test #4: Implicit close_fds=True, given input.')
            p = Popen(cmd_pipe, stdin=subprocess.PIPE)
            out, err = p.communicate(input=b'hello world')
            print("NEVER REACHED ON WINDOWS")
            print("-- stderr --\n{}\n------\n".format(err))
        except (ValueError) as e:
            print("-- Caught --\n{}: {}\n------\n".format(e.__class__.__name__, e))

        logger.info('Log spam only occurs if returncode is non-zero or if explaining the Windows redirection error.')