pip/pip/download.py

import xmlrpclib
import re
import urllib
import urllib2
import os
from pip.exceptions import InstallationError
from pip.util import splitext

# Module-level xmlrpclib.Transport instance, created once at import time.
# NOTE(review): presumably shared by XML-RPC clients elsewhere in pip so a
# single transport can be swapped out (e.g. in tests) -- confirm with callers.
xmlrpclib_transport = xmlrpclib.Transport()


def get_file_content(url, comes_from=None):
    """Return ``(location, content)`` for a filename, file: URL or http(s): URL.

    ``url`` may be a plain filesystem path, a ``file:`` URL, or an
    ``http:``/``https:`` URL.  ``comes_from`` is the location of the
    requirements file that referenced ``url`` (if any).

    For remote URLs the location is the final URL reported by the response
    (normalised through ``geturl``); for local files it is the path that
    was actually opened.

    Raises InstallationError when a requirements file fetched over http
    refers to a local ``file:`` URL.
    """
    match = _scheme_re.search(url)
    if match:
        scheme = match.group(1).lower()
        if (scheme == 'file' and comes_from
            and comes_from.startswith('http')):
            raise InstallationError(
                'Requirements file %s references URL %s, which is local'
                % (comes_from, url))
        if scheme == 'file':
            # Reduce the file: URL to a plain path and fall through to the
            # regular open() below.
            path = url.split(':', 1)[1]
            path = path.replace('\\', '/')
            match = _url_slash_drive_re.match(path)
            if match:
                # "///c|/dir" style Windows drive -> "c:/dir"
                path = match.group(1) + ':' + path.split('|', 1)[1]
            path = urllib.unquote(path)
            if path.startswith('/'):
                # Collapse any run of leading slashes into a single one.
                path = '/' + path.lstrip('/')
            url = path
        else:
            ## FIXME: catch some errors
            resp = urllib2.urlopen(url)
            try:
                return geturl(resp), resp.read()
            finally:
                # Release the connection even if reading fails.
                resp.close()
    f = open(url)
    try:
        content = f.read()
    finally:
        # Always close the handle, including when read() raises.
        f.close()
    return url, content


# Leading http:, https: or file: scheme (case-insensitive).
_scheme_re = re.compile(r'^(http|https|file):', re.I)
# Optional leading slashes followed by a drive letter and '|' (e.g. "///c|").
_url_slash_drive_re = re.compile(r'/*([a-z])\|', re.I)


# Insurance against "creative" interpretation of the RFC:
# http://bugs.python.org/issue8732
def urlopen(url):
    """Open ``url`` with urllib2, asking for an unencoded response.

    A string URL is wrapped in a ``urllib2.Request`` carrying an
    ``Accept-encoding: identity`` header; any other object is handed to
    ``urllib2.urlopen`` unchanged.
    """
    if not isinstance(url, basestring):
        # Not a plain string: pass it through as-is.
        return urllib2.urlopen(url)
    identity_request = urllib2.Request(
        url, headers={'Accept-encoding': 'identity'})
    return urllib2.urlopen(identity_request)


def is_url(name):
    """Tell whether ``name`` starts with a URL scheme that pip recognises.

    Recognised schemes are http, https, file and ftp, plus every scheme
    listed in ``vcs.all_schemes``.
    """
    # Imported lazily inside the function, as in the original code.
    from pip.vcs import vcs
    if ':' in name:
        prefix = name.split(':', 1)[0].lower()
        known_schemes = ['http', 'https', 'file', 'ftp'] + vcs.all_schemes
        return prefix in known_schemes
    return False


def url_to_path(url):
    """
    Turn a file: URL into a filesystem path.

    Leading slashes after the scheme are dropped and the remainder is
    percent-decoded.  If the result starts with a drive letter followed
    by ':' or '|', it becomes ``<letter>:<rest>``; otherwise a single
    leading '/' is put back.
    """
    assert url.startswith('file:'), (
        "You can only turn file: urls into filenames (not %r)" % url)
    stripped = urllib.unquote(url[len('file:'):].lstrip('/'))
    if not _url_drive_re.match(stripped):
        return '/' + stripped
    # Normalise "c|..." / "c:..." to "c:...".
    return stripped[0] + ':' + stripped[2:]


# Drive letter followed by ':' at the start of a path, e.g. "c:".
_drive_re = re.compile('^([a-z]):', re.I)
# Drive letter followed by ':' or '|' at the start of a URL path, e.g. "c|".
_url_drive_re = re.compile('^([a-z])[:|]', re.I)


def path_to_url(path):
    """
    Build a file: URL from a path.  The path is made absolute (and
    case-normalised) first; a leading drive letter is written as
    ``<letter>|`` before the whole path is quoted.
    """
    absolute = os.path.normcase(os.path.abspath(path))
    if _drive_re.match(absolute):
        absolute = absolute[0] + '|' + absolute[2:]
    quoted = urllib.quote(absolute).replace(os.path.sep, '/')
    return 'file:///' + quoted.lstrip('/')


def path_to_url2(path):
    """
    Build a file: URL from a path.  The path is made absolute and
    normalised, and each path component is quoted separately.
    """
    drive, tail = os.path.splitdrive(
        os.path.normpath(os.path.abspath(path)))
    quoted_parts = []
    for part in tail.split(os.path.sep):
        quoted_parts.append(urllib.quote(part))
    url = '/'.join(quoted_parts)
    if drive:
        return 'file:///' + drive + url
    # No drive: the leading slash is already supplied by 'file:///'.
    return 'file:///' + url.lstrip('/')


def geturl(urllib2_resp):
    """
    Use instead of urllib.addinfourl.geturl(), which appears to have
    some issues with dropping the double slash for certain schemes
    (e.g. file://).  This implementation is probably over-eager, as it
    always restores '://' if it is missing, and it appears some url
    schemata aren't always followed by '//' after the colon, but as
    far as I know pip doesn't need any of those.

    Returns the response's URL unchanged when the part after the scheme
    already starts with '//'; otherwise returns ``scheme://rest``, so
    ``file:/foo`` becomes ``file:///foo``.
    """
    url = urllib2_resp.geturl()
    scheme, rest = url.split(':', 1)
    if rest.startswith('//'):
        return url
    else:
        # split() removed the colon, so it has to be restored together
        # with the missing '//'.
        return '%s://%s' % (scheme, rest)


def is_archive_file(name):
    """Return True if `name` has one of the known archive extensions."""
    archive_extensions = (
        '.zip', '.tar.gz', '.tar.bz2', '.tgz', '.tar', '.pybundle')
    # splitext() returns the extension as its second element.
    extension = splitext(name)[1].lower()
    return extension in archive_extensions
using custom xmlrpclib.Transport (testability purposes) 2010-07-02 14:27:22 +02:00			`import xmlrpclib`
moved pip.util functions ``get_file_content``, ``urlopen``, ``is_url``, ``url_to_path``, ``path_to_url``, ``path_to_url2``, ``geturl`` and ``is_archive_file`` to pip.download 2010-07-02 16:53:07 +02:00			`import re`
			`import urllib`
			`import urllib2`
			`import os`
			`from pip.exceptions import InstallationError`
			`from pip.util import splitext`
using custom xmlrpclib.Transport (testability purposes) 2010-07-02 14:27:22 +02:00
			`xmlrpclib_transport = xmlrpclib.Transport()`
moved pip.util functions ``get_file_content``, ``urlopen``, ``is_url``, ``url_to_path``, ``path_to_url``, ``path_to_url2``, ``geturl`` and ``is_archive_file`` to pip.download 2010-07-02 16:53:07 +02:00

			`def get_file_content(url, comes_from=None):`
			`"""Gets the content of a file; it may be a filename, file: URL, or`
			`http: URL. Returns (location, content)"""`
			`match = _scheme_re.search(url)`
			`if match:`
			`scheme = match.group(1).lower()`
			`if (scheme == 'file' and comes_from`
			`and comes_from.startswith('http')):`
			`raise InstallationError(`
			`'Requirements file %s references URL %s, which is local'`
			`% (comes_from, url))`
			`if scheme == 'file':`
			`path = url.split(':', 1)[1]`
			`path = path.replace('\\', '/')`
			`match = _url_slash_drive_re.match(path)`
			`if match:`
			`path = match.group(1) + ':' + path.split('\|', 1)[1]`
			`path = urllib.unquote(path)`
			`if path.startswith('/'):`
			`path = '/' + path.lstrip('/')`
			`url = path`
			`else:`
			`## FIXME: catch some errors`
			`resp = urllib2.urlopen(url)`
			`return geturl(resp), resp.read()`
			`f = open(url)`
			`content = f.read()`
			`f.close()`
			`return url, content`


			`_scheme_re = re.compile(r'^(http\|https\|file):', re.I)`
			`_url_slash_drive_re = re.compile(r'/*([a-z])\\|', re.I)`


			`# Insurance against "creative" interpretation of the RFC:`
			`# http://bugs.python.org/issue8732`
			`def urlopen(url):`
			`if isinstance(url, basestring):`
			`url = urllib2.Request(url, headers={'Accept-encoding': 'identity'})`
			`return urllib2.urlopen(url)`


			`def is_url(name):`
			`"""Returns true if the name looks like a URL"""`
			`from pip.vcs import vcs`
			`if ':' not in name:`
			`return False`
			`scheme = name.split(':', 1)[0].lower()`
			`return scheme in ['http', 'https', 'file', 'ftp'] + vcs.all_schemes`


			`def url_to_path(url):`
			`"""`
			`Convert a file: URL to a path.`
			`"""`
			`assert url.startswith('file:'), (`
			`"You can only turn file: urls into filenames (not %r)" % url)`
			`path = url[len('file:'):].lstrip('/')`
			`path = urllib.unquote(path)`
			`if _url_drive_re.match(path):`
			`path = path[0] + ':' + path[2:]`
			`else:`
			`path = '/' + path`
			`return path`


			`_drive_re = re.compile('^([a-z]):', re.I)`
			`_url_drive_re = re.compile('^([a-z])[:\|]', re.I)`


			`def path_to_url(path):`
			`"""`
			`Convert a path to a file: URL. The path will be made absolute.`
			`"""`
			`path = os.path.normcase(os.path.abspath(path))`
			`if _drive_re.match(path):`
			`path = path[0] + '\|' + path[2:]`
			`url = urllib.quote(path)`
			`url = url.replace(os.path.sep, '/')`
			`url = url.lstrip('/')`
			`return 'file:///' + url`


			`def path_to_url2(path):`
			`"""`
			`Convert a path to a file: URL. The path will be made absolute and have`
			`quoted path parts.`
			`"""`
			`path = os.path.normpath(os.path.abspath(path))`
			`drive, path = os.path.splitdrive(path)`
			`filepath = path.split(os.path.sep)`
			`url = '/'.join([urllib.quote(part) for part in filepath])`
			`if not drive:`
			`url = url.lstrip('/')`
			`return 'file:///' + drive + url`


			`def geturl(urllib2_resp):`
			`"""`
			`Use instead of urllib.addinfourl.geturl(), which appears to have`
			`some issues with dropping the double slash for certain schemes`
			`(e.g. file://). This implementation is probably over-eager, as it`
			`always restores '://' if it is missing, and it appears some url`
			`schemata aren't always followed by '//' after the colon, but as`
			`far as I know pip doesn't need any of those.`
			`"""`
			`url = urllib2_resp.geturl()`
			`scheme, rest = url.split(':', 1)`
			`if rest.startswith('//'):`
			`return url`
			`else:`
			`return '%s//%s' % (scheme, rest)`


			`def is_archive_file(name):`
			"""Return True if `name` is a considered as an archive file."""
			`archives = ('.zip', '.tar.gz', '.tar.bz2', '.tgz', '.tar', '.pybundle')`
			`ext = splitext(name)[1].lower()`
			`if ext in archives:`
			`return True`
			`return False`