pip/pip/download.py

136 lines
4.0 KiB
Python
Raw Normal View History

import xmlrpclib
import re
import urllib
import urllib2
import os
from pip.exceptions import InstallationError
from pip.util import splitext
xmlrpclib_transport = xmlrpclib.Transport()
def get_file_content(url, comes_from=None):
"""Gets the content of a file; it may be a filename, file: URL, or
http: URL. Returns (location, content)"""
match = _scheme_re.search(url)
if match:
scheme = match.group(1).lower()
if (scheme == 'file' and comes_from
and comes_from.startswith('http')):
raise InstallationError(
'Requirements file %s references URL %s, which is local'
% (comes_from, url))
if scheme == 'file':
path = url.split(':', 1)[1]
path = path.replace('\\', '/')
match = _url_slash_drive_re.match(path)
if match:
path = match.group(1) + ':' + path.split('|', 1)[1]
path = urllib.unquote(path)
if path.startswith('/'):
path = '/' + path.lstrip('/')
url = path
else:
## FIXME: catch some errors
resp = urllib2.urlopen(url)
return geturl(resp), resp.read()
f = open(url)
content = f.read()
f.close()
return url, content
_scheme_re = re.compile(r'^(http|https|file):', re.I)
_url_slash_drive_re = re.compile(r'/*([a-z])\|', re.I)
# Insurance against "creative" interpretation of the RFC:
# http://bugs.python.org/issue8732
def urlopen(url):
if isinstance(url, basestring):
url = urllib2.Request(url, headers={'Accept-encoding': 'identity'})
return urllib2.urlopen(url)
def is_url(name):
"""Returns true if the name looks like a URL"""
from pip.vcs import vcs
if ':' not in name:
return False
scheme = name.split(':', 1)[0].lower()
return scheme in ['http', 'https', 'file', 'ftp'] + vcs.all_schemes
def url_to_path(url):
"""
Convert a file: URL to a path.
"""
assert url.startswith('file:'), (
"You can only turn file: urls into filenames (not %r)" % url)
path = url[len('file:'):].lstrip('/')
path = urllib.unquote(path)
if _url_drive_re.match(path):
path = path[0] + ':' + path[2:]
else:
path = '/' + path
return path
_drive_re = re.compile('^([a-z]):', re.I)
_url_drive_re = re.compile('^([a-z])[:|]', re.I)
def path_to_url(path):
"""
Convert a path to a file: URL. The path will be made absolute.
"""
path = os.path.normcase(os.path.abspath(path))
if _drive_re.match(path):
path = path[0] + '|' + path[2:]
url = urllib.quote(path)
url = url.replace(os.path.sep, '/')
url = url.lstrip('/')
return 'file:///' + url
def path_to_url2(path):
"""
Convert a path to a file: URL. The path will be made absolute and have
quoted path parts.
"""
path = os.path.normpath(os.path.abspath(path))
drive, path = os.path.splitdrive(path)
filepath = path.split(os.path.sep)
url = '/'.join([urllib.quote(part) for part in filepath])
if not drive:
url = url.lstrip('/')
return 'file:///' + drive + url
def geturl(urllib2_resp):
"""
Use instead of urllib.addinfourl.geturl(), which appears to have
some issues with dropping the double slash for certain schemes
(e.g. file://). This implementation is probably over-eager, as it
always restores '://' if it is missing, and it appears some url
schemata aren't always followed by '//' after the colon, but as
far as I know pip doesn't need any of those.
"""
url = urllib2_resp.geturl()
scheme, rest = url.split(':', 1)
if rest.startswith('//'):
return url
else:
return '%s//%s' % (scheme, rest)
def is_archive_file(name):
"""Return True if `name` is a considered as an archive file."""
archives = ('.zip', '.tar.gz', '.tar.bz2', '.tgz', '.tar', '.pybundle')
ext = splitext(name)[1].lower()
if ext in archives:
return True
return False