mirror of https://github.com/pypa/pip
Differentiate between safe and unsafe urls where possible
* Links and HTMLPages know if they are "trusted" * File Links know if they are safe or not * A "Safe" file Link comes from a trusted Link/HTMLPage and has a hash allowing verification of the download * Adds a --allow-unsafe PACKAGE argument to allow unsafe files on a per-package basis * Optimizes scraping external sites by short-circuiting if the current trust rules won't allow using its files anyway
This commit is contained in:
parent
9ccd5f0bb3
commit
cc46cdc8b3
|
@ -79,6 +79,16 @@ no_allow_external = make_option(
|
|||
help="Disallow the installation of externally hosted files",
|
||||
)
|
||||
|
||||
allow_unsafe = make_option(
|
||||
"--allow-unsafe",
|
||||
dest="allow_unsafe",
|
||||
action="append",
|
||||
default=[],
|
||||
metavar="PACKAGE",
|
||||
help="Allow the installation of unsafe and unverifiable files for a "
|
||||
"package",
|
||||
)
|
||||
|
||||
requirements = make_option(
|
||||
'-r', '--requirement',
|
||||
dest='requirements',
|
||||
|
@ -157,5 +167,6 @@ index_group = {
|
|||
mirrors,
|
||||
allow_external,
|
||||
no_allow_external,
|
||||
allow_unsafe,
|
||||
]
|
||||
}
|
||||
|
|
|
@ -164,6 +164,7 @@ class InstallCommand(Command):
|
|||
mirrors=options.mirrors,
|
||||
use_wheel=options.use_wheel,
|
||||
allow_external=options.allow_external,
|
||||
allow_unsafe=options.allow_unsafe,
|
||||
)
|
||||
|
||||
def run(self, options, args):
|
||||
|
|
121
pip/index.py
121
pip/index.py
|
@ -48,7 +48,7 @@ class PackageFinder(object):
|
|||
|
||||
def __init__(self, find_links, index_urls,
|
||||
use_mirrors=False, mirrors=None, main_mirror_url=None,
|
||||
use_wheel=False, allow_external=False):
|
||||
use_wheel=False, allow_external=False, allow_unsafe=[]):
|
||||
self.find_links = find_links
|
||||
self.index_urls = index_urls
|
||||
self.dependency_links = []
|
||||
|
@ -62,12 +62,23 @@ class PackageFinder(object):
|
|||
self.mirror_urls = []
|
||||
self.use_wheel = use_wheel
|
||||
|
||||
# Do we allow (safe and verifiable) externally hosted files?
|
||||
self.allow_external = allow_external
|
||||
|
||||
# Which names are allowed to install unsafe and unverifiable files?
|
||||
self.allow_unsafe = set(normalize_name(n) for n in allow_unsafe)
|
||||
|
||||
# Do we allow unsafe and unverifiable files?
|
||||
self.allow_all_unsafe = False
|
||||
|
||||
# Stores if we ignored any external links so that we can instruct
|
||||
# end users how to install them if no distributions are available
|
||||
self.need_warn_external = False
|
||||
|
||||
# Stores if we ignored any unsafe links so that we can instruct
|
||||
# end users how to install them if no distributions are available
|
||||
self.need_warn_unsafe = False
|
||||
|
||||
@property
|
||||
def use_wheel(self):
|
||||
return self._use_wheel
|
||||
|
@ -177,11 +188,11 @@ class PackageFinder(object):
|
|||
main_index_url = None
|
||||
if self.index_urls:
|
||||
# Check that we have the url_name correctly spelled:
|
||||
main_index_url = Link(mkurl_pypi_url(self.index_urls[0]))
|
||||
main_index_url = Link(mkurl_pypi_url(self.index_urls[0]), trusted=True)
|
||||
# This will also cache the page, so it's okay that we get it again later:
|
||||
page = self._get_page(main_index_url, req)
|
||||
if page is None:
|
||||
url_name = self._find_url_name(Link(self.index_urls[0]), url_name, req) or req.url_name
|
||||
url_name = self._find_url_name(Link(self.index_urls[0], trusted=True), url_name, req) or req.url_name
|
||||
|
||||
# Combine index URLs with mirror URLs here to allow
|
||||
# adding more index URLs from requirements files
|
||||
|
@ -193,22 +204,31 @@ class PackageFinder(object):
|
|||
for url in all_index_urls] + self.find_links
|
||||
else:
|
||||
locations = list(self.find_links)
|
||||
locations.extend(self.dependency_links)
|
||||
for version in req.absolute_versions:
|
||||
if url_name is not None and main_index_url is not None:
|
||||
locations = [
|
||||
posixpath.join(main_index_url.url, version)] + locations
|
||||
|
||||
file_locations, url_locations = self._sort_locations(locations)
|
||||
_flocations, _ulocations = self._sort_locations(self.dependency_links)
|
||||
file_locations.extend(_flocations)
|
||||
|
||||
# We trust every url that the user has given us whether it was given
|
||||
# via --index-url, --user-mirrors/--mirror, or --find-links or a
|
||||
# default option thereof
|
||||
locations = [Link(url, trusted=True) for url in url_locations]
|
||||
|
||||
# We explicitly do not trust links that came from dependency_links
|
||||
locations.extend([Link(url) for url in _ulocations])
|
||||
|
||||
locations = [Link(url) for url in url_locations]
|
||||
logger.debug('URLs to search for versions for %s:' % req)
|
||||
for location in locations:
|
||||
logger.debug('* %s' % location)
|
||||
found_versions = []
|
||||
found_versions.extend(
|
||||
self._package_versions(
|
||||
[Link(url, '-f') for url in self.find_links], req.name.lower()))
|
||||
# We trust every directly linked archive in find_links
|
||||
[Link(url, '-f', trusted=True) for url in self.find_links], req.name.lower()))
|
||||
page_versions = []
|
||||
for page in self._get_pages(locations, req):
|
||||
logger.debug('Analyzing links from page %s' % page.url)
|
||||
|
@ -230,6 +250,10 @@ class PackageFinder(object):
|
|||
logger.warn("Some externally hosted files were ignored (use "
|
||||
"--allow-external to allow).")
|
||||
|
||||
if self.need_warn_unsafe:
|
||||
logger.warn("Some unsafe and unverifiable files were ignored "
|
||||
"(use --allow-unsafe %s to allow)." % req.name)
|
||||
|
||||
raise DistributionNotFound('No distributions at all found for %s' % req)
|
||||
installed_version = []
|
||||
if req.satisfied_by is not None:
|
||||
|
@ -267,6 +291,10 @@ class PackageFinder(object):
|
|||
logger.warn("Some externally hosted files were ignored (use "
|
||||
"--allow-external to allow).")
|
||||
|
||||
if self.need_warn_unsafe:
|
||||
logger.warn("Some unsafe and unverifiable files were ignored "
|
||||
"(use --allow-unsafe %s to allow)." % req.name)
|
||||
|
||||
raise DistributionNotFound('No distributions matching the version for %s' % req)
|
||||
if applicable_versions[0][1] is InfLink:
|
||||
# We have an existing version, and its the best version
|
||||
|
@ -286,6 +314,12 @@ class PackageFinder(object):
|
|||
"versions of pip will default to disallowing "
|
||||
"externally hosted files.")
|
||||
|
||||
if (selected_version.verifiable is not None
|
||||
and not selected_version.verifiable):
|
||||
logger.warn("You are installing a potentially unsafe and "
|
||||
"unverifiable file. Future versions of pip will "
|
||||
"default to disallowing unsafe files.")
|
||||
|
||||
return selected_version
|
||||
|
||||
|
||||
|
@ -342,6 +376,22 @@ class PackageFinder(object):
|
|||
continue
|
||||
done.append(page)
|
||||
for link in page.rel_links():
|
||||
if not self.allow_external:
|
||||
self.need_warn_external = True
|
||||
logger.debug("Not searching %s for files because external "
|
||||
"urls are disallowed." % link)
|
||||
continue
|
||||
|
||||
if (link.trusted is not None
|
||||
and not link.trusted
|
||||
and not normalize_name(req.name).lower() in self.allow_unsafe
|
||||
and not self.allow_all_unsafe): # TODO: Remove after release
|
||||
logger.debug("Not searching %s for urls, it is an "
|
||||
"untrusted link and cannot produce safe or "
|
||||
"verifiable files." % link)
|
||||
self.need_warn_unsafe = True
|
||||
continue
|
||||
|
||||
pending_queue.put(link)
|
||||
|
||||
_egg_fragment_re = re.compile(r'#egg=([^&]*)')
|
||||
|
@ -424,6 +474,18 @@ class PackageFinder(object):
|
|||
self.need_warn_external = True
|
||||
return []
|
||||
|
||||
if (link.verifiable is not None
|
||||
and not link.verifiable
|
||||
and not normalize_name(search_name).lower() in self.allow_unsafe
|
||||
and not self.allow_all_unsafe): # TODO: Remove after release
|
||||
# We have a link that we are sure we cannot verify it's integrity,
|
||||
# so we should skip it unless we are allowing unsafe installs
|
||||
# for this requirement.
|
||||
logger.debug("Skipping %s because it is an unsafe and unverifiable"
|
||||
" file." % link)
|
||||
self.need_warn_unsafe = True
|
||||
return []
|
||||
|
||||
match = self._py_version_re.search(version)
|
||||
if match:
|
||||
version = version[:match.start()]
|
||||
|
@ -514,11 +576,12 @@ class HTMLPage(object):
|
|||
_download_re = re.compile(r'<th>\s*download\s+url', re.I)
|
||||
_href_re = re.compile('href=(?:"([^"]*)"|\'([^\']*)\'|([^>\\s\\n]*))', re.I|re.S)
|
||||
|
||||
def __init__(self, content, url, headers=None):
|
||||
def __init__(self, content, url, headers=None, trusted=None):
|
||||
self.content = content
|
||||
self.parsed = html5lib.parse(self.content, namespaceHTMLElements=False)
|
||||
self.url = url
|
||||
self.headers = headers
|
||||
self.trusted = trusted
|
||||
|
||||
def __str__(self):
|
||||
return self.url
|
||||
|
@ -595,7 +658,7 @@ class HTMLPage(object):
|
|||
cache.set_is_archive(url)
|
||||
return None
|
||||
|
||||
inst = cls(u(contents), real_url, headers)
|
||||
inst = cls(u(contents), real_url, headers, trusted=link.trusted)
|
||||
except (HTTPError, URLError, socket.timeout, socket.error, OSError, WindowsError):
|
||||
e = sys.exc_info()[1]
|
||||
desc = str(e)
|
||||
|
@ -712,7 +775,7 @@ class HTMLPage(object):
|
|||
if found_rels & rels:
|
||||
href = anchor.get("href")
|
||||
url = self.clean_link(urlparse.urljoin(self.base_url, href))
|
||||
yield Link(url, self)
|
||||
yield Link(url, self, trusted=False)
|
||||
|
||||
def scraped_rel_links(self):
|
||||
# Can we get rid of this horrible horrible method?
|
||||
|
@ -727,7 +790,7 @@ class HTMLPage(object):
|
|||
if not url:
|
||||
continue
|
||||
url = self.clean_link(urlparse.urljoin(self.base_url, url))
|
||||
yield Link(url, self)
|
||||
yield Link(url, self, trusted=False)
|
||||
|
||||
_clean_re = re.compile(r'[^a-z0-9$&+,/:;=?@.#%_\\|-]', re.I)
|
||||
|
||||
|
@ -741,10 +804,11 @@ class HTMLPage(object):
|
|||
|
||||
class Link(object):
|
||||
|
||||
def __init__(self, url, comes_from=None, internal=None):
|
||||
def __init__(self, url, comes_from=None, internal=None, trusted=None):
|
||||
self.url = url
|
||||
self.comes_from = comes_from
|
||||
self.internal = internal
|
||||
self.trusted = trusted
|
||||
|
||||
# Set whether it's a wheel
|
||||
self.wheel = None
|
||||
|
@ -833,6 +897,41 @@ class Link(object):
|
|||
def show_url(self):
|
||||
return posixpath.basename(self.url.split('#', 1)[0].split('?', 1)[0])
|
||||
|
||||
@property
|
||||
def verifiable(self):
|
||||
"""
|
||||
Returns True if this link can be verified after download, False if it
|
||||
cannot, and None if we cannot determine.
|
||||
"""
|
||||
trusted = self.trusted or getattr(self.comes_from, "trusted", None)
|
||||
if trusted is not None and trusted:
|
||||
# This link came from a trusted source. It *may* be verifiable but
|
||||
# first we need to see if this page is operating under the new
|
||||
# API version.
|
||||
try:
|
||||
api_version = getattr(self.comes_from, "api_version", None)
|
||||
api_version = int(api_version)
|
||||
except (ValueError, TypeError):
|
||||
api_version = None
|
||||
|
||||
if api_version is None or api_version <= 1:
|
||||
# This link is either trusted, or it came from a trusted,
|
||||
# however it is not operating under the API version 2 so
|
||||
# we can't make any claims about if it's safe or not
|
||||
return
|
||||
|
||||
if self.hash:
|
||||
# This link came from a trusted source and it has a hash, so we
|
||||
# can consider it safe.
|
||||
return True
|
||||
else:
|
||||
# This link came from a trusted source, using the new API
|
||||
# version, and it does not have a hash. It is NOT verifiable
|
||||
return False
|
||||
elif trusted is not None:
|
||||
# This link came from an untrusted source and we cannot trust it
|
||||
return False
|
||||
|
||||
#An "Infinite Link" that compares greater than other links
|
||||
InfLink = Link(Inf) #this object is not currently used as a sortable
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@ from pip.util import (display_path, rmtree, ask, ask_path_exists, backup_dir,
|
|||
is_installable_dir, is_local, dist_is_local,
|
||||
dist_in_usersite, dist_in_site_packages, renames,
|
||||
normalize_path, egg_link_path, make_path_relative,
|
||||
call_subprocess, is_prerelease)
|
||||
call_subprocess, is_prerelease, normalize_name)
|
||||
from pip.backwardcompat import (urlparse, urllib, uses_pycache,
|
||||
ConfigParser, string_types, HTTPError,
|
||||
get_python_version, b)
|
||||
|
@ -1399,6 +1399,9 @@ def parse_requirements(filename, finder=None, comes_from=None, options=None):
|
|||
finder.allow_external = True
|
||||
elif line.startswith("--no-allow-external"):
|
||||
finder.allow_external = False
|
||||
elif line.startswith("--allow-unsafe"):
|
||||
line = line[len("--allow-unsafe"):].strip().lstrip("=")
|
||||
finder.allow_unsafe |= set([normalize_name(line).lower()])
|
||||
else:
|
||||
comes_from = '-r %s (line %s)' % (filename, line_number)
|
||||
if line.startswith('-e') or line.startswith('--editable'):
|
||||
|
|
Loading…
Reference in New Issue