mirror of
https://github.com/pypa/pip
synced 2023-12-13 21:30:23 +01:00
[svn r21196] quote unsafe characters in links
This commit is contained in:
parent
5c7b318f88
commit
c1c84a4c97
2 changed files with 17 additions and 3 deletions
|
@ -13,6 +13,9 @@ svn trunk
|
|||
* Fixed Windows problem with putting the install record in the right
|
||||
place, and generating the ``pip`` script with Setuptools.
|
||||
|
||||
* Fixed download links that include embedded spaces or other unsafe
|
||||
characters (those characters get %-encoded).
|
||||
|
||||
0.2
|
||||
---
|
||||
|
||||
|
|
17
pip.py
17
pip.py
|
@ -2071,7 +2071,8 @@ class HTMLPage(object):
|
|||
"""Yields all links in the page"""
|
||||
for match in self._href_re.finditer(self.content):
|
||||
url = match.group(1) or match.group(2) or match.group(3)
|
||||
yield Link(urlparse.urljoin(self.url, url), self)
|
||||
url = self.clean_link(urlparse.urljoin(self.url, url))
|
||||
yield Link(url, self)
|
||||
|
||||
def rel_links(self):
|
||||
for url in self.explicit_rel_links():
|
||||
|
@ -2092,7 +2093,8 @@ class HTMLPage(object):
|
|||
if not match:
|
||||
continue
|
||||
url = match.group(1) or match.group(2) or match.group(3)
|
||||
yield Link(urlparse.urljoin(self.url, url), self)
|
||||
url = self.clean_link(urlparse.urljoin(self.url, url))
|
||||
yield Link(url, self)
|
||||
|
||||
def scraped_rel_links(self):
|
||||
for regex in (self._homepage_re, self._download_re):
|
||||
|
@ -2105,9 +2107,18 @@ class HTMLPage(object):
|
|||
url = match.group(1) or match.group(2) or match.group(3)
|
||||
if not url:
|
||||
continue
|
||||
url = urlparse.urljoin(self.url, url)
|
||||
url = self.clean_link(urlparse.urljoin(self.url, url))
|
||||
yield Link(url, self)
|
||||
|
||||
_clean_re = re.compile(r'[^a-z0-9$&+,/:;=?@.#%_\\|-]', re.I)
|
||||
|
||||
def clean_link(self, url):
|
||||
"""Makes sure a link is fully encoded. That is, if a ' ' shows up in
|
||||
the link, it will be rewritten to %20 (while not over-quoting
|
||||
% or other characters)."""
|
||||
return self._clean_re.sub(
|
||||
lambda match: '%%%2x' % ord(match.group(0)), url)
|
||||
|
||||
class PageCache(object):
|
||||
"""Cache of HTML pages"""
|
||||
|
||||
|
|
Loading…
Reference in a new issue