1
1
Fork 0
mirror of https://github.com/pypa/pip synced 2023-12-13 21:30:23 +01:00

Explicitly ignore rel='download' links while looking for HTML pages.

This way we avoid requesting archive headers just to see they're not
HTML pages.
This commit is contained in:
Maxime Rouyrre 2012-09-14 17:27:58 +02:00
parent 8c9a241fb2
commit 0bb9c31aea

View file

@ -250,7 +250,7 @@ class PackageFinder(object):
if page is None:
continue
done.append(page)
-for link in page.rel_links():
+for link in page.rel_links(rels=('homepage',)):
pending_queue.put(link)
_egg_fragment_re = re.compile(r'#egg=([^&]*)')
@ -534,8 +534,8 @@ class HTMLPage(object):
url = self.clean_link(urlparse.urljoin(self.base_url, url))
yield Link(url, self)
-def rel_links(self):
-for url in self.explicit_rel_links():
+def rel_links(self, rels=('homepage', 'download')):
+for url in self.explicit_rel_links(rels):
yield url
for url in self.scraped_rel_links():
yield url