1
1
Fork 0
mirror of https://github.com/pypa/pip synced 2023-12-13 21:30:23 +01:00

Fix parsing of JSON index dist-info-metadata values

This commit is contained in:
Paul Moore 2023-06-07 20:58:40 +01:00
parent 72a32e9907
commit 6c3db098ff
2 changed files with 89 additions and 45 deletions

View file

@ -69,18 +69,6 @@ class LinkHash:
def __post_init__(self) -> None:
assert self.name in _SUPPORTED_HASHES
@classmethod
def parse_pep658_hash(cls, dist_info_metadata: str) -> Optional["LinkHash"]:
"""Parse a PEP 658 data-dist-info-metadata hash."""
if dist_info_metadata == "true":
return None
name, sep, value = dist_info_metadata.partition("=")
if not sep:
return None
if name not in _SUPPORTED_HASHES:
return None
return cls(name=name, value=value)
@classmethod
@functools.lru_cache(maxsize=None)
def find_hash_url_fragment(cls, url: str) -> Optional["LinkHash"]:
@ -107,6 +95,20 @@ class LinkHash:
return hashes.is_hash_allowed(self.name, hex_digest=self.value)
@dataclass(frozen=True)
class MetadataFile:
"""Information about a core metadata file associated with a distribution."""
hashes: Optional[dict[str, str]]
# TODO: Do we care about stripping out unsupported hash methods?
def __init__(self, hashes: Optional[dict[str, str]]):
if hashes:
hashes = {n: v for n, v in hashes.items() if n in _SUPPORTED_HASHES}
# We need to use this as this is a frozen dataclass
object.__setattr__(self, "hashes", hashes)
def _clean_url_path_part(part: str) -> str:
"""
Clean a "part" of a URL path (i.e. after splitting on "@" characters).
@ -179,7 +181,7 @@ class Link(KeyBasedCompareMixin):
"comes_from",
"requires_python",
"yanked_reason",
"dist_info_metadata",
"metadata_file_data",
"cache_link_parsing",
"egg_fragment",
]
@ -190,7 +192,7 @@ class Link(KeyBasedCompareMixin):
comes_from: Optional[Union[str, "IndexContent"]] = None,
requires_python: Optional[str] = None,
yanked_reason: Optional[str] = None,
dist_info_metadata: Optional[str] = None,
metadata_file_data: Optional[MetadataFile] = None,
cache_link_parsing: bool = True,
hashes: Optional[Mapping[str, str]] = None,
) -> None:
@ -208,11 +210,10 @@ class Link(KeyBasedCompareMixin):
a simple repository HTML link. If the file has been yanked but
no reason was provided, this should be the empty string. See
PEP 592 for more information and the specification.
:param dist_info_metadata: the metadata attached to the file, or None if no such
metadata is provided. This is the value of the "data-dist-info-metadata"
attribute, if present, in a simple repository HTML link. This may be parsed
into its own `Link` by `self.metadata_link()`. See PEP 658 for more
information and the specification.
:param metadata_file_data: the metadata attached to the file, or None if
no such metadata is provided. This argument, if not None, indicates
that a separate metadata file exists, and also optionally supplies
hashes for that file.
:param cache_link_parsing: A flag that is used elsewhere to determine
whether resources retrieved from this link should be cached. PyPI
URLs should generally have this set to False, for example.
@ -220,6 +221,10 @@ class Link(KeyBasedCompareMixin):
determine the validity of a download.
"""
# The comes_from, requires_python, and metadata_file_data arguments are
# only used by classmethods of this class, and are not used in client
# code directly.
# url can be a UNC windows share
if url.startswith("\\\\"):
url = path_to_url(url)
@ -239,7 +244,7 @@ class Link(KeyBasedCompareMixin):
self.comes_from = comes_from
self.requires_python = requires_python if requires_python else None
self.yanked_reason = yanked_reason
self.dist_info_metadata = dist_info_metadata
self.metadata_file_data = metadata_file_data
super().__init__(key=url, defining_class=Link)
@ -262,9 +267,20 @@ class Link(KeyBasedCompareMixin):
url = _ensure_quoted_url(urllib.parse.urljoin(page_url, file_url))
pyrequire = file_data.get("requires-python")
yanked_reason = file_data.get("yanked")
dist_info_metadata = file_data.get("dist-info-metadata")
hashes = file_data.get("hashes", {})
# The dist-info-metadata value may be a boolean, or a dict of hashes.
metadata_info = file_data.get("dist-info-metadata", False)
if isinstance(metadata_info, dict):
# The file exists, and hashes have been supplied
metadata_file_data = MetadataFile(metadata_info)
elif metadata_info:
# The file exists, but there are no hashes
metadata_file_data = MetadataFile(None)
else:
# The file does not exist
metadata_file_data = None
# The Link.yanked_reason expects an empty string instead of a boolean.
if yanked_reason and not isinstance(yanked_reason, str):
yanked_reason = ""
@ -278,7 +294,7 @@ class Link(KeyBasedCompareMixin):
requires_python=pyrequire,
yanked_reason=yanked_reason,
hashes=hashes,
dist_info_metadata=dist_info_metadata,
metadata_file_data=metadata_file_data,
)
@classmethod
@ -298,14 +314,35 @@ class Link(KeyBasedCompareMixin):
url = _ensure_quoted_url(urllib.parse.urljoin(base_url, href))
pyrequire = anchor_attribs.get("data-requires-python")
yanked_reason = anchor_attribs.get("data-yanked")
dist_info_metadata = anchor_attribs.get("data-dist-info-metadata")
# The dist-info-metadata value may be the string "true", or a string of
# the form "hashname=hashval"
metadata_info = anchor_attribs.get("data-dist-info-metadata")
if metadata_info == "true":
# The file exists, but there are no hashes
metadata_file_data = MetadataFile(None)
elif metadata_info is None:
# The file does not exist
metadata_file_data = None
else:
# The file exists, and hashes have been supplied
hashname, sep, hashval = metadata_info.partition("=")
if sep == "=":
metadata_file_data = MetadataFile({hashname: hashval})
else:
# Error - data is wrong. Treat as no hashes supplied.
logger.debug(
"Index returned invalid data-dist-info-metadata value: %s",
metadata_info,
)
metadata_file_data = MetadataFile(None)
return cls(
url,
comes_from=page_url,
requires_python=pyrequire,
yanked_reason=yanked_reason,
dist_info_metadata=dist_info_metadata,
metadata_file_data=metadata_file_data,
)
def __str__(self) -> str:
@ -407,17 +444,13 @@ class Link(KeyBasedCompareMixin):
return match.group(1)
def metadata_link(self) -> Optional["Link"]:
"""Implementation of PEP 658 parsing."""
# Note that Link.from_element() parsing the "data-dist-info-metadata" attribute
# from an HTML anchor tag is typically how the Link.dist_info_metadata attribute
# gets set.
if self.dist_info_metadata is None:
"""Return a link to the associated core metadata file (if any)."""
if self.metadata_file_data is None:
return None
metadata_url = f"{self.url_without_fragment}.metadata"
metadata_link_hash = LinkHash.parse_pep658_hash(self.dist_info_metadata)
if metadata_link_hash is None:
if self.metadata_file_data.hashes is None:
return Link(metadata_url)
return Link(metadata_url, hashes=metadata_link_hash.as_dict())
return Link(metadata_url, hashes=self.metadata_file_data.hashes)
def as_hashes(self) -> Hashes:
return Hashes({k: [v] for k, v in self._hashes.items()})

View file

@ -30,6 +30,7 @@ from pip._internal.models.index import PyPI
from pip._internal.models.link import (
Link,
LinkHash,
MetadataFile,
_clean_url_path,
_ensure_quoted_url,
)
@ -527,7 +528,7 @@ def test_parse_links_json() -> None:
requires_python=">=3.7",
yanked_reason=None,
hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"},
dist_info_metadata="sha512=aabdd41",
metadata_file_data=MetadataFile({"sha512": "aabdd41"}),
),
]
@ -603,12 +604,12 @@ _pkg1_requirement = Requirement("pkg1==1.0")
),
],
)
def test_parse_links__dist_info_metadata(
def test_parse_links__metadata_file_data(
anchor_html: str,
expected: Optional[str],
hashes: Dict[str, str],
) -> None:
link = _test_parse_links_data_attribute(anchor_html, "dist_info_metadata", expected)
link = _test_parse_links_data_attribute(anchor_html, "metadata_file_data", expected)
assert link._hashes == hashes
@ -1080,17 +1081,27 @@ def test_link_hash_parsing(url: str, result: Optional[LinkHash]) -> None:
@pytest.mark.parametrize(
"dist_info_metadata, result",
"metadata_attrib, expected",
[
("sha256=aa113592bbe", LinkHash("sha256", "aa113592bbe")),
("sha256=", LinkHash("sha256", "")),
("sha500=aa113592bbe", None),
("true", None),
("", None),
("aa113592bbe", None),
("sha256=aa113592bbe", MetadataFile({"sha256": "aa113592bbe"})),
("sha256=", MetadataFile({"sha256": ""})),
("sha500=aa113592bbe", MetadataFile({})),
("true", MetadataFile(None)),
(None, None),
# TODO: Are these correct?
("", MetadataFile(None)),
("aa113592bbe", MetadataFile(None)),
],
)
def test_pep658_hash_parsing(
dist_info_metadata: str, result: Optional[LinkHash]
def test_metadata_file_info_parsing_html(
metadata_attrib: str, expected: Optional[MetadataFile]
) -> None:
assert LinkHash.parse_pep658_hash(dist_info_metadata) == result
attribs: Dict[str, Optional[str]] = {
"href": "something",
"data-dist-info-metadata": metadata_attrib,
}
page_url = "dummy_for_comes_from"
base_url = "https://index.url/simple"
link = Link.from_element(attribs, page_url, base_url)
assert link is not None and link.metadata_file_data == expected
# TODO: Do we need to do something for the JSON data?