mirror of
https://github.com/pypa/pip
synced 2023-12-13 21:30:23 +01:00
Fix parsing of JSON index dist-info-metadata values
This commit is contained in:
parent
72a32e9907
commit
6c3db098ff
|
@ -69,18 +69,6 @@ class LinkHash:
|
|||
def __post_init__(self) -> None:
|
||||
assert self.name in _SUPPORTED_HASHES
|
||||
|
||||
@classmethod
def parse_pep658_hash(cls, dist_info_metadata: str) -> Optional["LinkHash"]:
    """Build a LinkHash from a PEP 658 data-dist-info-metadata value.

    The value is expected to look like ``<algorithm>=<digest>``. None is
    returned when the value carries no usable hash: the literal "true",
    any string without an "=" separator, or an algorithm name that is not
    listed in ``_SUPPORTED_HASHES``.
    """
    # "true" contains no "=", so it is rejected by the separator check below
    # exactly like any other value that lacks one.
    algorithm, separator, digest = dist_info_metadata.partition("=")
    if separator and algorithm in _SUPPORTED_HASHES:
        return cls(name=algorithm, value=digest)
    return None
|
||||
|
||||
@classmethod
|
||||
@functools.lru_cache(maxsize=None)
|
||||
def find_hash_url_fragment(cls, url: str) -> Optional["LinkHash"]:
|
||||
|
@ -107,6 +95,20 @@ class LinkHash:
|
|||
return hashes.is_hash_allowed(self.name, hex_digest=self.value)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class MetadataFile:
    """Information about a core metadata file associated with a distribution.

    :param hashes: mapping of hash algorithm name to hash value for the
        metadata file, or None when no hashes are available. Entries whose
        algorithm name is not in ``_SUPPORTED_HASHES`` are dropped on
        construction, which may leave an empty mapping.
    """

    # Annotated with typing.Mapping instead of the builtin ``dict[...]``:
    # subscripting builtin containers only works at runtime on Python 3.9+
    # (PEP 585), and unless annotations are postponed this expression is
    # evaluated when the class body runs.
    hashes: Optional[Mapping[str, str]]

    def __post_init__(self) -> None:
        """Strip hash algorithms that are not in ``_SUPPORTED_HASHES``."""
        # TODO: Do we care about stripping out unsupported hash methods?
        if self.hashes:
            supported = {
                name: value
                for name, value in self.hashes.items()
                if name in _SUPPORTED_HASHES
            }
            # The dataclass is frozen, so plain attribute assignment raises;
            # object.__setattr__ is needed to store the filtered mapping.
            object.__setattr__(self, "hashes", supported)
|
||||
|
||||
|
||||
def _clean_url_path_part(part: str) -> str:
|
||||
"""
|
||||
Clean a "part" of a URL path (i.e. after splitting on "@" characters).
|
||||
|
@ -179,7 +181,7 @@ class Link(KeyBasedCompareMixin):
|
|||
"comes_from",
|
||||
"requires_python",
|
||||
"yanked_reason",
|
||||
"dist_info_metadata",
|
||||
"metadata_file_data",
|
||||
"cache_link_parsing",
|
||||
"egg_fragment",
|
||||
]
|
||||
|
@ -190,7 +192,7 @@ class Link(KeyBasedCompareMixin):
|
|||
comes_from: Optional[Union[str, "IndexContent"]] = None,
|
||||
requires_python: Optional[str] = None,
|
||||
yanked_reason: Optional[str] = None,
|
||||
dist_info_metadata: Optional[str] = None,
|
||||
metadata_file_data: Optional[MetadataFile] = None,
|
||||
cache_link_parsing: bool = True,
|
||||
hashes: Optional[Mapping[str, str]] = None,
|
||||
) -> None:
|
||||
|
@ -208,11 +210,10 @@ class Link(KeyBasedCompareMixin):
|
|||
a simple repository HTML link. If the file has been yanked but
|
||||
no reason was provided, this should be the empty string. See
|
||||
PEP 592 for more information and the specification.
|
||||
:param dist_info_metadata: the metadata attached to the file, or None if no such
|
||||
metadata is provided. This is the value of the "data-dist-info-metadata"
|
||||
attribute, if present, in a simple repository HTML link. This may be parsed
|
||||
into its own `Link` by `self.metadata_link()`. See PEP 658 for more
|
||||
information and the specification.
|
||||
:param metadata_file_data: the metadata attached to the file, or None if
|
||||
no such metadata is provided. This argument, if not None, indicates
|
||||
that a separate metadata file exists, and also optionally supplies
|
||||
hashes for that file.
|
||||
:param cache_link_parsing: A flag that is used elsewhere to determine
|
||||
whether resources retrieved from this link should be cached. PyPI
|
||||
URLs should generally have this set to False, for example.
|
||||
|
@ -220,6 +221,10 @@ class Link(KeyBasedCompareMixin):
|
|||
determine the validity of a download.
|
||||
"""
|
||||
|
||||
# The comes_from, requires_python, and metadata_file_data arguments are
|
||||
# only used by classmethods of this class, and are not used in client
|
||||
# code directly.
|
||||
|
||||
# url can be a UNC windows share
|
||||
if url.startswith("\\\\"):
|
||||
url = path_to_url(url)
|
||||
|
@ -239,7 +244,7 @@ class Link(KeyBasedCompareMixin):
|
|||
self.comes_from = comes_from
|
||||
self.requires_python = requires_python if requires_python else None
|
||||
self.yanked_reason = yanked_reason
|
||||
self.dist_info_metadata = dist_info_metadata
|
||||
self.metadata_file_data = metadata_file_data
|
||||
|
||||
super().__init__(key=url, defining_class=Link)
|
||||
|
||||
|
@ -262,9 +267,20 @@ class Link(KeyBasedCompareMixin):
|
|||
url = _ensure_quoted_url(urllib.parse.urljoin(page_url, file_url))
|
||||
pyrequire = file_data.get("requires-python")
|
||||
yanked_reason = file_data.get("yanked")
|
||||
dist_info_metadata = file_data.get("dist-info-metadata")
|
||||
hashes = file_data.get("hashes", {})
|
||||
|
||||
# The dist-info-metadata value may be a boolean, or a dict of hashes.
|
||||
metadata_info = file_data.get("dist-info-metadata", False)
|
||||
if isinstance(metadata_info, dict):
|
||||
# The file exists, and hashes have been supplied
|
||||
metadata_file_data = MetadataFile(metadata_info)
|
||||
elif metadata_info:
|
||||
# The file exists, but there are no hashes
|
||||
metadata_file_data = MetadataFile(None)
|
||||
else:
|
||||
# The file does not exist
|
||||
metadata_file_data = None
|
||||
|
||||
# The Link.yanked_reason expects an empty string instead of a boolean.
|
||||
if yanked_reason and not isinstance(yanked_reason, str):
|
||||
yanked_reason = ""
|
||||
|
@ -278,7 +294,7 @@ class Link(KeyBasedCompareMixin):
|
|||
requires_python=pyrequire,
|
||||
yanked_reason=yanked_reason,
|
||||
hashes=hashes,
|
||||
dist_info_metadata=dist_info_metadata,
|
||||
metadata_file_data=metadata_file_data,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
|
@ -298,14 +314,35 @@ class Link(KeyBasedCompareMixin):
|
|||
url = _ensure_quoted_url(urllib.parse.urljoin(base_url, href))
|
||||
pyrequire = anchor_attribs.get("data-requires-python")
|
||||
yanked_reason = anchor_attribs.get("data-yanked")
|
||||
dist_info_metadata = anchor_attribs.get("data-dist-info-metadata")
|
||||
|
||||
# The dist-info-metadata value may be the string "true", or a string of
|
||||
# the form "hashname=hashval"
|
||||
metadata_info = anchor_attribs.get("data-dist-info-metadata")
|
||||
if metadata_info == "true":
|
||||
# The file exists, but there are no hashes
|
||||
metadata_file_data = MetadataFile(None)
|
||||
elif metadata_info is None:
|
||||
# The file does not exist
|
||||
metadata_file_data = None
|
||||
else:
|
||||
# The file exists, and hashes have been supplied
|
||||
hashname, sep, hashval = metadata_info.partition("=")
|
||||
if sep == "=":
|
||||
metadata_file_data = MetadataFile({hashname: hashval})
|
||||
else:
|
||||
# Error - data is wrong. Treat as no hashes supplied.
|
||||
logger.debug(
|
||||
"Index returned invalid data-dist-info-metadata value: %s",
|
||||
metadata_info,
|
||||
)
|
||||
metadata_file_data = MetadataFile(None)
|
||||
|
||||
return cls(
|
||||
url,
|
||||
comes_from=page_url,
|
||||
requires_python=pyrequire,
|
||||
yanked_reason=yanked_reason,
|
||||
dist_info_metadata=dist_info_metadata,
|
||||
metadata_file_data=metadata_file_data,
|
||||
)
|
||||
|
||||
def __str__(self) -> str:
|
||||
|
@ -407,17 +444,13 @@ class Link(KeyBasedCompareMixin):
|
|||
return match.group(1)
|
||||
|
||||
def metadata_link(self) -> Optional["Link"]:
|
||||
"""Implementation of PEP 658 parsing."""
|
||||
# Note that Link.from_element() parsing the "data-dist-info-metadata" attribute
|
||||
# from an HTML anchor tag is typically how the Link.dist_info_metadata attribute
|
||||
# gets set.
|
||||
if self.dist_info_metadata is None:
|
||||
"""Return a link to the associated core metadata file (if any)."""
|
||||
if self.metadata_file_data is None:
|
||||
return None
|
||||
metadata_url = f"{self.url_without_fragment}.metadata"
|
||||
metadata_link_hash = LinkHash.parse_pep658_hash(self.dist_info_metadata)
|
||||
if metadata_link_hash is None:
|
||||
if self.metadata_file_data.hashes is None:
|
||||
return Link(metadata_url)
|
||||
return Link(metadata_url, hashes=metadata_link_hash.as_dict())
|
||||
return Link(metadata_url, hashes=self.metadata_file_data.hashes)
|
||||
|
||||
def as_hashes(self) -> Hashes:
|
||||
return Hashes({k: [v] for k, v in self._hashes.items()})
|
||||
|
|
|
@ -30,6 +30,7 @@ from pip._internal.models.index import PyPI
|
|||
from pip._internal.models.link import (
|
||||
Link,
|
||||
LinkHash,
|
||||
MetadataFile,
|
||||
_clean_url_path,
|
||||
_ensure_quoted_url,
|
||||
)
|
||||
|
@ -527,7 +528,7 @@ def test_parse_links_json() -> None:
|
|||
requires_python=">=3.7",
|
||||
yanked_reason=None,
|
||||
hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"},
|
||||
dist_info_metadata="sha512=aabdd41",
|
||||
metadata_file_data=MetadataFile({"sha512": "aabdd41"}),
|
||||
),
|
||||
]
|
||||
|
||||
|
@ -603,12 +604,12 @@ _pkg1_requirement = Requirement("pkg1==1.0")
|
|||
),
|
||||
],
|
||||
)
|
||||
def test_parse_links__dist_info_metadata(
|
||||
def test_parse_links__metadata_file_data(
|
||||
anchor_html: str,
|
||||
expected: Optional[str],
|
||||
hashes: Dict[str, str],
|
||||
) -> None:
|
||||
link = _test_parse_links_data_attribute(anchor_html, "dist_info_metadata", expected)
|
||||
link = _test_parse_links_data_attribute(anchor_html, "metadata_file_data", expected)
|
||||
assert link._hashes == hashes
|
||||
|
||||
|
||||
|
@ -1080,17 +1081,27 @@ def test_link_hash_parsing(url: str, result: Optional[LinkHash]) -> None:
|
|||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dist_info_metadata, result",
|
||||
"metadata_attrib, expected",
|
||||
[
|
||||
("sha256=aa113592bbe", LinkHash("sha256", "aa113592bbe")),
|
||||
("sha256=", LinkHash("sha256", "")),
|
||||
("sha500=aa113592bbe", None),
|
||||
("true", None),
|
||||
("", None),
|
||||
("aa113592bbe", None),
|
||||
("sha256=aa113592bbe", MetadataFile({"sha256": "aa113592bbe"})),
|
||||
("sha256=", MetadataFile({"sha256": ""})),
|
||||
("sha500=aa113592bbe", MetadataFile({})),
|
||||
("true", MetadataFile(None)),
|
||||
(None, None),
|
||||
# TODO: Are these correct?
|
||||
("", MetadataFile(None)),
|
||||
("aa113592bbe", MetadataFile(None)),
|
||||
],
|
||||
)
|
||||
def test_pep658_hash_parsing(
|
||||
dist_info_metadata: str, result: Optional[LinkHash]
|
||||
def test_metadata_file_info_parsing_html(
|
||||
metadata_attrib: str, expected: Optional[MetadataFile]
|
||||
) -> None:
|
||||
assert LinkHash.parse_pep658_hash(dist_info_metadata) == result
|
||||
attribs: Dict[str, Optional[str]] = {
|
||||
"href": "something",
|
||||
"data-dist-info-metadata": metadata_attrib,
|
||||
}
|
||||
page_url = "dummy_for_comes_from"
|
||||
base_url = "https://index.url/simple"
|
||||
link = Link.from_element(attribs, page_url, base_url)
|
||||
assert link is not None and link.metadata_file_data == expected
|
||||
# TODO: Do we need to do something for the JSON data?
|
||||
|
|
Loading…
Reference in a new issue