jpayne@68: from __future__ import annotations jpayne@68: jpayne@68: import email.feedparser jpayne@68: import email.header jpayne@68: import email.message jpayne@68: import email.parser jpayne@68: import email.policy jpayne@68: import pathlib jpayne@68: import sys jpayne@68: import typing jpayne@68: from typing import ( jpayne@68: Any, jpayne@68: Callable, jpayne@68: Generic, jpayne@68: Literal, jpayne@68: TypedDict, jpayne@68: cast, jpayne@68: ) jpayne@68: jpayne@68: from . import licenses, requirements, specifiers, utils jpayne@68: from . import version as version_module jpayne@68: from .licenses import NormalizedLicenseExpression jpayne@68: jpayne@68: T = typing.TypeVar("T") jpayne@68: jpayne@68: jpayne@68: if sys.version_info >= (3, 11): # pragma: no cover jpayne@68: ExceptionGroup = ExceptionGroup jpayne@68: else: # pragma: no cover jpayne@68: jpayne@68: class ExceptionGroup(Exception): jpayne@68: """A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11. jpayne@68: jpayne@68: If :external:exc:`ExceptionGroup` is already defined by Python itself, jpayne@68: that version is used instead. 
if sys.version_info >= (3, 11):  # pragma: no cover
    ExceptionGroup = ExceptionGroup
else:  # pragma: no cover

    class ExceptionGroup(Exception):
        """Stand-in for :external:exc:`ExceptionGroup` on Python < 3.11.

        On interpreters that already provide :external:exc:`ExceptionGroup`,
        the built-in version is used instead.
        """

        message: str
        exceptions: list[Exception]

        def __init__(self, message: str, exceptions: list[Exception]) -> None:
            self.message = message
            self.exceptions = exceptions

        def __repr__(self) -> str:
            return f"{type(self).__name__}({self.message!r}, {self.exceptions!r})"


class InvalidMetadata(ValueError):
    """A metadata field contains invalid data."""

    field: str
    """The name of the field that contains invalid data."""

    def __init__(self, field: str, message: str) -> None:
        self.field = field
        super().__init__(message)
jpayne@68: jpayne@68: """ jpayne@68: jpayne@68: # Metadata 1.0 - PEP 241 jpayne@68: metadata_version: str jpayne@68: name: str jpayne@68: version: str jpayne@68: platforms: list[str] jpayne@68: summary: str jpayne@68: description: str jpayne@68: keywords: list[str] jpayne@68: home_page: str jpayne@68: author: str jpayne@68: author_email: str jpayne@68: license: str jpayne@68: jpayne@68: # Metadata 1.1 - PEP 314 jpayne@68: supported_platforms: list[str] jpayne@68: download_url: str jpayne@68: classifiers: list[str] jpayne@68: requires: list[str] jpayne@68: provides: list[str] jpayne@68: obsoletes: list[str] jpayne@68: jpayne@68: # Metadata 1.2 - PEP 345 jpayne@68: maintainer: str jpayne@68: maintainer_email: str jpayne@68: requires_dist: list[str] jpayne@68: provides_dist: list[str] jpayne@68: obsoletes_dist: list[str] jpayne@68: requires_python: str jpayne@68: requires_external: list[str] jpayne@68: project_urls: dict[str, str] jpayne@68: jpayne@68: # Metadata 2.0 jpayne@68: # PEP 426 attempted to completely revamp the metadata format jpayne@68: # but got stuck without ever being able to build consensus on jpayne@68: # it and ultimately ended up withdrawn. jpayne@68: # jpayne@68: # However, a number of tools had started emitting METADATA with jpayne@68: # `2.0` Metadata-Version, so for historical reasons, this version jpayne@68: # was skipped. jpayne@68: jpayne@68: # Metadata 2.1 - PEP 566 jpayne@68: description_content_type: str jpayne@68: provides_extra: list[str] jpayne@68: jpayne@68: # Metadata 2.2 - PEP 643 jpayne@68: dynamic: list[str] jpayne@68: jpayne@68: # Metadata 2.3 - PEP 685 jpayne@68: # No new fields were added in PEP 685, just some edge case were jpayne@68: # tightened up to provide better interoptability. 
jpayne@68: jpayne@68: # Metadata 2.4 - PEP 639 jpayne@68: license_expression: str jpayne@68: license_files: list[str] jpayne@68: jpayne@68: jpayne@68: _STRING_FIELDS = { jpayne@68: "author", jpayne@68: "author_email", jpayne@68: "description", jpayne@68: "description_content_type", jpayne@68: "download_url", jpayne@68: "home_page", jpayne@68: "license", jpayne@68: "license_expression", jpayne@68: "maintainer", jpayne@68: "maintainer_email", jpayne@68: "metadata_version", jpayne@68: "name", jpayne@68: "requires_python", jpayne@68: "summary", jpayne@68: "version", jpayne@68: } jpayne@68: jpayne@68: _LIST_FIELDS = { jpayne@68: "classifiers", jpayne@68: "dynamic", jpayne@68: "license_files", jpayne@68: "obsoletes", jpayne@68: "obsoletes_dist", jpayne@68: "platforms", jpayne@68: "provides", jpayne@68: "provides_dist", jpayne@68: "provides_extra", jpayne@68: "requires", jpayne@68: "requires_dist", jpayne@68: "requires_external", jpayne@68: "supported_platforms", jpayne@68: } jpayne@68: jpayne@68: _DICT_FIELDS = { jpayne@68: "project_urls", jpayne@68: } jpayne@68: jpayne@68: jpayne@68: def _parse_keywords(data: str) -> list[str]: jpayne@68: """Split a string of comma-separated keywords into a list of keywords.""" jpayne@68: return [k.strip() for k in data.split(",")] jpayne@68: jpayne@68: jpayne@68: def _parse_project_urls(data: list[str]) -> dict[str, str]: jpayne@68: """Parse a list of label/URL string pairings separated by a comma.""" jpayne@68: urls = {} jpayne@68: for pair in data: jpayne@68: # Our logic is slightly tricky here as we want to try and do jpayne@68: # *something* reasonable with malformed data. jpayne@68: # jpayne@68: # The main thing that we have to worry about, is data that does jpayne@68: # not have a ',' at all to split the label from the Value. 
There jpayne@68: # isn't a singular right answer here, and we will fail validation jpayne@68: # later on (if the caller is validating) so it doesn't *really* jpayne@68: # matter, but since the missing value has to be an empty str jpayne@68: # and our return value is dict[str, str], if we let the key jpayne@68: # be the missing value, then they'd have multiple '' values that jpayne@68: # overwrite each other in a accumulating dict. jpayne@68: # jpayne@68: # The other potentional issue is that it's possible to have the jpayne@68: # same label multiple times in the metadata, with no solid "right" jpayne@68: # answer with what to do in that case. As such, we'll do the only jpayne@68: # thing we can, which is treat the field as unparseable and add it jpayne@68: # to our list of unparsed fields. jpayne@68: parts = [p.strip() for p in pair.split(",", 1)] jpayne@68: parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items jpayne@68: jpayne@68: # TODO: The spec doesn't say anything about if the keys should be jpayne@68: # considered case sensitive or not... logically they should jpayne@68: # be case-preserving and case-insensitive, but doing that jpayne@68: # would open up more cases where we might have duplicate jpayne@68: # entries. jpayne@68: label, url = parts jpayne@68: if label in urls: jpayne@68: # The label already exists in our set of urls, so this field jpayne@68: # is unparseable, and we can just add the whole thing to our jpayne@68: # unparseable data and stop processing it. jpayne@68: raise KeyError("duplicate labels in project urls") jpayne@68: urls[label] = url jpayne@68: jpayne@68: return urls jpayne@68: jpayne@68: jpayne@68: def _get_payload(msg: email.message.Message, source: bytes | str) -> str: jpayne@68: """Get the body of the message.""" jpayne@68: # If our source is a str, then our caller has managed encodings for us, jpayne@68: # and we don't need to deal with it. 
jpayne@68: if isinstance(source, str): jpayne@68: payload = msg.get_payload() jpayne@68: assert isinstance(payload, str) jpayne@68: return payload jpayne@68: # If our source is a bytes, then we're managing the encoding and we need jpayne@68: # to deal with it. jpayne@68: else: jpayne@68: bpayload = msg.get_payload(decode=True) jpayne@68: assert isinstance(bpayload, bytes) jpayne@68: try: jpayne@68: return bpayload.decode("utf8", "strict") jpayne@68: except UnicodeDecodeError as exc: jpayne@68: raise ValueError("payload in an invalid encoding") from exc jpayne@68: jpayne@68: jpayne@68: # The various parse_FORMAT functions here are intended to be as lenient as jpayne@68: # possible in their parsing, while still returning a correctly typed jpayne@68: # RawMetadata. jpayne@68: # jpayne@68: # To aid in this, we also generally want to do as little touching of the jpayne@68: # data as possible, except where there are possibly some historic holdovers jpayne@68: # that make valid data awkward to work with. jpayne@68: # jpayne@68: # While this is a lower level, intermediate format than our ``Metadata`` jpayne@68: # class, some light touch ups can make a massive difference in usability. jpayne@68: jpayne@68: # Map METADATA fields to RawMetadata. 
jpayne@68: _EMAIL_TO_RAW_MAPPING = { jpayne@68: "author": "author", jpayne@68: "author-email": "author_email", jpayne@68: "classifier": "classifiers", jpayne@68: "description": "description", jpayne@68: "description-content-type": "description_content_type", jpayne@68: "download-url": "download_url", jpayne@68: "dynamic": "dynamic", jpayne@68: "home-page": "home_page", jpayne@68: "keywords": "keywords", jpayne@68: "license": "license", jpayne@68: "license-expression": "license_expression", jpayne@68: "license-file": "license_files", jpayne@68: "maintainer": "maintainer", jpayne@68: "maintainer-email": "maintainer_email", jpayne@68: "metadata-version": "metadata_version", jpayne@68: "name": "name", jpayne@68: "obsoletes": "obsoletes", jpayne@68: "obsoletes-dist": "obsoletes_dist", jpayne@68: "platform": "platforms", jpayne@68: "project-url": "project_urls", jpayne@68: "provides": "provides", jpayne@68: "provides-dist": "provides_dist", jpayne@68: "provides-extra": "provides_extra", jpayne@68: "requires": "requires", jpayne@68: "requires-dist": "requires_dist", jpayne@68: "requires-external": "requires_external", jpayne@68: "requires-python": "requires_python", jpayne@68: "summary": "summary", jpayne@68: "supported-platform": "supported_platforms", jpayne@68: "version": "version", jpayne@68: } jpayne@68: _RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()} jpayne@68: jpayne@68: jpayne@68: def parse_email(data: bytes | str) -> tuple[RawMetadata, dict[str, list[str]]]: jpayne@68: """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``). jpayne@68: jpayne@68: This function returns a two-item tuple of dicts. The first dict is of jpayne@68: recognized fields from the core metadata specification. Fields that can be jpayne@68: parsed and translated into Python's built-in types are converted jpayne@68: appropriately. All other fields are left as-is. 
Fields that are allowed to jpayne@68: appear multiple times are stored as lists. jpayne@68: jpayne@68: The second dict contains all other fields from the metadata. This includes jpayne@68: any unrecognized fields. It also includes any fields which are expected to jpayne@68: be parsed into a built-in type but were not formatted appropriately. Finally, jpayne@68: any fields that are expected to appear only once but are repeated are jpayne@68: included in this dict. jpayne@68: jpayne@68: """ jpayne@68: raw: dict[str, str | list[str] | dict[str, str]] = {} jpayne@68: unparsed: dict[str, list[str]] = {} jpayne@68: jpayne@68: if isinstance(data, str): jpayne@68: parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data) jpayne@68: else: jpayne@68: parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data) jpayne@68: jpayne@68: # We have to wrap parsed.keys() in a set, because in the case of multiple jpayne@68: # values for a key (a list), the key will appear multiple times in the jpayne@68: # list of keys, but we're avoiding that by using get_all(). jpayne@68: for name in frozenset(parsed.keys()): jpayne@68: # Header names in RFC are case insensitive, so we'll normalize to all jpayne@68: # lower case to make comparisons easier. jpayne@68: name = name.lower() jpayne@68: jpayne@68: # We use get_all() here, even for fields that aren't multiple use, jpayne@68: # because otherwise someone could have e.g. two Name fields, and we jpayne@68: # would just silently ignore it rather than doing something about it. jpayne@68: headers = parsed.get_all(name) or [] jpayne@68: jpayne@68: # The way the email module works when parsing bytes is that it jpayne@68: # unconditionally decodes the bytes as ascii using the surrogateescape jpayne@68: # handler. 
When you pull that data back out (such as with get_all() ), jpayne@68: # it looks to see if the str has any surrogate escapes, and if it does jpayne@68: # it wraps it in a Header object instead of returning the string. jpayne@68: # jpayne@68: # As such, we'll look for those Header objects, and fix up the encoding. jpayne@68: value = [] jpayne@68: # Flag if we have run into any issues processing the headers, thus jpayne@68: # signalling that the data belongs in 'unparsed'. jpayne@68: valid_encoding = True jpayne@68: for h in headers: jpayne@68: # It's unclear if this can return more types than just a Header or jpayne@68: # a str, so we'll just assert here to make sure. jpayne@68: assert isinstance(h, (email.header.Header, str)) jpayne@68: jpayne@68: # If it's a header object, we need to do our little dance to get jpayne@68: # the real data out of it. In cases where there is invalid data jpayne@68: # we're going to end up with mojibake, but there's no obvious, good jpayne@68: # way around that without reimplementing parts of the Header object jpayne@68: # ourselves. jpayne@68: # jpayne@68: # That should be fine since, if mojibacked happens, this key is jpayne@68: # going into the unparsed dict anyways. jpayne@68: if isinstance(h, email.header.Header): jpayne@68: # The Header object stores it's data as chunks, and each chunk jpayne@68: # can be independently encoded, so we'll need to check each jpayne@68: # of them. jpayne@68: chunks: list[tuple[bytes, str | None]] = [] jpayne@68: for bin, encoding in email.header.decode_header(h): jpayne@68: try: jpayne@68: bin.decode("utf8", "strict") jpayne@68: except UnicodeDecodeError: jpayne@68: # Enable mojibake. 
jpayne@68: encoding = "latin1" jpayne@68: valid_encoding = False jpayne@68: else: jpayne@68: encoding = "utf8" jpayne@68: chunks.append((bin, encoding)) jpayne@68: jpayne@68: # Turn our chunks back into a Header object, then let that jpayne@68: # Header object do the right thing to turn them into a jpayne@68: # string for us. jpayne@68: value.append(str(email.header.make_header(chunks))) jpayne@68: # This is already a string, so just add it. jpayne@68: else: jpayne@68: value.append(h) jpayne@68: jpayne@68: # We've processed all of our values to get them into a list of str, jpayne@68: # but we may have mojibake data, in which case this is an unparsed jpayne@68: # field. jpayne@68: if not valid_encoding: jpayne@68: unparsed[name] = value jpayne@68: continue jpayne@68: jpayne@68: raw_name = _EMAIL_TO_RAW_MAPPING.get(name) jpayne@68: if raw_name is None: jpayne@68: # This is a bit of a weird situation, we've encountered a key that jpayne@68: # we don't know what it means, so we don't know whether it's meant jpayne@68: # to be a list or not. jpayne@68: # jpayne@68: # Since we can't really tell one way or another, we'll just leave it jpayne@68: # as a list, even though it may be a single item list, because that's jpayne@68: # what makes the most sense for email headers. jpayne@68: unparsed[name] = value jpayne@68: continue jpayne@68: jpayne@68: # If this is one of our string fields, then we'll check to see if our jpayne@68: # value is a list of a single item. If it is then we'll assume that jpayne@68: # it was emitted as a single string, and unwrap the str from inside jpayne@68: # the list. jpayne@68: # jpayne@68: # If it's any other kind of data, then we haven't the faintest clue jpayne@68: # what we should parse it as, and we have to just add it to our list jpayne@68: # of unparsed stuff. 
jpayne@68: if raw_name in _STRING_FIELDS and len(value) == 1: jpayne@68: raw[raw_name] = value[0] jpayne@68: # If this is one of our list of string fields, then we can just assign jpayne@68: # the value, since email *only* has strings, and our get_all() call jpayne@68: # above ensures that this is a list. jpayne@68: elif raw_name in _LIST_FIELDS: jpayne@68: raw[raw_name] = value jpayne@68: # Special Case: Keywords jpayne@68: # The keywords field is implemented in the metadata spec as a str, jpayne@68: # but it conceptually is a list of strings, and is serialized using jpayne@68: # ", ".join(keywords), so we'll do some light data massaging to turn jpayne@68: # this into what it logically is. jpayne@68: elif raw_name == "keywords" and len(value) == 1: jpayne@68: raw[raw_name] = _parse_keywords(value[0]) jpayne@68: # Special Case: Project-URL jpayne@68: # The project urls is implemented in the metadata spec as a list of jpayne@68: # specially-formatted strings that represent a key and a value, which jpayne@68: # is fundamentally a mapping, however the email format doesn't support jpayne@68: # mappings in a sane way, so it was crammed into a list of strings jpayne@68: # instead. jpayne@68: # jpayne@68: # We will do a little light data massaging to turn this into a map as jpayne@68: # it logically should be. jpayne@68: elif raw_name == "project_urls": jpayne@68: try: jpayne@68: raw[raw_name] = _parse_project_urls(value) jpayne@68: except KeyError: jpayne@68: unparsed[name] = value jpayne@68: # Nothing that we've done has managed to parse this, so it'll just jpayne@68: # throw it in our unparseable data and move on. jpayne@68: else: jpayne@68: unparsed[name] = value jpayne@68: jpayne@68: # We need to support getting the Description from the message payload in jpayne@68: # addition to getting it from the the headers. 
This does mean, though, there jpayne@68: # is the possibility of it being set both ways, in which case we put both jpayne@68: # in 'unparsed' since we don't know which is right. jpayne@68: try: jpayne@68: payload = _get_payload(parsed, data) jpayne@68: except ValueError: jpayne@68: unparsed.setdefault("description", []).append( jpayne@68: parsed.get_payload(decode=isinstance(data, bytes)) # type: ignore[call-overload] jpayne@68: ) jpayne@68: else: jpayne@68: if payload: jpayne@68: # Check to see if we've already got a description, if so then both jpayne@68: # it, and this body move to unparseable. jpayne@68: if "description" in raw: jpayne@68: description_header = cast(str, raw.pop("description")) jpayne@68: unparsed.setdefault("description", []).extend( jpayne@68: [description_header, payload] jpayne@68: ) jpayne@68: elif "description" in unparsed: jpayne@68: unparsed["description"].append(payload) jpayne@68: else: jpayne@68: raw["description"] = payload jpayne@68: jpayne@68: # We need to cast our `raw` to a metadata, because a TypedDict only support jpayne@68: # literal key names, but we're computing our key names on purpose, but the jpayne@68: # way this function is implemented, our `TypedDict` can only have valid key jpayne@68: # names. jpayne@68: return cast(RawMetadata, raw), unparsed jpayne@68: jpayne@68: jpayne@68: _NOT_FOUND = object() jpayne@68: jpayne@68: jpayne@68: # Keep the two values in sync. jpayne@68: _VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"] jpayne@68: _MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"] jpayne@68: jpayne@68: _REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"]) jpayne@68: jpayne@68: jpayne@68: class _Validator(Generic[T]): jpayne@68: """Validate a metadata field. jpayne@68: jpayne@68: All _process_*() methods correspond to a core metadata field. The method is jpayne@68: called with the field's raw value. 
If the raw value is valid it is returned jpayne@68: in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field). jpayne@68: If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause jpayne@68: as appropriate). jpayne@68: """ jpayne@68: jpayne@68: name: str jpayne@68: raw_name: str jpayne@68: added: _MetadataVersion jpayne@68: jpayne@68: def __init__( jpayne@68: self, jpayne@68: *, jpayne@68: added: _MetadataVersion = "1.0", jpayne@68: ) -> None: jpayne@68: self.added = added jpayne@68: jpayne@68: def __set_name__(self, _owner: Metadata, name: str) -> None: jpayne@68: self.name = name jpayne@68: self.raw_name = _RAW_TO_EMAIL_MAPPING[name] jpayne@68: jpayne@68: def __get__(self, instance: Metadata, _owner: type[Metadata]) -> T: jpayne@68: # With Python 3.8, the caching can be replaced with functools.cached_property(). jpayne@68: # No need to check the cache as attribute lookup will resolve into the jpayne@68: # instance's __dict__ before __get__ is called. jpayne@68: cache = instance.__dict__ jpayne@68: value = instance._raw.get(self.name) jpayne@68: jpayne@68: # To make the _process_* methods easier, we'll check if the value is None jpayne@68: # and if this field is NOT a required attribute, and if both of those jpayne@68: # things are true, we'll skip the the converter. This will mean that the jpayne@68: # converters never have to deal with the None union. 
jpayne@68: if self.name in _REQUIRED_ATTRS or value is not None: jpayne@68: try: jpayne@68: converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}") jpayne@68: except AttributeError: jpayne@68: pass jpayne@68: else: jpayne@68: value = converter(value) jpayne@68: jpayne@68: cache[self.name] = value jpayne@68: try: jpayne@68: del instance._raw[self.name] # type: ignore[misc] jpayne@68: except KeyError: jpayne@68: pass jpayne@68: jpayne@68: return cast(T, value) jpayne@68: jpayne@68: def _invalid_metadata( jpayne@68: self, msg: str, cause: Exception | None = None jpayne@68: ) -> InvalidMetadata: jpayne@68: exc = InvalidMetadata( jpayne@68: self.raw_name, msg.format_map({"field": repr(self.raw_name)}) jpayne@68: ) jpayne@68: exc.__cause__ = cause jpayne@68: return exc jpayne@68: jpayne@68: def _process_metadata_version(self, value: str) -> _MetadataVersion: jpayne@68: # Implicitly makes Metadata-Version required. jpayne@68: if value not in _VALID_METADATA_VERSIONS: jpayne@68: raise self._invalid_metadata(f"{value!r} is not a valid metadata version") jpayne@68: return cast(_MetadataVersion, value) jpayne@68: jpayne@68: def _process_name(self, value: str) -> str: jpayne@68: if not value: jpayne@68: raise self._invalid_metadata("{field} is a required field") jpayne@68: # Validate the name as a side-effect. 
jpayne@68: try: jpayne@68: utils.canonicalize_name(value, validate=True) jpayne@68: except utils.InvalidName as exc: jpayne@68: raise self._invalid_metadata( jpayne@68: f"{value!r} is invalid for {{field}}", cause=exc jpayne@68: ) from exc jpayne@68: else: jpayne@68: return value jpayne@68: jpayne@68: def _process_version(self, value: str) -> version_module.Version: jpayne@68: if not value: jpayne@68: raise self._invalid_metadata("{field} is a required field") jpayne@68: try: jpayne@68: return version_module.parse(value) jpayne@68: except version_module.InvalidVersion as exc: jpayne@68: raise self._invalid_metadata( jpayne@68: f"{value!r} is invalid for {{field}}", cause=exc jpayne@68: ) from exc jpayne@68: jpayne@68: def _process_summary(self, value: str) -> str: jpayne@68: """Check the field contains no newlines.""" jpayne@68: if "\n" in value: jpayne@68: raise self._invalid_metadata("{field} must be a single line") jpayne@68: return value jpayne@68: jpayne@68: def _process_description_content_type(self, value: str) -> str: jpayne@68: content_types = {"text/plain", "text/x-rst", "text/markdown"} jpayne@68: message = email.message.EmailMessage() jpayne@68: message["content-type"] = value jpayne@68: jpayne@68: content_type, parameters = ( jpayne@68: # Defaults to `text/plain` if parsing failed. jpayne@68: message.get_content_type().lower(), jpayne@68: message["content-type"].params, jpayne@68: ) jpayne@68: # Check if content-type is valid or defaulted to `text/plain` and thus was jpayne@68: # not parseable. 
jpayne@68: if content_type not in content_types or content_type not in value.lower(): jpayne@68: raise self._invalid_metadata( jpayne@68: f"{{field}} must be one of {list(content_types)}, not {value!r}" jpayne@68: ) jpayne@68: jpayne@68: charset = parameters.get("charset", "UTF-8") jpayne@68: if charset != "UTF-8": jpayne@68: raise self._invalid_metadata( jpayne@68: f"{{field}} can only specify the UTF-8 charset, not {list(charset)}" jpayne@68: ) jpayne@68: jpayne@68: markdown_variants = {"GFM", "CommonMark"} jpayne@68: variant = parameters.get("variant", "GFM") # Use an acceptable default. jpayne@68: if content_type == "text/markdown" and variant not in markdown_variants: jpayne@68: raise self._invalid_metadata( jpayne@68: f"valid Markdown variants for {{field}} are {list(markdown_variants)}, " jpayne@68: f"not {variant!r}", jpayne@68: ) jpayne@68: return value jpayne@68: jpayne@68: def _process_dynamic(self, value: list[str]) -> list[str]: jpayne@68: for dynamic_field in map(str.lower, value): jpayne@68: if dynamic_field in {"name", "version", "metadata-version"}: jpayne@68: raise self._invalid_metadata( jpayne@68: f"{dynamic_field!r} is not allowed as a dynamic field" jpayne@68: ) jpayne@68: elif dynamic_field not in _EMAIL_TO_RAW_MAPPING: jpayne@68: raise self._invalid_metadata( jpayne@68: f"{dynamic_field!r} is not a valid dynamic field" jpayne@68: ) jpayne@68: return list(map(str.lower, value)) jpayne@68: jpayne@68: def _process_provides_extra( jpayne@68: self, jpayne@68: value: list[str], jpayne@68: ) -> list[utils.NormalizedName]: jpayne@68: normalized_names = [] jpayne@68: try: jpayne@68: for name in value: jpayne@68: normalized_names.append(utils.canonicalize_name(name, validate=True)) jpayne@68: except utils.InvalidName as exc: jpayne@68: raise self._invalid_metadata( jpayne@68: f"{name!r} is invalid for {{field}}", cause=exc jpayne@68: ) from exc jpayne@68: else: jpayne@68: return normalized_names jpayne@68: jpayne@68: def _process_requires_python(self, 
value: str) -> specifiers.SpecifierSet: jpayne@68: try: jpayne@68: return specifiers.SpecifierSet(value) jpayne@68: except specifiers.InvalidSpecifier as exc: jpayne@68: raise self._invalid_metadata( jpayne@68: f"{value!r} is invalid for {{field}}", cause=exc jpayne@68: ) from exc jpayne@68: jpayne@68: def _process_requires_dist( jpayne@68: self, jpayne@68: value: list[str], jpayne@68: ) -> list[requirements.Requirement]: jpayne@68: reqs = [] jpayne@68: try: jpayne@68: for req in value: jpayne@68: reqs.append(requirements.Requirement(req)) jpayne@68: except requirements.InvalidRequirement as exc: jpayne@68: raise self._invalid_metadata( jpayne@68: f"{req!r} is invalid for {{field}}", cause=exc jpayne@68: ) from exc jpayne@68: else: jpayne@68: return reqs jpayne@68: jpayne@68: def _process_license_expression( jpayne@68: self, value: str jpayne@68: ) -> NormalizedLicenseExpression | None: jpayne@68: try: jpayne@68: return licenses.canonicalize_license_expression(value) jpayne@68: except ValueError as exc: jpayne@68: raise self._invalid_metadata( jpayne@68: f"{value!r} is invalid for {{field}}", cause=exc jpayne@68: ) from exc jpayne@68: jpayne@68: def _process_license_files(self, value: list[str]) -> list[str]: jpayne@68: paths = [] jpayne@68: for path in value: jpayne@68: if ".." 
in path: jpayne@68: raise self._invalid_metadata( jpayne@68: f"{path!r} is invalid for {{field}}, " jpayne@68: "parent directory indicators are not allowed" jpayne@68: ) jpayne@68: if "*" in path: jpayne@68: raise self._invalid_metadata( jpayne@68: f"{path!r} is invalid for {{field}}, paths must be resolved" jpayne@68: ) jpayne@68: if ( jpayne@68: pathlib.PurePosixPath(path).is_absolute() jpayne@68: or pathlib.PureWindowsPath(path).is_absolute() jpayne@68: ): jpayne@68: raise self._invalid_metadata( jpayne@68: f"{path!r} is invalid for {{field}}, paths must be relative" jpayne@68: ) jpayne@68: if pathlib.PureWindowsPath(path).as_posix() != path: jpayne@68: raise self._invalid_metadata( jpayne@68: f"{path!r} is invalid for {{field}}, " jpayne@68: "paths must use '/' delimiter" jpayne@68: ) jpayne@68: paths.append(path) jpayne@68: return paths jpayne@68: jpayne@68: jpayne@68: class Metadata: jpayne@68: """Representation of distribution metadata. jpayne@68: jpayne@68: Compared to :class:`RawMetadata`, this class provides objects representing jpayne@68: metadata fields instead of only using built-in types. Any invalid metadata jpayne@68: will cause :exc:`InvalidMetadata` to be raised (with a jpayne@68: :py:attr:`~BaseException.__cause__` attribute as appropriate). jpayne@68: """ jpayne@68: jpayne@68: _raw: RawMetadata jpayne@68: jpayne@68: @classmethod jpayne@68: def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> Metadata: jpayne@68: """Create an instance from :class:`RawMetadata`. jpayne@68: jpayne@68: If *validate* is true, all metadata will be validated. All exceptions jpayne@68: related to validation will be gathered and raised as an :class:`ExceptionGroup`. jpayne@68: """ jpayne@68: ins = cls() jpayne@68: ins._raw = data.copy() # Mutations occur due to caching enriched values. 
jpayne@68: jpayne@68: if validate: jpayne@68: exceptions: list[Exception] = [] jpayne@68: try: jpayne@68: metadata_version = ins.metadata_version jpayne@68: metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version) jpayne@68: except InvalidMetadata as metadata_version_exc: jpayne@68: exceptions.append(metadata_version_exc) jpayne@68: metadata_version = None jpayne@68: jpayne@68: # Make sure to check for the fields that are present, the required jpayne@68: # fields (so their absence can be reported). jpayne@68: fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS jpayne@68: # Remove fields that have already been checked. jpayne@68: fields_to_check -= {"metadata_version"} jpayne@68: jpayne@68: for key in fields_to_check: jpayne@68: try: jpayne@68: if metadata_version: jpayne@68: # Can't use getattr() as that triggers descriptor protocol which jpayne@68: # will fail due to no value for the instance argument. jpayne@68: try: jpayne@68: field_metadata_version = cls.__dict__[key].added jpayne@68: except KeyError: jpayne@68: exc = InvalidMetadata(key, f"unrecognized field: {key!r}") jpayne@68: exceptions.append(exc) jpayne@68: continue jpayne@68: field_age = _VALID_METADATA_VERSIONS.index( jpayne@68: field_metadata_version jpayne@68: ) jpayne@68: if field_age > metadata_age: jpayne@68: field = _RAW_TO_EMAIL_MAPPING[key] jpayne@68: exc = InvalidMetadata( jpayne@68: field, jpayne@68: f"{field} introduced in metadata version " jpayne@68: f"{field_metadata_version}, not {metadata_version}", jpayne@68: ) jpayne@68: exceptions.append(exc) jpayne@68: continue jpayne@68: getattr(ins, key) jpayne@68: except InvalidMetadata as exc: jpayne@68: exceptions.append(exc) jpayne@68: jpayne@68: if exceptions: jpayne@68: raise ExceptionGroup("invalid metadata", exceptions) jpayne@68: jpayne@68: return ins jpayne@68: jpayne@68: @classmethod jpayne@68: def from_email(cls, data: bytes | str, *, validate: bool = True) -> Metadata: jpayne@68: """Parse metadata from email headers. 
jpayne@68: jpayne@68: If *validate* is true, the metadata will be validated. All exceptions jpayne@68: related to validation will be gathered and raised as an :class:`ExceptionGroup`. jpayne@68: """ jpayne@68: raw, unparsed = parse_email(data) jpayne@68: jpayne@68: if validate: jpayne@68: exceptions: list[Exception] = [] jpayne@68: for unparsed_key in unparsed: jpayne@68: if unparsed_key in _EMAIL_TO_RAW_MAPPING: jpayne@68: message = f"{unparsed_key!r} has invalid data" jpayne@68: else: jpayne@68: message = f"unrecognized field: {unparsed_key!r}" jpayne@68: exceptions.append(InvalidMetadata(unparsed_key, message)) jpayne@68: jpayne@68: if exceptions: jpayne@68: raise ExceptionGroup("unparsed", exceptions) jpayne@68: jpayne@68: try: jpayne@68: return cls.from_raw(raw, validate=validate) jpayne@68: except ExceptionGroup as exc_group: jpayne@68: raise ExceptionGroup( jpayne@68: "invalid or unparsed metadata", exc_group.exceptions jpayne@68: ) from None jpayne@68: jpayne@68: metadata_version: _Validator[_MetadataVersion] = _Validator() jpayne@68: """:external:ref:`core-metadata-metadata-version` jpayne@68: (required; validated to be a valid metadata version)""" jpayne@68: # `name` is not normalized/typed to NormalizedName so as to provide access to jpayne@68: # the original/raw name. 
jpayne@68: name: _Validator[str] = _Validator() jpayne@68: """:external:ref:`core-metadata-name` jpayne@68: (required; validated using :func:`~packaging.utils.canonicalize_name` and its jpayne@68: *validate* parameter)""" jpayne@68: version: _Validator[version_module.Version] = _Validator() jpayne@68: """:external:ref:`core-metadata-version` (required)""" jpayne@68: dynamic: _Validator[list[str] | None] = _Validator( jpayne@68: added="2.2", jpayne@68: ) jpayne@68: """:external:ref:`core-metadata-dynamic` jpayne@68: (validated against core metadata field names and lowercased)""" jpayne@68: platforms: _Validator[list[str] | None] = _Validator() jpayne@68: """:external:ref:`core-metadata-platform`""" jpayne@68: supported_platforms: _Validator[list[str] | None] = _Validator(added="1.1") jpayne@68: """:external:ref:`core-metadata-supported-platform`""" jpayne@68: summary: _Validator[str | None] = _Validator() jpayne@68: """:external:ref:`core-metadata-summary` (validated to contain no newlines)""" jpayne@68: description: _Validator[str | None] = _Validator() # TODO 2.1: can be in body jpayne@68: """:external:ref:`core-metadata-description`""" jpayne@68: description_content_type: _Validator[str | None] = _Validator(added="2.1") jpayne@68: """:external:ref:`core-metadata-description-content-type` (validated)""" jpayne@68: keywords: _Validator[list[str] | None] = _Validator() jpayne@68: """:external:ref:`core-metadata-keywords`""" jpayne@68: home_page: _Validator[str | None] = _Validator() jpayne@68: """:external:ref:`core-metadata-home-page`""" jpayne@68: download_url: _Validator[str | None] = _Validator(added="1.1") jpayne@68: """:external:ref:`core-metadata-download-url`""" jpayne@68: author: _Validator[str | None] = _Validator() jpayne@68: """:external:ref:`core-metadata-author`""" jpayne@68: author_email: _Validator[str | None] = _Validator() jpayne@68: """:external:ref:`core-metadata-author-email`""" jpayne@68: maintainer: _Validator[str | None] = 
_Validator(added="1.2") jpayne@68: """:external:ref:`core-metadata-maintainer`""" jpayne@68: maintainer_email: _Validator[str | None] = _Validator(added="1.2") jpayne@68: """:external:ref:`core-metadata-maintainer-email`""" jpayne@68: license: _Validator[str | None] = _Validator() jpayne@68: """:external:ref:`core-metadata-license`""" jpayne@68: license_expression: _Validator[NormalizedLicenseExpression | None] = _Validator( jpayne@68: added="2.4" jpayne@68: ) jpayne@68: """:external:ref:`core-metadata-license-expression`""" jpayne@68: license_files: _Validator[list[str] | None] = _Validator(added="2.4") jpayne@68: """:external:ref:`core-metadata-license-file`""" jpayne@68: classifiers: _Validator[list[str] | None] = _Validator(added="1.1") jpayne@68: """:external:ref:`core-metadata-classifier`""" jpayne@68: requires_dist: _Validator[list[requirements.Requirement] | None] = _Validator( jpayne@68: added="1.2" jpayne@68: ) jpayne@68: """:external:ref:`core-metadata-requires-dist`""" jpayne@68: requires_python: _Validator[specifiers.SpecifierSet | None] = _Validator( jpayne@68: added="1.2" jpayne@68: ) jpayne@68: """:external:ref:`core-metadata-requires-python`""" jpayne@68: # Because `Requires-External` allows for non-PEP 440 version specifiers, we jpayne@68: # don't do any processing on the values. jpayne@68: requires_external: _Validator[list[str] | None] = _Validator(added="1.2") jpayne@68: """:external:ref:`core-metadata-requires-external`""" jpayne@68: project_urls: _Validator[dict[str, str] | None] = _Validator(added="1.2") jpayne@68: """:external:ref:`core-metadata-project-url`""" jpayne@68: # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation jpayne@68: # regardless of metadata version. 
jpayne@68: provides_extra: _Validator[list[utils.NormalizedName] | None] = _Validator( jpayne@68: added="2.1", jpayne@68: ) jpayne@68: """:external:ref:`core-metadata-provides-extra`""" jpayne@68: provides_dist: _Validator[list[str] | None] = _Validator(added="1.2") jpayne@68: """:external:ref:`core-metadata-provides-dist`""" jpayne@68: obsoletes_dist: _Validator[list[str] | None] = _Validator(added="1.2") jpayne@68: """:external:ref:`core-metadata-obsoletes-dist`""" jpayne@68: requires: _Validator[list[str] | None] = _Validator(added="1.1") jpayne@68: """``Requires`` (deprecated)""" jpayne@68: provides: _Validator[list[str] | None] = _Validator(added="1.1") jpayne@68: """``Provides`` (deprecated)""" jpayne@68: obsoletes: _Validator[list[str] | None] = _Validator(added="1.1") jpayne@68: """``Obsoletes`` (deprecated)"""