annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/packaging/metadata.py @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 from __future__ import annotations
jpayne@68 2
jpayne@68 3 import email.feedparser
jpayne@68 4 import email.header
jpayne@68 5 import email.message
jpayne@68 6 import email.parser
jpayne@68 7 import email.policy
jpayne@68 8 import pathlib
jpayne@68 9 import sys
jpayne@68 10 import typing
jpayne@68 11 from typing import (
jpayne@68 12 Any,
jpayne@68 13 Callable,
jpayne@68 14 Generic,
jpayne@68 15 Literal,
jpayne@68 16 TypedDict,
jpayne@68 17 cast,
jpayne@68 18 )
jpayne@68 19
jpayne@68 20 from . import licenses, requirements, specifiers, utils
jpayne@68 21 from . import version as version_module
jpayne@68 22 from .licenses import NormalizedLicenseExpression
jpayne@68 23
jpayne@68 24 T = typing.TypeVar("T")
jpayne@68 25
jpayne@68 26
if sys.version_info < (3, 11):  # pragma: no cover

    class ExceptionGroup(Exception):
        """Small stand-in for :external:exc:`ExceptionGroup` from Python 3.11.

        It is only defined on interpreters that lack the built-in; when
        Python already provides :external:exc:`ExceptionGroup`, that class
        is used instead.
        """

        message: str
        exceptions: list[Exception]

        def __init__(self, message: str, exceptions: list[Exception]) -> None:
            self.message, self.exceptions = message, exceptions

        def __repr__(self) -> str:
            cls_name = self.__class__.__name__
            return f"{cls_name}({self.message!r}, {self.exceptions!r})"

else:  # pragma: no cover
    # Rebind the built-in so the name can be imported from this module.
    ExceptionGroup = ExceptionGroup
jpayne@68 47
jpayne@68 48
class InvalidMetadata(ValueError):
    """Raised when a metadata field holds invalid data."""

    field: str
    """The name of the field that contains invalid data."""

    def __init__(self, field: str, message: str) -> None:
        # ``message`` becomes the exception text; ``field`` is kept alongside
        # it so callers can tell which field was rejected.
        super().__init__(message)
        self.field = field
jpayne@68 58
jpayne@68 59
# RawMetadata deliberately assumes as little as possible about the underlying
# serialization format. As long as a format offers a few very basic primitives
# in *some* way, we can support serializing to and from that format.
class RawMetadata(TypedDict, total=False):
    """A dictionary of raw core metadata.

    Every core metadata field for which data was provided maps to one key of
    this dictionary. Keys are the lower-cased field names with dashes replaced
    by underscores. A field that may be specified multiple times, or that
    holds several values in a single field, gets a key with a plural name.
    The attributes of :class:`Metadata` match the keys of this dictionary.

    Fields that can be specified multiple times are stored as a list or a
    dict, whichever suits the field. Fields that hold multiple values in a
    single field are stored as a list.

    """

    # Metadata 1.0 - PEP 241
    metadata_version: str
    name: str
    version: str
    platforms: list[str]
    summary: str
    description: str
    keywords: list[str]
    home_page: str
    author: str
    author_email: str
    license: str

    # Metadata 1.1 - PEP 314
    supported_platforms: list[str]
    download_url: str
    classifiers: list[str]
    requires: list[str]
    provides: list[str]
    obsoletes: list[str]

    # Metadata 1.2 - PEP 345
    maintainer: str
    maintainer_email: str
    requires_dist: list[str]
    provides_dist: list[str]
    obsoletes_dist: list[str]
    requires_python: str
    requires_external: list[str]
    project_urls: dict[str, str]

    # Metadata 2.0
    # PEP 426 tried to completely revamp the metadata format, but it never
    # reached consensus and was ultimately withdrawn.
    #
    # However, a number of tools had already started emitting METADATA with a
    # `2.0` Metadata-Version, so for historical reasons that version number
    # was skipped.

    # Metadata 2.1 - PEP 566
    description_content_type: str
    provides_extra: list[str]

    # Metadata 2.2 - PEP 643
    dynamic: list[str]

    # Metadata 2.3 - PEP 685
    # PEP 685 added no new fields; it only tightened up some edge cases to
    # provide better interoperability.

    # Metadata 2.4 - PEP 639
    license_expression: str
    license_files: list[str]
jpayne@68 134
jpayne@68 135
jpayne@68 136 _STRING_FIELDS = {
jpayne@68 137 "author",
jpayne@68 138 "author_email",
jpayne@68 139 "description",
jpayne@68 140 "description_content_type",
jpayne@68 141 "download_url",
jpayne@68 142 "home_page",
jpayne@68 143 "license",
jpayne@68 144 "license_expression",
jpayne@68 145 "maintainer",
jpayne@68 146 "maintainer_email",
jpayne@68 147 "metadata_version",
jpayne@68 148 "name",
jpayne@68 149 "requires_python",
jpayne@68 150 "summary",
jpayne@68 151 "version",
jpayne@68 152 }
jpayne@68 153
jpayne@68 154 _LIST_FIELDS = {
jpayne@68 155 "classifiers",
jpayne@68 156 "dynamic",
jpayne@68 157 "license_files",
jpayne@68 158 "obsoletes",
jpayne@68 159 "obsoletes_dist",
jpayne@68 160 "platforms",
jpayne@68 161 "provides",
jpayne@68 162 "provides_dist",
jpayne@68 163 "provides_extra",
jpayne@68 164 "requires",
jpayne@68 165 "requires_dist",
jpayne@68 166 "requires_external",
jpayne@68 167 "supported_platforms",
jpayne@68 168 }
jpayne@68 169
jpayne@68 170 _DICT_FIELDS = {
jpayne@68 171 "project_urls",
jpayne@68 172 }
jpayne@68 173
jpayne@68 174
jpayne@68 175 def _parse_keywords(data: str) -> list[str]:
jpayne@68 176 """Split a string of comma-separated keywords into a list of keywords."""
jpayne@68 177 return [k.strip() for k in data.split(",")]
jpayne@68 178
jpayne@68 179
jpayne@68 180 def _parse_project_urls(data: list[str]) -> dict[str, str]:
jpayne@68 181 """Parse a list of label/URL string pairings separated by a comma."""
jpayne@68 182 urls = {}
jpayne@68 183 for pair in data:
jpayne@68 184 # Our logic is slightly tricky here as we want to try and do
jpayne@68 185 # *something* reasonable with malformed data.
jpayne@68 186 #
jpayne@68 187 # The main thing that we have to worry about, is data that does
jpayne@68 188 # not have a ',' at all to split the label from the Value. There
jpayne@68 189 # isn't a singular right answer here, and we will fail validation
jpayne@68 190 # later on (if the caller is validating) so it doesn't *really*
jpayne@68 191 # matter, but since the missing value has to be an empty str
jpayne@68 192 # and our return value is dict[str, str], if we let the key
jpayne@68 193 # be the missing value, then they'd have multiple '' values that
jpayne@68 194 # overwrite each other in a accumulating dict.
jpayne@68 195 #
jpayne@68 196 # The other potentional issue is that it's possible to have the
jpayne@68 197 # same label multiple times in the metadata, with no solid "right"
jpayne@68 198 # answer with what to do in that case. As such, we'll do the only
jpayne@68 199 # thing we can, which is treat the field as unparseable and add it
jpayne@68 200 # to our list of unparsed fields.
jpayne@68 201 parts = [p.strip() for p in pair.split(",", 1)]
jpayne@68 202 parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items
jpayne@68 203
jpayne@68 204 # TODO: The spec doesn't say anything about if the keys should be
jpayne@68 205 # considered case sensitive or not... logically they should
jpayne@68 206 # be case-preserving and case-insensitive, but doing that
jpayne@68 207 # would open up more cases where we might have duplicate
jpayne@68 208 # entries.
jpayne@68 209 label, url = parts
jpayne@68 210 if label in urls:
jpayne@68 211 # The label already exists in our set of urls, so this field
jpayne@68 212 # is unparseable, and we can just add the whole thing to our
jpayne@68 213 # unparseable data and stop processing it.
jpayne@68 214 raise KeyError("duplicate labels in project urls")
jpayne@68 215 urls[label] = url
jpayne@68 216
jpayne@68 217 return urls
jpayne@68 218
jpayne@68 219
jpayne@68 220 def _get_payload(msg: email.message.Message, source: bytes | str) -> str:
jpayne@68 221 """Get the body of the message."""
jpayne@68 222 # If our source is a str, then our caller has managed encodings for us,
jpayne@68 223 # and we don't need to deal with it.
jpayne@68 224 if isinstance(source, str):
jpayne@68 225 payload = msg.get_payload()
jpayne@68 226 assert isinstance(payload, str)
jpayne@68 227 return payload
jpayne@68 228 # If our source is a bytes, then we're managing the encoding and we need
jpayne@68 229 # to deal with it.
jpayne@68 230 else:
jpayne@68 231 bpayload = msg.get_payload(decode=True)
jpayne@68 232 assert isinstance(bpayload, bytes)
jpayne@68 233 try:
jpayne@68 234 return bpayload.decode("utf8", "strict")
jpayne@68 235 except UnicodeDecodeError as exc:
jpayne@68 236 raise ValueError("payload in an invalid encoding") from exc
jpayne@68 237
jpayne@68 238
jpayne@68 239 # The various parse_FORMAT functions here are intended to be as lenient as
jpayne@68 240 # possible in their parsing, while still returning a correctly typed
jpayne@68 241 # RawMetadata.
jpayne@68 242 #
jpayne@68 243 # To aid in this, we also generally want to do as little touching of the
jpayne@68 244 # data as possible, except where there are possibly some historic holdovers
jpayne@68 245 # that make valid data awkward to work with.
jpayne@68 246 #
jpayne@68 247 # While this is a lower level, intermediate format than our ``Metadata``
jpayne@68 248 # class, some light touch ups can make a massive difference in usability.
jpayne@68 249
jpayne@68 250 # Map METADATA fields to RawMetadata.
jpayne@68 251 _EMAIL_TO_RAW_MAPPING = {
jpayne@68 252 "author": "author",
jpayne@68 253 "author-email": "author_email",
jpayne@68 254 "classifier": "classifiers",
jpayne@68 255 "description": "description",
jpayne@68 256 "description-content-type": "description_content_type",
jpayne@68 257 "download-url": "download_url",
jpayne@68 258 "dynamic": "dynamic",
jpayne@68 259 "home-page": "home_page",
jpayne@68 260 "keywords": "keywords",
jpayne@68 261 "license": "license",
jpayne@68 262 "license-expression": "license_expression",
jpayne@68 263 "license-file": "license_files",
jpayne@68 264 "maintainer": "maintainer",
jpayne@68 265 "maintainer-email": "maintainer_email",
jpayne@68 266 "metadata-version": "metadata_version",
jpayne@68 267 "name": "name",
jpayne@68 268 "obsoletes": "obsoletes",
jpayne@68 269 "obsoletes-dist": "obsoletes_dist",
jpayne@68 270 "platform": "platforms",
jpayne@68 271 "project-url": "project_urls",
jpayne@68 272 "provides": "provides",
jpayne@68 273 "provides-dist": "provides_dist",
jpayne@68 274 "provides-extra": "provides_extra",
jpayne@68 275 "requires": "requires",
jpayne@68 276 "requires-dist": "requires_dist",
jpayne@68 277 "requires-external": "requires_external",
jpayne@68 278 "requires-python": "requires_python",
jpayne@68 279 "summary": "summary",
jpayne@68 280 "supported-platform": "supported_platforms",
jpayne@68 281 "version": "version",
jpayne@68 282 }
jpayne@68 283 _RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()}
jpayne@68 284
jpayne@68 285
def _decode_header_values(headers: list[Any]) -> tuple[list[str], bool]:
    """Convert the values of one header into ``str`` and report bad encodings.

    Returns ``(values, valid_encoding)``. ``valid_encoding`` is ``False`` as
    soon as any chunk of any value is not valid UTF-8; such chunks are decoded
    as latin1 instead (yielding mojibake) so that a ``str`` can still be
    produced.
    """
    # The way the email module works when parsing bytes is that it
    # unconditionally decodes the bytes as ascii using the surrogateescape
    # handler. When you pull that data back out (such as with get_all() ),
    # it looks to see if the str has any surrogate escapes, and if it does
    # it wraps it in a Header object instead of returning the string.
    #
    # As such, we look for those Header objects and fix up the encoding.
    values: list[str] = []
    valid_encoding = True
    for header in headers:
        # It's unclear if this can return more types than just a Header or
        # a str, so we'll just assert here to make sure.
        assert isinstance(header, (email.header.Header, str))

        if isinstance(header, str):
            # Already a string, nothing to fix up.
            values.append(header)
            continue

        # A Header stores its data as chunks, and each chunk can be encoded
        # independently, so every chunk has to be checked. Invalid data ends
        # up as mojibake; there is no obvious, good way around that without
        # reimplementing parts of the Header object, and it is fine because
        # the field then goes into the unparsed dict anyway.
        chunks: list[tuple[bytes, str | None]] = []
        for chunk, _declared_encoding in email.header.decode_header(header):
            try:
                chunk.decode("utf8", "strict")
            except UnicodeDecodeError:
                # Enable mojibake.
                encoding = "latin1"
                valid_encoding = False
            else:
                encoding = "utf8"
            chunks.append((chunk, encoding))

        # Turn the chunks back into a Header object and let it do the right
        # thing to produce a string.
        values.append(str(email.header.make_header(chunks)))

    return values, valid_encoding


def parse_email(data: bytes | str) -> tuple[RawMetadata, dict[str, list[str]]]:
    """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``).

    This function returns a two-item tuple of dicts. The first dict is of
    recognized fields from the core metadata specification. Fields that can be
    parsed and translated into Python's built-in types are converted
    appropriately. All other fields are left as-is. Fields that are allowed to
    appear multiple times are stored as lists.

    The second dict contains all other fields from the metadata. This includes
    any unrecognized fields. It also includes any fields which are expected to
    be parsed into a built-in type but were not formatted appropriately. Finally,
    any fields that are expected to appear only once but are repeated are
    included in this dict.

    Keys of both dicts are inserted in the order in which the corresponding
    header first appears in *data*.

    """
    raw: dict[str, str | list[str] | dict[str, str]] = {}
    unparsed: dict[str, list[str]] = {}

    if isinstance(data, str):
        parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
    else:
        parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)

    # A header that occurs several times shows up several times in
    # parsed.keys(), but get_all() below already returns every occurrence, so
    # each name must be visited only once. Header names in RFC are case
    # insensitive, so we normalize to lower case *before* de-duplicating;
    # otherwise "Name" and "name" would both be processed. dict.fromkeys()
    # de-duplicates while keeping first-appearance order, which keeps the
    # ordering of the result dicts deterministic.
    for name in dict.fromkeys(key.lower() for key in parsed.keys()):
        # We use get_all() here, even for fields that aren't multiple use,
        # because otherwise someone could have e.g. two Name fields, and we
        # would just silently ignore it rather than doing something about it.
        headers = parsed.get_all(name) or []
        value, valid_encoding = _decode_header_values(headers)

        # We've processed all of our values to get them into a list of str,
        # but we may have mojibake data, in which case this is an unparsed
        # field.
        if not valid_encoding:
            unparsed[name] = value
            continue

        raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
        if raw_name is None:
            # We've encountered a key that we don't know the meaning of, so
            # we don't know whether it's meant to be a list or not.
            #
            # Since we can't tell one way or another, we leave it as a list,
            # even though it may be a single item list, because that's what
            # makes the most sense for email headers.
            unparsed[name] = value
            continue

        # If this is one of our string fields and the value is a list of a
        # single item, we assume it was emitted as a single string and unwrap
        # the str from inside the list. A string field that was repeated
        # falls through to the final ``else`` and ends up in ``unparsed``.
        if raw_name in _STRING_FIELDS and len(value) == 1:
            raw[raw_name] = value[0]
        # If this is one of our list of string fields, then we can just assign
        # the value, since email *only* has strings, and our get_all() call
        # above ensures that this is a list.
        elif raw_name in _LIST_FIELDS:
            raw[raw_name] = value
        # Special Case: Keywords
        # The keywords field is implemented in the metadata spec as a str,
        # but it conceptually is a list of strings, and is serialized using
        # ", ".join(keywords), so we'll do some light data massaging to turn
        # this into what it logically is.
        elif raw_name == "keywords" and len(value) == 1:
            raw[raw_name] = _parse_keywords(value[0])
        # Special Case: Project-URL
        # The project urls is implemented in the metadata spec as a list of
        # specially-formatted strings that represent a key and a value, which
        # is fundamentally a mapping, however the email format doesn't support
        # mappings in a sane way, so it was crammed into a list of strings
        # instead.
        #
        # We will do a little light data massaging to turn this into a map as
        # it logically should be.
        elif raw_name == "project_urls":
            try:
                raw[raw_name] = _parse_project_urls(value)
            except KeyError:
                unparsed[name] = value
        # Nothing that we've done has managed to parse this, so it'll just
        # throw it in our unparseable data and move on.
        else:
            unparsed[name] = value

    # We need to support getting the Description from the message payload in
    # addition to getting it from the headers. This does mean, though, there
    # is the possibility of it being set both ways, in which case we put both
    # in 'unparsed' since we don't know which is right.
    try:
        payload = _get_payload(parsed, data)
    except ValueError:
        unparsed.setdefault("description", []).append(
            parsed.get_payload(decode=isinstance(data, bytes))  # type: ignore[call-overload]
        )
    else:
        if payload:
            # Check to see if we've already got a description, if so then both
            # it, and this body move to unparseable.
            if "description" in raw:
                description_header = cast(str, raw.pop("description"))
                unparsed.setdefault("description", []).extend(
                    [description_header, payload]
                )
            elif "description" in unparsed:
                unparsed["description"].append(payload)
            else:
                raw["description"] = payload

    # We need to cast our `raw` to a metadata, because a TypedDict only support
    # literal key names, but we're computing our key names on purpose, but the
    # way this function is implemented, our `TypedDict` can only have valid key
    # names.
    return cast(RawMetadata, raw), unparsed
jpayne@68 460
jpayne@68 461
jpayne@68 462 _NOT_FOUND = object()
jpayne@68 463
jpayne@68 464
jpayne@68 465 # Keep the two values in sync.
jpayne@68 466 _VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]
jpayne@68 467 _MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]
jpayne@68 468
jpayne@68 469 _REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"])
jpayne@68 470
jpayne@68 471
class _Validator(Generic[T]):
    """Validate a metadata field.

    All _process_*() methods correspond to a core metadata field. The method is
    called with the field's raw value. If the raw value is valid it is returned
    in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field).
    If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause
    as appropriate).

    Instances are descriptors: the attribute name they are bound to selects
    both the raw key that is read and the ``_process_<name>`` method used.
    """

    # Attribute name on the owning class (also the RawMetadata key).
    name: str
    # Email header name for the field, used in error messages.
    raw_name: str
    # Metadata version in which the field was introduced.
    added: _MetadataVersion

    def __init__(
        self,
        *,
        added: _MetadataVersion = "1.0",
    ) -> None:
        self.added = added

    def __set_name__(self, _owner: Metadata, name: str) -> None:
        self.name = name
        self.raw_name = _RAW_TO_EMAIL_MAPPING[name]

    def __get__(self, instance: Metadata, _owner: type[Metadata]) -> T:
        # With Python 3.8, the caching can be replaced with functools.cached_property().
        # No need to check the cache as attribute lookup will resolve into the
        # instance's __dict__ before __get__ is called.
        cache = instance.__dict__
        value = instance._raw.get(self.name)

        # To make the _process_* methods easier, we'll check if the value is None
        # and if this field is NOT a required attribute, and if both of those
        # things are true, we'll skip the converter. This will mean that the
        # converters never have to deal with the None union.
        if self.name in _REQUIRED_ATTRS or value is not None:
            try:
                converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}")
            except AttributeError:
                # No converter defined for this field: the raw value is used.
                pass
            else:
                value = converter(value)

        # Store the enriched value on the instance and drop the raw one.
        cache[self.name] = value
        try:
            del instance._raw[self.name]  # type: ignore[misc]
        except KeyError:
            pass

        return cast(T, value)

    def _invalid_metadata(
        self, msg: str, cause: Exception | None = None
    ) -> InvalidMetadata:
        """Build an :exc:`InvalidMetadata` for this field.

        Every literal ``{field}`` in *msg* is replaced by the repr of the
        field's email header name. *cause* is stored as ``__cause__``.
        """
        # Plain substitution instead of str.format_map(): *msg* usually already
        # contains user-supplied data, and braces in that data would otherwise
        # be treated as format fields (raising KeyError/ValueError) or be
        # collapsed ("{{" -> "{"), misreporting the offending value.
        exc = InvalidMetadata(
            self.raw_name, msg.replace("{field}", repr(self.raw_name))
        )
        exc.__cause__ = cause
        return exc

    def _process_metadata_version(self, value: str) -> _MetadataVersion:
        # Implicitly makes Metadata-Version required.
        if value not in _VALID_METADATA_VERSIONS:
            raise self._invalid_metadata(f"{value!r} is not a valid metadata version")
        return cast(_MetadataVersion, value)

    def _process_name(self, value: str) -> str:
        if not value:
            raise self._invalid_metadata("{field} is a required field")
        # Validate the name as a side-effect.
        try:
            utils.canonicalize_name(value, validate=True)
        except utils.InvalidName as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            ) from exc
        else:
            return value

    def _process_version(self, value: str) -> version_module.Version:
        if not value:
            raise self._invalid_metadata("{field} is a required field")
        try:
            return version_module.parse(value)
        except version_module.InvalidVersion as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            ) from exc

    def _process_summary(self, value: str) -> str:
        """Check the field contains no newlines."""
        if "\n" in value:
            raise self._invalid_metadata("{field} must be a single line")
        return value

    def _process_description_content_type(self, value: str) -> str:
        """Check the content type, charset and Markdown variant of the field."""
        content_types = {"text/plain", "text/x-rst", "text/markdown"}
        message = email.message.EmailMessage()
        message["content-type"] = value

        content_type, parameters = (
            # Defaults to `text/plain` if parsing failed.
            message.get_content_type().lower(),
            message["content-type"].params,
        )
        # Check if content-type is valid or defaulted to `text/plain` and thus was
        # not parseable.
        if content_type not in content_types or content_type not in value.lower():
            raise self._invalid_metadata(
                f"{{field}} must be one of {list(content_types)}, not {value!r}"
            )

        charset = parameters.get("charset", "UTF-8")
        if charset != "UTF-8":
            # Report the charset itself; list(charset) would explode the
            # string into a list of single characters.
            raise self._invalid_metadata(
                f"{{field}} can only specify the UTF-8 charset, not {charset!r}"
            )

        markdown_variants = {"GFM", "CommonMark"}
        variant = parameters.get("variant", "GFM")  # Use an acceptable default.
        if content_type == "text/markdown" and variant not in markdown_variants:
            raise self._invalid_metadata(
                f"valid Markdown variants for {{field}} are {list(markdown_variants)}, "
                f"not {variant!r}",
            )
        return value

    def _process_dynamic(self, value: list[str]) -> list[str]:
        """Return the lower-cased field names after checking each is allowed."""
        for dynamic_field in map(str.lower, value):
            if dynamic_field in {"name", "version", "metadata-version"}:
                raise self._invalid_metadata(
                    f"{dynamic_field!r} is not allowed as a dynamic field"
                )
            elif dynamic_field not in _EMAIL_TO_RAW_MAPPING:
                raise self._invalid_metadata(
                    f"{dynamic_field!r} is not a valid dynamic field"
                )
        return list(map(str.lower, value))

    def _process_provides_extra(
        self,
        value: list[str],
    ) -> list[utils.NormalizedName]:
        normalized_names = []
        try:
            for name in value:
                normalized_names.append(utils.canonicalize_name(name, validate=True))
        except utils.InvalidName as exc:
            # ``name`` is still bound to the entry that failed.
            raise self._invalid_metadata(
                f"{name!r} is invalid for {{field}}", cause=exc
            ) from exc
        else:
            return normalized_names

    def _process_requires_python(self, value: str) -> specifiers.SpecifierSet:
        try:
            return specifiers.SpecifierSet(value)
        except specifiers.InvalidSpecifier as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            ) from exc

    def _process_requires_dist(
        self,
        value: list[str],
    ) -> list[requirements.Requirement]:
        reqs = []
        try:
            for req in value:
                reqs.append(requirements.Requirement(req))
        except requirements.InvalidRequirement as exc:
            # ``req`` is still bound to the entry that failed.
            raise self._invalid_metadata(
                f"{req!r} is invalid for {{field}}", cause=exc
            ) from exc
        else:
            return reqs

    def _process_license_expression(
        self, value: str
    ) -> NormalizedLicenseExpression | None:
        try:
            return licenses.canonicalize_license_expression(value)
        except ValueError as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            ) from exc

    def _process_license_files(self, value: list[str]) -> list[str]:
        """Check every path is a resolved, relative, '/'-delimited path."""
        paths = []
        for path in value:
            if ".." in path:
                raise self._invalid_metadata(
                    f"{path!r} is invalid for {{field}}, "
                    "parent directory indicators are not allowed"
                )
            if "*" in path:
                raise self._invalid_metadata(
                    f"{path!r} is invalid for {{field}}, paths must be resolved"
                )
            if (
                pathlib.PurePosixPath(path).is_absolute()
                or pathlib.PureWindowsPath(path).is_absolute()
            ):
                raise self._invalid_metadata(
                    f"{path!r} is invalid for {{field}}, paths must be relative"
                )
            # Any path that changes under as_posix() is rejected.
            if pathlib.PureWindowsPath(path).as_posix() != path:
                raise self._invalid_metadata(
                    f"{path!r} is invalid for {{field}}, "
                    "paths must use '/' delimiter"
                )
            paths.append(path)
        return paths
jpayne@68 686
jpayne@68 687
class Metadata:
    """Distribution metadata exposed as rich objects.

    Where :class:`RawMetadata` only holds built-in types, the attributes of
    this class yield objects representing each metadata field. Invalid
    metadata causes :exc:`InvalidMetadata` to be raised (with a
    :py:attr:`~BaseException.__cause__` attribute as appropriate).
    """

    _raw: RawMetadata

    @classmethod
    def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> Metadata:
        """Build an instance on top of a copy of *data*.

        When *validate* is true, every field present in *data* plus every
        required field is checked. All :exc:`InvalidMetadata` errors found
        are collected and raised together as an :class:`ExceptionGroup`.
        """
        instance = cls()
        # Work on a copy: mutations occur due to caching enriched values.
        instance._raw = data.copy()

        if not validate:
            return instance

        problems: list[Exception] = []
        try:
            declared_version = instance.metadata_version
            declared_age = _VALID_METADATA_VERSIONS.index(declared_version)
        except InvalidMetadata as version_error:
            problems.append(version_error)
            declared_version = None

        # Check the fields that are present as well as the required fields
        # (so their absence can be reported). ``metadata_version`` has
        # already been checked above, so it is left out.
        pending = (frozenset(instance._raw) | _REQUIRED_ATTRS) - {"metadata_version"}

        for attr in pending:
            try:
                if declared_version:
                    # Can't use getattr() as that triggers the descriptor
                    # protocol, which will fail due to no value for the
                    # instance argument.
                    try:
                        introduced_in = cls.__dict__[attr].added
                    except KeyError:
                        problems.append(
                            InvalidMetadata(attr, f"unrecognized field: {attr!r}")
                        )
                        continue
                    if _VALID_METADATA_VERSIONS.index(introduced_in) > declared_age:
                        header = _RAW_TO_EMAIL_MAPPING[attr]
                        problems.append(
                            InvalidMetadata(
                                header,
                                f"{header} introduced in metadata version "
                                f"{introduced_in}, not {declared_version}",
                            )
                        )
                        continue
                # Reading the attribute runs that field's validation.
                getattr(instance, attr)
            except InvalidMetadata as invalid:
                problems.append(invalid)

        if problems:
            raise ExceptionGroup("invalid metadata", problems)

        return instance

    @classmethod
    def from_email(cls, data: bytes | str, *, validate: bool = True) -> Metadata:
        """Build an instance from metadata in email-header form.

        *data* is parsed with :func:`parse_email`. When *validate* is true,
        any header left unparsed is reported first, as an
        :class:`ExceptionGroup` of :exc:`InvalidMetadata`; otherwise the parsed
        data goes through :meth:`from_raw`, whose :class:`ExceptionGroup` (if
        any) is re-raised under a different message.
        """
        raw, unparsed = parse_email(data)

        if validate:
            leftovers: list[Exception] = [
                InvalidMetadata(
                    header,
                    (
                        f"{header!r} has invalid data"
                        if header in _EMAIL_TO_RAW_MAPPING
                        else f"unrecognized field: {header!r}"
                    ),
                )
                for header in unparsed
            ]
            if leftovers:
                raise ExceptionGroup("unparsed", leftovers)

        try:
            return cls.from_raw(raw, validate=validate)
        except ExceptionGroup as raw_errors:
            # Re-wrap under this method's own message and drop the chained
            # context of the inner group.
            raise ExceptionGroup(
                "invalid or unparsed metadata", raw_errors.exceptions
            ) from None

    metadata_version: _Validator[_MetadataVersion] = _Validator()
    """:external:ref:`core-metadata-metadata-version`
    (required; validated to be a valid metadata version)"""
    # `name` is deliberately not normalized/typed to NormalizedName, so that
    # the original/raw name stays accessible.
    name: _Validator[str] = _Validator()
    """:external:ref:`core-metadata-name`
    (required; validated using :func:`~packaging.utils.canonicalize_name` and its
    *validate* parameter)"""
    version: _Validator[version_module.Version] = _Validator()
    """:external:ref:`core-metadata-version` (required)"""
    dynamic: _Validator[list[str] | None] = _Validator(added="2.2")
    """:external:ref:`core-metadata-dynamic`
    (validated against core metadata field names and lowercased)"""
    platforms: _Validator[list[str] | None] = _Validator()
    """:external:ref:`core-metadata-platform`"""
    supported_platforms: _Validator[list[str] | None] = _Validator(added="1.1")
    """:external:ref:`core-metadata-supported-platform`"""
    summary: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-summary` (validated to contain no newlines)"""
    description: _Validator[str | None] = _Validator()  # TODO 2.1: can be in body
    """:external:ref:`core-metadata-description`"""
    description_content_type: _Validator[str | None] = _Validator(added="2.1")
    """:external:ref:`core-metadata-description-content-type` (validated)"""
    keywords: _Validator[list[str] | None] = _Validator()
    """:external:ref:`core-metadata-keywords`"""
    home_page: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-home-page`"""
    download_url: _Validator[str | None] = _Validator(added="1.1")
    """:external:ref:`core-metadata-download-url`"""
    author: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-author`"""
    author_email: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-author-email`"""
    maintainer: _Validator[str | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-maintainer`"""
    maintainer_email: _Validator[str | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-maintainer-email`"""
    license: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-license`"""
    license_expression: _Validator[NormalizedLicenseExpression | None] = _Validator(
        added="2.4"
    )
    """:external:ref:`core-metadata-license-expression`"""
    license_files: _Validator[list[str] | None] = _Validator(added="2.4")
    """:external:ref:`core-metadata-license-file`"""
    classifiers: _Validator[list[str] | None] = _Validator(added="1.1")
    """:external:ref:`core-metadata-classifier`"""
    requires_dist: _Validator[list[requirements.Requirement] | None] = _Validator(
        added="1.2"
    )
    """:external:ref:`core-metadata-requires-dist`"""
    requires_python: _Validator[specifiers.SpecifierSet | None] = _Validator(
        added="1.2"
    )
    """:external:ref:`core-metadata-requires-python`"""
    # `Requires-External` allows non-PEP 440 version specifiers, so the values
    # are not processed in any way.
    requires_external: _Validator[list[str] | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-requires-external`"""
    project_urls: _Validator[dict[str, str] | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-project-url`"""
    # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation
    # regardless of metadata version.
    provides_extra: _Validator[list[utils.NormalizedName] | None] = _Validator(
        added="2.1"
    )
    """:external:ref:`core-metadata-provides-extra`"""
    provides_dist: _Validator[list[str] | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-provides-dist`"""
    obsoletes_dist: _Validator[list[str] | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-obsoletes-dist`"""
    requires: _Validator[list[str] | None] = _Validator(added="1.1")
    """``Requires`` (deprecated)"""
    provides: _Validator[list[str] | None] = _Validator(added="1.1")
    """``Provides`` (deprecated)"""
    obsoletes: _Validator[list[str] | None] = _Validator(added="1.1")
    """``Obsoletes`` (deprecated)"""