comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/packaging/metadata.py @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 from __future__ import annotations
2
3 import email.feedparser
4 import email.header
5 import email.message
6 import email.parser
7 import email.policy
8 import pathlib
9 import sys
10 import typing
11 from typing import (
12 Any,
13 Callable,
14 Generic,
15 Literal,
16 TypedDict,
17 cast,
18 )
19
20 from . import licenses, requirements, specifiers, utils
21 from . import version as version_module
22 from .licenses import NormalizedLicenseExpression
23
# Type variable used by the _Validator descriptor to carry the enriched
# ("processed") type of each metadata field.
T = typing.TypeVar("T")
25
26
if sys.version_info >= (3, 11):  # pragma: no cover
    ExceptionGroup = ExceptionGroup
else:  # pragma: no cover

    class ExceptionGroup(Exception):
        """Minimal stand-in for Python 3.11's :external:exc:`ExceptionGroup`.

        On interpreters that already ship the built-in exception, the
        built-in is re-exported above and this class is never defined.
        """

        message: str
        exceptions: list[Exception]

        def __init__(self, message: str, exceptions: list[Exception]) -> None:
            self.message = message
            self.exceptions = exceptions

        def __repr__(self) -> str:
            cls_name = type(self).__name__
            return f"{cls_name}({self.message!r}, {self.exceptions!r})"
47
48
class InvalidMetadata(ValueError):
    """Raised when a metadata field holds data that fails validation."""

    field: str
    """The name of the field that contains invalid data."""

    def __init__(self, field: str, message: str) -> None:
        super().__init__(message)
        self.field = field
58
59
# The RawMetadata class attempts to make as few assumptions about the underlying
# serialization formats as possible. The idea is that as long as a serialization
# format offers some very basic primitives in *some* way then we can support
# serializing to and from that format.
class RawMetadata(TypedDict, total=False):
    """A dictionary of raw core metadata.

    Each field in core metadata maps to a key of this dictionary (when data is
    provided). The key is lower-case and underscores are used instead of dashes
    compared to the equivalent core metadata field. Any core metadata field that
    can be specified multiple times or can hold multiple values in a single
    field have a key with a plural name. See :class:`Metadata` whose attributes
    match the keys of this dictionary.

    Core metadata fields that can be specified multiple times are stored as a
    list or dict depending on which is appropriate for the field. Any fields
    which hold multiple values in a single field are stored as a list.

    """

    # Metadata 1.0 - PEP 241
    metadata_version: str
    name: str
    version: str
    platforms: list[str]
    summary: str
    description: str
    # Serialized in METADATA as a single comma-separated string, but
    # conceptually a list (see _parse_keywords).
    keywords: list[str]
    home_page: str
    author: str
    author_email: str
    license: str

    # Metadata 1.1 - PEP 314
    supported_platforms: list[str]
    download_url: str
    classifiers: list[str]
    requires: list[str]
    provides: list[str]
    obsoletes: list[str]

    # Metadata 1.2 - PEP 345
    maintainer: str
    maintainer_email: str
    requires_dist: list[str]
    provides_dist: list[str]
    obsoletes_dist: list[str]
    requires_python: str
    requires_external: list[str]
    # Serialized in METADATA as "label, url" strings (see _parse_project_urls).
    project_urls: dict[str, str]

    # Metadata 2.0
    # PEP 426 attempted to completely revamp the metadata format
    # but got stuck without ever being able to build consensus on
    # it and ultimately ended up withdrawn.
    #
    # However, a number of tools had started emitting METADATA with
    # `2.0` Metadata-Version, so for historical reasons, this version
    # was skipped.

    # Metadata 2.1 - PEP 566
    description_content_type: str
    provides_extra: list[str]

    # Metadata 2.2 - PEP 643
    dynamic: list[str]

    # Metadata 2.3 - PEP 685
    # No new fields were added in PEP 685, just some edge cases were
    # tightened up to provide better interoperability.

    # Metadata 2.4 - PEP 639
    license_expression: str
    license_files: list[str]
134
135
# RawMetadata keys whose value is a single string (fields that may appear at
# most once in METADATA).
_STRING_FIELDS = {
    "author",
    "author_email",
    "description",
    "description_content_type",
    "download_url",
    "home_page",
    "license",
    "license_expression",
    "maintainer",
    "maintainer_email",
    "metadata_version",
    "name",
    "requires_python",
    "summary",
    "version",
}

# RawMetadata keys whose value is a list of strings (fields that may appear
# multiple times in METADATA).
_LIST_FIELDS = {
    "classifiers",
    "dynamic",
    "license_files",
    "obsoletes",
    "obsoletes_dist",
    "platforms",
    "provides",
    "provides_dist",
    "provides_extra",
    "requires",
    "requires_dist",
    "requires_external",
    "supported_platforms",
}

# RawMetadata keys whose value is a mapping (serialized as repeated
# specially-formatted strings in METADATA).
_DICT_FIELDS = {
    "project_urls",
}
173
174
175 def _parse_keywords(data: str) -> list[str]:
176 """Split a string of comma-separated keywords into a list of keywords."""
177 return [k.strip() for k in data.split(",")]
178
179
180 def _parse_project_urls(data: list[str]) -> dict[str, str]:
181 """Parse a list of label/URL string pairings separated by a comma."""
182 urls = {}
183 for pair in data:
184 # Our logic is slightly tricky here as we want to try and do
185 # *something* reasonable with malformed data.
186 #
187 # The main thing that we have to worry about, is data that does
188 # not have a ',' at all to split the label from the Value. There
189 # isn't a singular right answer here, and we will fail validation
190 # later on (if the caller is validating) so it doesn't *really*
191 # matter, but since the missing value has to be an empty str
192 # and our return value is dict[str, str], if we let the key
193 # be the missing value, then they'd have multiple '' values that
194 # overwrite each other in a accumulating dict.
195 #
196 # The other potentional issue is that it's possible to have the
197 # same label multiple times in the metadata, with no solid "right"
198 # answer with what to do in that case. As such, we'll do the only
199 # thing we can, which is treat the field as unparseable and add it
200 # to our list of unparsed fields.
201 parts = [p.strip() for p in pair.split(",", 1)]
202 parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items
203
204 # TODO: The spec doesn't say anything about if the keys should be
205 # considered case sensitive or not... logically they should
206 # be case-preserving and case-insensitive, but doing that
207 # would open up more cases where we might have duplicate
208 # entries.
209 label, url = parts
210 if label in urls:
211 # The label already exists in our set of urls, so this field
212 # is unparseable, and we can just add the whole thing to our
213 # unparseable data and stop processing it.
214 raise KeyError("duplicate labels in project urls")
215 urls[label] = url
216
217 return urls
218
219
220 def _get_payload(msg: email.message.Message, source: bytes | str) -> str:
221 """Get the body of the message."""
222 # If our source is a str, then our caller has managed encodings for us,
223 # and we don't need to deal with it.
224 if isinstance(source, str):
225 payload = msg.get_payload()
226 assert isinstance(payload, str)
227 return payload
228 # If our source is a bytes, then we're managing the encoding and we need
229 # to deal with it.
230 else:
231 bpayload = msg.get_payload(decode=True)
232 assert isinstance(bpayload, bytes)
233 try:
234 return bpayload.decode("utf8", "strict")
235 except UnicodeDecodeError as exc:
236 raise ValueError("payload in an invalid encoding") from exc
237
238
# The various parse_FORMAT functions here are intended to be as lenient as
# possible in their parsing, while still returning a correctly typed
# RawMetadata.
#
# To aid in this, we also generally want to do as little touching of the
# data as possible, except where there are possibly some historic holdovers
# that make valid data awkward to work with.
#
# While this is a lower level, intermediate format than our ``Metadata``
# class, some light touch ups can make a massive difference in usability.

# Map METADATA fields (lower-cased header names) to RawMetadata keys.
_EMAIL_TO_RAW_MAPPING = {
    "author": "author",
    "author-email": "author_email",
    "classifier": "classifiers",
    "description": "description",
    "description-content-type": "description_content_type",
    "download-url": "download_url",
    "dynamic": "dynamic",
    "home-page": "home_page",
    "keywords": "keywords",
    "license": "license",
    "license-expression": "license_expression",
    "license-file": "license_files",
    "maintainer": "maintainer",
    "maintainer-email": "maintainer_email",
    "metadata-version": "metadata_version",
    "name": "name",
    "obsoletes": "obsoletes",
    "obsoletes-dist": "obsoletes_dist",
    "platform": "platforms",
    "project-url": "project_urls",
    "provides": "provides",
    "provides-dist": "provides_dist",
    "provides-extra": "provides_extra",
    "requires": "requires",
    "requires-dist": "requires_dist",
    "requires-external": "requires_external",
    "requires-python": "requires_python",
    "summary": "summary",
    "supported-platform": "supported_platforms",
    "version": "version",
}
# Inverse mapping: RawMetadata keys back to METADATA field names.
_RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()}
284
285
def parse_email(data: bytes | str) -> tuple[RawMetadata, dict[str, list[str]]]:
    """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``).

    This function returns a two-item tuple of dicts. The first dict is of
    recognized fields from the core metadata specification. Fields that can be
    parsed and translated into Python's built-in types are converted
    appropriately. All other fields are left as-is. Fields that are allowed to
    appear multiple times are stored as lists.

    The second dict contains all other fields from the metadata. This includes
    any unrecognized fields. It also includes any fields which are expected to
    be parsed into a built-in type but were not formatted appropriately. Finally,
    any fields that are expected to appear only once but are repeated are
    included in this dict.

    """
    raw: dict[str, str | list[str] | dict[str, str]] = {}
    unparsed: dict[str, list[str]] = {}

    # compat32 keeps the parser from doing any header interpretation of its
    # own; the decoding logic below handles that explicitly.
    if isinstance(data, str):
        parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
    else:
        parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)

    # We have to wrap parsed.keys() in a set, because in the case of multiple
    # values for a key (a list), the key will appear multiple times in the
    # list of keys, but we're avoiding that by using get_all().
    for name in frozenset(parsed.keys()):
        # Header names in RFC are case insensitive, so we'll normalize to all
        # lower case to make comparisons easier.
        name = name.lower()

        # We use get_all() here, even for fields that aren't multiple use,
        # because otherwise someone could have e.g. two Name fields, and we
        # would just silently ignore it rather than doing something about it.
        headers = parsed.get_all(name) or []

        # The way the email module works when parsing bytes is that it
        # unconditionally decodes the bytes as ascii using the surrogateescape
        # handler. When you pull that data back out (such as with get_all() ),
        # it looks to see if the str has any surrogate escapes, and if it does
        # it wraps it in a Header object instead of returning the string.
        #
        # As such, we'll look for those Header objects, and fix up the encoding.
        value = []
        # Flag if we have run into any issues processing the headers, thus
        # signalling that the data belongs in 'unparsed'.
        valid_encoding = True
        for h in headers:
            # It's unclear if this can return more types than just a Header or
            # a str, so we'll just assert here to make sure.
            assert isinstance(h, (email.header.Header, str))

            # If it's a header object, we need to do our little dance to get
            # the real data out of it. In cases where there is invalid data
            # we're going to end up with mojibake, but there's no obvious, good
            # way around that without reimplementing parts of the Header object
            # ourselves.
            #
            # That should be fine since, if mojibake happens, this key is
            # going into the unparsed dict anyways.
            if isinstance(h, email.header.Header):
                # The Header object stores its data as chunks, and each chunk
                # can be independently encoded, so we'll need to check each
                # of them.
                chunks: list[tuple[bytes, str | None]] = []
                for bin, encoding in email.header.decode_header(h):
                    try:
                        bin.decode("utf8", "strict")
                    except UnicodeDecodeError:
                        # Enable mojibake.
                        encoding = "latin1"
                        valid_encoding = False
                    else:
                        encoding = "utf8"
                    chunks.append((bin, encoding))

                # Turn our chunks back into a Header object, then let that
                # Header object do the right thing to turn them into a
                # string for us.
                value.append(str(email.header.make_header(chunks)))
            # This is already a string, so just add it.
            else:
                value.append(h)

        # We've processed all of our values to get them into a list of str,
        # but we may have mojibake data, in which case this is an unparsed
        # field.
        if not valid_encoding:
            unparsed[name] = value
            continue

        raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
        if raw_name is None:
            # This is a bit of a weird situation, we've encountered a key that
            # we don't know what it means, so we don't know whether it's meant
            # to be a list or not.
            #
            # Since we can't really tell one way or another, we'll just leave it
            # as a list, even though it may be a single item list, because that's
            # what makes the most sense for email headers.
            unparsed[name] = value
            continue

        # If this is one of our string fields, then we'll check to see if our
        # value is a list of a single item. If it is then we'll assume that
        # it was emitted as a single string, and unwrap the str from inside
        # the list.
        #
        # If it's any other kind of data, then we haven't the faintest clue
        # what we should parse it as, and we have to just add it to our list
        # of unparsed stuff.
        if raw_name in _STRING_FIELDS and len(value) == 1:
            raw[raw_name] = value[0]
        # If this is one of our list of string fields, then we can just assign
        # the value, since email *only* has strings, and our get_all() call
        # above ensures that this is a list.
        elif raw_name in _LIST_FIELDS:
            raw[raw_name] = value
        # Special Case: Keywords
        # The keywords field is implemented in the metadata spec as a str,
        # but it conceptually is a list of strings, and is serialized using
        # ", ".join(keywords), so we'll do some light data massaging to turn
        # this into what it logically is.
        elif raw_name == "keywords" and len(value) == 1:
            raw[raw_name] = _parse_keywords(value[0])
        # Special Case: Project-URL
        # The project urls is implemented in the metadata spec as a list of
        # specially-formatted strings that represent a key and a value, which
        # is fundamentally a mapping, however the email format doesn't support
        # mappings in a sane way, so it was crammed into a list of strings
        # instead.
        #
        # We will do a little light data massaging to turn this into a map as
        # it logically should be.
        elif raw_name == "project_urls":
            try:
                raw[raw_name] = _parse_project_urls(value)
            except KeyError:
                # Duplicate labels make the field unparseable.
                unparsed[name] = value
        # Nothing that we've done has managed to parse this, so it'll just
        # throw it in our unparseable data and move on.
        else:
            unparsed[name] = value

    # We need to support getting the Description from the message payload in
    # addition to getting it from the headers. This does mean, though, there
    # is the possibility of it being set both ways, in which case we put both
    # in 'unparsed' since we don't know which is right.
    try:
        payload = _get_payload(parsed, data)
    except ValueError:
        # The payload wasn't valid UTF-8; record the raw payload as unparsed.
        unparsed.setdefault("description", []).append(
            parsed.get_payload(decode=isinstance(data, bytes))  # type: ignore[call-overload]
        )
    else:
        if payload:
            # Check to see if we've already got a description, if so then both
            # it, and this body move to unparseable.
            if "description" in raw:
                description_header = cast(str, raw.pop("description"))
                unparsed.setdefault("description", []).extend(
                    [description_header, payload]
                )
            elif "description" in unparsed:
                unparsed["description"].append(payload)
            else:
                raw["description"] = payload

    # We need to cast our `raw` to a metadata, because a TypedDict only supports
    # literal key names, but we're computing our key names on purpose; the
    # way this function is implemented, our `TypedDict` can only have valid key
    # names.
    return cast(RawMetadata, raw), unparsed
460
461
# Unique sentinel object (distinguishable from any real value, including None).
_NOT_FOUND = object()


# Keep the two values in sync.
_VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]
_MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]

# Fields that every metadata file must provide; their absence is an error.
_REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"])
470
471
class _Validator(Generic[T]):
    """Validate a metadata field.

    All _process_*() methods correspond to a core metadata field. The method is
    called with the field's raw value. If the raw value is valid it is returned
    in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field).
    If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause
    as appropriate).
    """

    # Attribute name on Metadata (e.g. "requires_dist"); set by __set_name__.
    name: str
    # Equivalent core metadata field name (e.g. "requires-dist").
    raw_name: str
    # Metadata version that introduced this field.
    added: _MetadataVersion

    def __init__(
        self,
        *,
        added: _MetadataVersion = "1.0",
    ) -> None:
        self.added = added

    def __set_name__(self, _owner: Metadata, name: str) -> None:
        self.name = name
        self.raw_name = _RAW_TO_EMAIL_MAPPING[name]

    def __get__(self, instance: Metadata, _owner: type[Metadata]) -> T:
        # Cache the enriched value in the instance's __dict__; attribute lookup
        # resolves there before __get__ is called, so the cache needs no
        # explicit check and each field is processed at most once.
        cache = instance.__dict__
        value = instance._raw.get(self.name)

        # To make the _process_* methods easier, we'll check if the value is None
        # and if this field is NOT a required attribute, and if both of those
        # things are true, we'll skip the converter. This means that the
        # converters never have to deal with the None union.
        if self.name in _REQUIRED_ATTRS or value is not None:
            try:
                converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}")
            except AttributeError:
                # No converter for this field: the raw value is used as-is.
                pass
            else:
                value = converter(value)

        cache[self.name] = value
        # Drop the raw value so `_raw` only holds fields not yet enriched.
        try:
            del instance._raw[self.name]  # type: ignore[misc]
        except KeyError:
            pass

        return cast(T, value)

    def _invalid_metadata(
        self, msg: str, cause: Exception | None = None
    ) -> InvalidMetadata:
        """Build an InvalidMetadata for this field, filling in ``{field}``."""
        exc = InvalidMetadata(
            self.raw_name, msg.format_map({"field": repr(self.raw_name)})
        )
        exc.__cause__ = cause
        return exc

    def _process_metadata_version(self, value: str) -> _MetadataVersion:
        # Implicitly makes Metadata-Version required.
        if value not in _VALID_METADATA_VERSIONS:
            raise self._invalid_metadata(f"{value!r} is not a valid metadata version")
        return cast(_MetadataVersion, value)

    def _process_name(self, value: str) -> str:
        if not value:
            raise self._invalid_metadata("{field} is a required field")
        # Validate the name as a side-effect; the original (un-normalized)
        # value is what gets returned.
        try:
            utils.canonicalize_name(value, validate=True)
        except utils.InvalidName as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            ) from exc
        else:
            return value

    def _process_version(self, value: str) -> version_module.Version:
        if not value:
            raise self._invalid_metadata("{field} is a required field")
        try:
            return version_module.parse(value)
        except version_module.InvalidVersion as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            ) from exc

    def _process_summary(self, value: str) -> str:
        """Check the field contains no newlines."""
        if "\n" in value:
            raise self._invalid_metadata("{field} must be a single line")
        return value

    def _process_description_content_type(self, value: str) -> str:
        """Validate the content type, charset, and (for Markdown) variant."""
        content_types = {"text/plain", "text/x-rst", "text/markdown"}
        # Parse the value with the email machinery to split out parameters.
        message = email.message.EmailMessage()
        message["content-type"] = value

        content_type, parameters = (
            # Defaults to `text/plain` if parsing failed.
            message.get_content_type().lower(),
            message["content-type"].params,
        )
        # Check if content-type is valid or defaulted to `text/plain` and thus was
        # not parseable.
        if content_type not in content_types or content_type not in value.lower():
            raise self._invalid_metadata(
                f"{{field}} must be one of {list(content_types)}, not {value!r}"
            )

        charset = parameters.get("charset", "UTF-8")
        if charset != "UTF-8":
            # Fixed: previously this interpolated list(charset), which split
            # the charset string into a list of single characters in the
            # error message (e.g. ['l', 'a', 't', 'i', 'n', '1']).
            raise self._invalid_metadata(
                f"{{field}} can only specify the UTF-8 charset, not {charset!r}"
            )

        markdown_variants = {"GFM", "CommonMark"}
        variant = parameters.get("variant", "GFM")  # Use an acceptable default.
        if content_type == "text/markdown" and variant not in markdown_variants:
            raise self._invalid_metadata(
                f"valid Markdown variants for {{field}} are {list(markdown_variants)}, "
                f"not {variant!r}",
            )
        return value

    def _process_dynamic(self, value: list[str]) -> list[str]:
        """Validate the dynamic field names and return them lowercased."""
        # Lowercase once up front (the original mapped str.lower twice).
        lowered = [dynamic_field.lower() for dynamic_field in value]
        for dynamic_field in lowered:
            if dynamic_field in {"name", "version", "metadata-version"}:
                raise self._invalid_metadata(
                    f"{dynamic_field!r} is not allowed as a dynamic field"
                )
            elif dynamic_field not in _EMAIL_TO_RAW_MAPPING:
                raise self._invalid_metadata(
                    f"{dynamic_field!r} is not a valid dynamic field"
                )
        return lowered

    def _process_provides_extra(
        self,
        value: list[str],
    ) -> list[utils.NormalizedName]:
        normalized_names = []
        try:
            for name in value:
                normalized_names.append(utils.canonicalize_name(name, validate=True))
        except utils.InvalidName as exc:
            raise self._invalid_metadata(
                f"{name!r} is invalid for {{field}}", cause=exc
            ) from exc
        else:
            return normalized_names

    def _process_requires_python(self, value: str) -> specifiers.SpecifierSet:
        try:
            return specifiers.SpecifierSet(value)
        except specifiers.InvalidSpecifier as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            ) from exc

    def _process_requires_dist(
        self,
        value: list[str],
    ) -> list[requirements.Requirement]:
        reqs = []
        try:
            for req in value:
                reqs.append(requirements.Requirement(req))
        except requirements.InvalidRequirement as exc:
            raise self._invalid_metadata(
                f"{req!r} is invalid for {{field}}", cause=exc
            ) from exc
        else:
            return reqs

    def _process_license_expression(
        self, value: str
    ) -> NormalizedLicenseExpression | None:
        try:
            return licenses.canonicalize_license_expression(value)
        except ValueError as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            ) from exc

    def _process_license_files(self, value: list[str]) -> list[str]:
        """Validate that each path is relative, resolved, and '/'-delimited."""
        paths = []
        for path in value:
            if ".." in path:
                raise self._invalid_metadata(
                    f"{path!r} is invalid for {{field}}, "
                    "parent directory indicators are not allowed"
                )
            if "*" in path:
                raise self._invalid_metadata(
                    f"{path!r} is invalid for {{field}}, paths must be resolved"
                )
            if (
                pathlib.PurePosixPath(path).is_absolute()
                or pathlib.PureWindowsPath(path).is_absolute()
            ):
                raise self._invalid_metadata(
                    f"{path!r} is invalid for {{field}}, paths must be relative"
                )
            if pathlib.PureWindowsPath(path).as_posix() != path:
                raise self._invalid_metadata(
                    f"{path!r} is invalid for {{field}}, "
                    "paths must use '/' delimiter"
                )
            paths.append(path)
        return paths
686
687
class Metadata:
    """Representation of distribution metadata.

    Compared to :class:`RawMetadata`, this class provides objects representing
    metadata fields instead of only using built-in types. Any invalid metadata
    will cause :exc:`InvalidMetadata` to be raised (with a
    :py:attr:`~BaseException.__cause__` attribute as appropriate).
    """

    # Raw field values not yet enriched; entries migrate into the instance's
    # __dict__ as _Validator.__get__ caches each enriched value.
    _raw: RawMetadata

    @classmethod
    def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> Metadata:
        """Create an instance from :class:`RawMetadata`.

        If *validate* is true, all metadata will be validated. All exceptions
        related to validation will be gathered and raised as an :class:`ExceptionGroup`.
        """
        ins = cls()
        ins._raw = data.copy()  # Mutations occur due to caching enriched values.

        if validate:
            exceptions: list[Exception] = []
            # Metadata-Version is checked first because per-field validation
            # below compares each field's introduction version against it.
            try:
                metadata_version = ins.metadata_version
                metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version)
            except InvalidMetadata as metadata_version_exc:
                exceptions.append(metadata_version_exc)
                metadata_version = None

            # Check the fields that are present plus the required fields
            # (so the absence of a required field can be reported).
            fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS
            # Remove fields that have already been checked.
            fields_to_check -= {"metadata_version"}

            for key in fields_to_check:
                try:
                    if metadata_version:
                        # Can't use getattr() as that triggers descriptor protocol which
                        # will fail due to no value for the instance argument.
                        try:
                            field_metadata_version = cls.__dict__[key].added
                        except KeyError:
                            exc = InvalidMetadata(key, f"unrecognized field: {key!r}")
                            exceptions.append(exc)
                            continue
                        field_age = _VALID_METADATA_VERSIONS.index(
                            field_metadata_version
                        )
                        # A field from a newer metadata version than declared
                        # is invalid for this file.
                        if field_age > metadata_age:
                            field = _RAW_TO_EMAIL_MAPPING[key]
                            exc = InvalidMetadata(
                                field,
                                f"{field} introduced in metadata version "
                                f"{field_metadata_version}, not {metadata_version}",
                            )
                            exceptions.append(exc)
                            continue
                    # Trigger the descriptor so the field's converter runs and
                    # may raise InvalidMetadata.
                    getattr(ins, key)
                except InvalidMetadata as exc:
                    exceptions.append(exc)

            if exceptions:
                raise ExceptionGroup("invalid metadata", exceptions)

        return ins

    @classmethod
    def from_email(cls, data: bytes | str, *, validate: bool = True) -> Metadata:
        """Parse metadata from email headers.

        If *validate* is true, the metadata will be validated. All exceptions
        related to validation will be gathered and raised as an :class:`ExceptionGroup`.
        """
        raw, unparsed = parse_email(data)

        if validate:
            exceptions: list[Exception] = []
            # Anything parse_email() could not handle is a validation error.
            for unparsed_key in unparsed:
                if unparsed_key in _EMAIL_TO_RAW_MAPPING:
                    message = f"{unparsed_key!r} has invalid data"
                else:
                    message = f"unrecognized field: {unparsed_key!r}"
                exceptions.append(InvalidMetadata(unparsed_key, message))

            if exceptions:
                raise ExceptionGroup("unparsed", exceptions)

        try:
            return cls.from_raw(raw, validate=validate)
        except ExceptionGroup as exc_group:
            # Re-wrap so the group's message reflects this entry point.
            raise ExceptionGroup(
                "invalid or unparsed metadata", exc_group.exceptions
            ) from None

    metadata_version: _Validator[_MetadataVersion] = _Validator()
    """:external:ref:`core-metadata-metadata-version`
    (required; validated to be a valid metadata version)"""
    # `name` is not normalized/typed to NormalizedName so as to provide access to
    # the original/raw name.
    name: _Validator[str] = _Validator()
    """:external:ref:`core-metadata-name`
    (required; validated using :func:`~packaging.utils.canonicalize_name` and its
    *validate* parameter)"""
    version: _Validator[version_module.Version] = _Validator()
    """:external:ref:`core-metadata-version` (required)"""
    dynamic: _Validator[list[str] | None] = _Validator(
        added="2.2",
    )
    """:external:ref:`core-metadata-dynamic`
    (validated against core metadata field names and lowercased)"""
    platforms: _Validator[list[str] | None] = _Validator()
    """:external:ref:`core-metadata-platform`"""
    supported_platforms: _Validator[list[str] | None] = _Validator(added="1.1")
    """:external:ref:`core-metadata-supported-platform`"""
    summary: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-summary` (validated to contain no newlines)"""
    description: _Validator[str | None] = _Validator()  # TODO 2.1: can be in body
    """:external:ref:`core-metadata-description`"""
    description_content_type: _Validator[str | None] = _Validator(added="2.1")
    """:external:ref:`core-metadata-description-content-type` (validated)"""
    keywords: _Validator[list[str] | None] = _Validator()
    """:external:ref:`core-metadata-keywords`"""
    home_page: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-home-page`"""
    download_url: _Validator[str | None] = _Validator(added="1.1")
    """:external:ref:`core-metadata-download-url`"""
    author: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-author`"""
    author_email: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-author-email`"""
    maintainer: _Validator[str | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-maintainer`"""
    maintainer_email: _Validator[str | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-maintainer-email`"""
    license: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-license`"""
    license_expression: _Validator[NormalizedLicenseExpression | None] = _Validator(
        added="2.4"
    )
    """:external:ref:`core-metadata-license-expression`"""
    license_files: _Validator[list[str] | None] = _Validator(added="2.4")
    """:external:ref:`core-metadata-license-file`"""
    classifiers: _Validator[list[str] | None] = _Validator(added="1.1")
    """:external:ref:`core-metadata-classifier`"""
    requires_dist: _Validator[list[requirements.Requirement] | None] = _Validator(
        added="1.2"
    )
    """:external:ref:`core-metadata-requires-dist`"""
    requires_python: _Validator[specifiers.SpecifierSet | None] = _Validator(
        added="1.2"
    )
    """:external:ref:`core-metadata-requires-python`"""
    # Because `Requires-External` allows for non-PEP 440 version specifiers, we
    # don't do any processing on the values.
    requires_external: _Validator[list[str] | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-requires-external`"""
    project_urls: _Validator[dict[str, str] | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-project-url`"""
    # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation
    # regardless of metadata version.
    provides_extra: _Validator[list[utils.NormalizedName] | None] = _Validator(
        added="2.1",
    )
    """:external:ref:`core-metadata-provides-extra`"""
    provides_dist: _Validator[list[str] | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-provides-dist`"""
    obsoletes_dist: _Validator[list[str] | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-obsoletes-dist`"""
    requires: _Validator[list[str] | None] = _Validator(added="1.1")
    """``Requires`` (deprecated)"""
    provides: _Validator[list[str] | None] = _Validator(added="1.1")
    """``Provides`` (deprecated)"""
    obsoletes: _Validator[list[str] | None] = _Validator(added="1.1")
    """``Obsoletes`` (deprecated)"""