CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/packaging/metadata.py @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author: jpayne
date: Tue, 18 Mar 2025 16:23:26 -0400
from __future__ import annotations

import email.feedparser
import email.header
import email.message
import email.parser
import email.policy
import pathlib
import sys
import typing
from typing import (
    Any,
    Callable,
    Generic,
    Literal,
    TypedDict,
    cast,
)

from . import licenses, requirements, specifiers, utils
from . import version as version_module
from .licenses import NormalizedLicenseExpression

T = typing.TypeVar("T")


if sys.version_info >= (3, 11):  # pragma: no cover
    ExceptionGroup = ExceptionGroup
else:  # pragma: no cover

    class ExceptionGroup(Exception):
        """A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11.

        If :external:exc:`ExceptionGroup` is already defined by Python itself,
        that version is used instead.
        """

        message: str
        exceptions: list[Exception]

        def __init__(self, message: str, exceptions: list[Exception]) -> None:
            self.message = message
            self.exceptions = exceptions

        def __repr__(self) -> str:
            return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})"


class InvalidMetadata(ValueError):
    """A metadata field contains invalid data."""

    field: str
    """The name of the field that contains invalid data."""

    def __init__(self, field: str, message: str) -> None:
        self.field = field
        super().__init__(message)
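
# Illustrative handling (an assumption, not part of the module): the validators
# below raise InvalidMetadata with the email-style field name attached, which
# callers can inspect, e.g.
#
#   try:
#       Metadata.from_raw({"metadata_version": "2.1", "name": "???", "version": "1.0"})
#   except ExceptionGroup as group:
#       [exc.field for exc in group.exceptions]  # ['name']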


# The RawMetadata class attempts to make as few assumptions about the underlying
# serialization formats as possible. The idea is that as long as a serialization
# format offers some very basic primitives in *some* way then we can support
# serializing to and from that format.
class RawMetadata(TypedDict, total=False):
    """A dictionary of raw core metadata.

    Each field in core metadata maps to a key of this dictionary (when data is
    provided). The key is lower-case and underscores are used instead of dashes
    compared to the equivalent core metadata field. Any core metadata field that
    can be specified multiple times or can hold multiple values in a single
    field has a key with a plural name. See :class:`Metadata` whose attributes
    match the keys of this dictionary.

    Core metadata fields that can be specified multiple times are stored as a
    list or dict depending on which is appropriate for the field. Any fields
    which hold multiple values in a single field are stored as a list.

    """

    # Metadata 1.0 - PEP 241
    metadata_version: str
    name: str
    version: str
    platforms: list[str]
    summary: str
    description: str
    keywords: list[str]
    home_page: str
    author: str
    author_email: str
    license: str

    # Metadata 1.1 - PEP 314
    supported_platforms: list[str]
    download_url: str
    classifiers: list[str]
    requires: list[str]
    provides: list[str]
    obsoletes: list[str]

    # Metadata 1.2 - PEP 345
    maintainer: str
    maintainer_email: str
    requires_dist: list[str]
    provides_dist: list[str]
    obsoletes_dist: list[str]
    requires_python: str
    requires_external: list[str]
    project_urls: dict[str, str]

    # Metadata 2.0
    # PEP 426 attempted to completely revamp the metadata format
    # but got stuck without ever being able to build consensus on
    # it and ultimately ended up withdrawn.
    #
    # However, a number of tools had started emitting METADATA with
    # `2.0` Metadata-Version, so for historical reasons, this version
    # was skipped.

    # Metadata 2.1 - PEP 566
    description_content_type: str
    provides_extra: list[str]

    # Metadata 2.2 - PEP 643
    dynamic: list[str]
    # Metadata 2.3 - PEP 685
    # No new fields were added in PEP 685, just some edge cases were
    # tightened up to provide better interoperability.

    # Metadata 2.4 - PEP 639
    license_expression: str
    license_files: list[str]
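
# An illustrative (made-up) instance, showing the naming convention described in
# the docstring above: lower-case keys with underscores, and plural names for
# fields that may hold multiple values.
#
#   _example_raw: RawMetadata = {
#       "metadata_version": "2.1",
#       "name": "example",
#       "version": "1.0",
#       "keywords": ["one", "two"],
#       "project_urls": {"Homepage": "https://example.com"},
#   }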


_STRING_FIELDS = {
    "author",
    "author_email",
    "description",
    "description_content_type",
    "download_url",
    "home_page",
    "license",
    "license_expression",
    "maintainer",
    "maintainer_email",
    "metadata_version",
    "name",
    "requires_python",
    "summary",
    "version",
}

_LIST_FIELDS = {
    "classifiers",
    "dynamic",
    "license_files",
    "obsoletes",
    "obsoletes_dist",
    "platforms",
    "provides",
    "provides_dist",
    "provides_extra",
    "requires",
    "requires_dist",
    "requires_external",
    "supported_platforms",
}

_DICT_FIELDS = {
    "project_urls",
}


def _parse_keywords(data: str) -> list[str]:
    """Split a string of comma-separated keywords into a list of keywords."""
    return [k.strip() for k in data.split(",")]


def _parse_project_urls(data: list[str]) -> dict[str, str]:
    """Parse a list of label/URL string pairings separated by a comma."""
    urls = {}
    for pair in data:
        # Our logic is slightly tricky here as we want to try and do
        # *something* reasonable with malformed data.
        #
        # The main thing that we have to worry about is data that does
        # not have a ',' at all to split the label from the value. There
        # isn't a singular right answer here, and we will fail validation
        # later on (if the caller is validating) so it doesn't *really*
        # matter, but since the missing value has to be an empty str
        # and our return value is dict[str, str], if we let the key
        # be the missing value, then they'd have multiple '' values that
        # overwrite each other in an accumulating dict.
        #
        # The other potential issue is that it's possible to have the
        # same label multiple times in the metadata, with no solid "right"
        # answer for what to do in that case. As such, we'll do the only
        # thing we can, which is treat the field as unparseable and add it
        # to our list of unparsed fields.
        parts = [p.strip() for p in pair.split(",", 1)]
        parts.extend([""] * (max(0, 2 - len(parts))))  # Ensure 2 items

        # TODO: The spec doesn't say anything about whether the keys should be
        #       considered case-sensitive or not... logically they should
        #       be case-preserving and case-insensitive, but doing that
        #       would open up more cases where we might have duplicate
        #       entries.
        label, url = parts
        if label in urls:
            # The label already exists in our set of urls, so this field
            # is unparseable, and we can just add the whole thing to our
            # unparseable data and stop processing it.
            raise KeyError("duplicate labels in project urls")
        urls[label] = url

    return urls
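
# Illustrative behaviour (not part of the module):
#
#   _parse_project_urls(["Homepage, https://example.com", "Tracker"])
#   # -> {'Homepage': 'https://example.com', 'Tracker': ''}
#
# A repeated label, e.g. ["Docs, https://a", "Docs, https://b"], raises KeyError,
# which parse_email() below uses as its cue to file the field under "unparsed".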


def _get_payload(msg: email.message.Message, source: bytes | str) -> str:
    """Get the body of the message."""
    # If our source is a str, then our caller has managed encodings for us,
    # and we don't need to deal with it.
    if isinstance(source, str):
        payload = msg.get_payload()
        assert isinstance(payload, str)
        return payload
    # If our source is a bytes, then we're managing the encoding and we need
    # to deal with it.
    else:
        bpayload = msg.get_payload(decode=True)
        assert isinstance(bpayload, bytes)
        try:
            return bpayload.decode("utf8", "strict")
        except UnicodeDecodeError as exc:
            raise ValueError("payload in an invalid encoding") from exc
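
# Illustrative behaviour (an assumption about typical inputs, not part of the
# module): when ``source`` is a str the payload is returned as-is, and when it
# is bytes the payload must decode as UTF-8 or ValueError is raised (which
# parse_email() below turns into an "unparsed" description).
#
#   msg = email.parser.Parser(policy=email.policy.compat32).parsestr(
#       "Name: example\n\nbody text"
#   )
#   _get_payload(msg, "Name: example\n\nbody text")  # -> 'body text'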


# The various parse_FORMAT functions here are intended to be as lenient as
# possible in their parsing, while still returning a correctly typed
# RawMetadata.
#
# To aid in this, we also generally want to do as little touching of the
# data as possible, except where there are possibly some historic holdovers
# that make valid data awkward to work with.
#
# While this is a lower level, intermediate format than our ``Metadata``
# class, some light touch ups can make a massive difference in usability.

# Map METADATA fields to RawMetadata.
_EMAIL_TO_RAW_MAPPING = {
    "author": "author",
    "author-email": "author_email",
    "classifier": "classifiers",
    "description": "description",
    "description-content-type": "description_content_type",
    "download-url": "download_url",
    "dynamic": "dynamic",
    "home-page": "home_page",
    "keywords": "keywords",
    "license": "license",
    "license-expression": "license_expression",
    "license-file": "license_files",
    "maintainer": "maintainer",
    "maintainer-email": "maintainer_email",
    "metadata-version": "metadata_version",
    "name": "name",
    "obsoletes": "obsoletes",
    "obsoletes-dist": "obsoletes_dist",
    "platform": "platforms",
    "project-url": "project_urls",
    "provides": "provides",
    "provides-dist": "provides_dist",
    "provides-extra": "provides_extra",
    "requires": "requires",
    "requires-dist": "requires_dist",
    "requires-external": "requires_external",
    "requires-python": "requires_python",
    "summary": "summary",
    "supported-platform": "supported_platforms",
    "version": "version",
}
_RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()}


def parse_email(data: bytes | str) -> tuple[RawMetadata, dict[str, list[str]]]:
    """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``).

    This function returns a two-item tuple of dicts. The first dict is of
    recognized fields from the core metadata specification. Fields that can be
    parsed and translated into Python's built-in types are converted
    appropriately. All other fields are left as-is. Fields that are allowed to
    appear multiple times are stored as lists.

    The second dict contains all other fields from the metadata. This includes
    any unrecognized fields. It also includes any fields which are expected to
    be parsed into a built-in type but were not formatted appropriately. Finally,
    any fields that are expected to appear only once but are repeated are
    included in this dict.

    """
    raw: dict[str, str | list[str] | dict[str, str]] = {}
    unparsed: dict[str, list[str]] = {}

    if isinstance(data, str):
        parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
    else:
        parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)

    # We have to wrap parsed.keys() in a set, because in the case of multiple
    # values for a key (a list), the key will appear multiple times in the
    # list of keys, but we're avoiding that by using get_all().
    for name in frozenset(parsed.keys()):
        # Header names in RFC are case insensitive, so we'll normalize to all
        # lower case to make comparisons easier.
        name = name.lower()

        # We use get_all() here, even for fields that aren't multiple use,
        # because otherwise someone could have e.g. two Name fields, and we
        # would just silently ignore it rather than doing something about it.
        headers = parsed.get_all(name) or []

        # The way the email module works when parsing bytes is that it
        # unconditionally decodes the bytes as ascii using the surrogateescape
        # handler. When you pull that data back out (such as with get_all()),
        # it looks to see if the str has any surrogate escapes, and if it does
        # it wraps it in a Header object instead of returning the string.
        #
        # As such, we'll look for those Header objects, and fix up the encoding.
        value = []
        # Flag if we have run into any issues processing the headers, thus
        # signalling that the data belongs in 'unparsed'.
        valid_encoding = True
        for h in headers:
            # It's unclear if this can return more types than just a Header or
            # a str, so we'll just assert here to make sure.
            assert isinstance(h, (email.header.Header, str))

            # If it's a Header object, we need to do our little dance to get
            # the real data out of it. In cases where there is invalid data
            # we're going to end up with mojibake, but there's no obvious, good
            # way around that without reimplementing parts of the Header object
            # ourselves.
            #
            # That should be fine since, if mojibake happens, this key is
            # going into the unparsed dict anyway.
            if isinstance(h, email.header.Header):
                # The Header object stores its data as chunks, and each chunk
                # can be independently encoded, so we'll need to check each
                # of them.
                chunks: list[tuple[bytes, str | None]] = []
                for bin, encoding in email.header.decode_header(h):
                    try:
                        bin.decode("utf8", "strict")
                    except UnicodeDecodeError:
                        # Enable mojibake.
                        encoding = "latin1"
                        valid_encoding = False
                    else:
                        encoding = "utf8"
                    chunks.append((bin, encoding))

                # Turn our chunks back into a Header object, then let that
                # Header object do the right thing to turn them into a
                # string for us.
                value.append(str(email.header.make_header(chunks)))
            # This is already a string, so just add it.
            else:
                value.append(h)

        # We've processed all of our values to get them into a list of str,
        # but we may have mojibake data, in which case this is an unparsed
        # field.
        if not valid_encoding:
            unparsed[name] = value
            continue

        raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
        if raw_name is None:
            # This is a bit of a weird situation: we've encountered a key that
            # we don't recognize, so we don't know whether it's meant
            # to be a list or not.
            #
            # Since we can't really tell one way or another, we'll just leave it
            # as a list, even though it may be a single-item list, because that's
            # what makes the most sense for email headers.
            unparsed[name] = value
            continue

        # If this is one of our string fields, then we'll check to see if our
        # value is a list of a single item. If it is then we'll assume that
        # it was emitted as a single string, and unwrap the str from inside
        # the list.
        #
        # If it's any other kind of data, then we haven't the faintest clue
        # what we should parse it as, and we have to just add it to our list
        # of unparsed stuff.
        if raw_name in _STRING_FIELDS and len(value) == 1:
            raw[raw_name] = value[0]
        # If this is one of our list of string fields, then we can just assign
        # the value, since email *only* has strings, and our get_all() call
        # above ensures that this is a list.
        elif raw_name in _LIST_FIELDS:
            raw[raw_name] = value
        # Special Case: Keywords
        # The keywords field is implemented in the metadata spec as a str,
        # but it conceptually is a list of strings, and is serialized using
        # ", ".join(keywords), so we'll do some light data massaging to turn
        # this into what it logically is.
        elif raw_name == "keywords" and len(value) == 1:
            raw[raw_name] = _parse_keywords(value[0])
        # Special Case: Project-URL
        # The project urls field is implemented in the metadata spec as a list of
        # specially-formatted strings that represent a key and a value, which
        # is fundamentally a mapping; however, the email format doesn't support
        # mappings in a sane way, so it was crammed into a list of strings
        # instead.
        #
        # We will do a little light data massaging to turn this into a map as
        # it logically should be.
        elif raw_name == "project_urls":
            try:
                raw[raw_name] = _parse_project_urls(value)
            except KeyError:
                unparsed[name] = value
        # Nothing that we've done has managed to parse this, so we'll just
        # throw it in our unparsed data and move on.
        else:
            unparsed[name] = value

    # We need to support getting the Description from the message payload in
    # addition to getting it from the headers. This does mean, though, there
    # is the possibility of it being set both ways, in which case we put both
    # in 'unparsed' since we don't know which is right.
    try:
        payload = _get_payload(parsed, data)
    except ValueError:
        unparsed.setdefault("description", []).append(
            parsed.get_payload(decode=isinstance(data, bytes))  # type: ignore[call-overload]
        )
    else:
        if payload:
            # Check to see if we've already got a description; if so then both
            # it and this body move to unparseable.
            if "description" in raw:
                description_header = cast(str, raw.pop("description"))
                unparsed.setdefault("description", []).extend(
                    [description_header, payload]
                )
            elif "description" in unparsed:
                unparsed["description"].append(payload)
            else:
                raw["description"] = payload

    # We need to cast our `raw` to RawMetadata because a TypedDict only supports
    # literal key names, while we're computing our key names on purpose; however,
    # the way this function is implemented, our `TypedDict` can only end up with
    # valid key names.
    return cast(RawMetadata, raw), unparsed
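
# A small illustrative run (the header text below is made up, not from the
# module):
#
#   raw, unparsed = parse_email(
#       "Metadata-Version: 2.1\n"
#       "Name: example\n"
#       "Keywords: one, two\n"
#       "Project-URL: Homepage, https://example.com\n"
#   )
#   raw["keywords"]      # ['one', 'two']
#   raw["project_urls"]  # {'Homepage': 'https://example.com'}
#   unparsed             # {}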


_NOT_FOUND = object()


# Keep the two values in sync.
_VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]
_MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"]

_REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"])


class _Validator(Generic[T]):
    """Validate a metadata field.

    All _process_*() methods correspond to a core metadata field. The method is
    called with the field's raw value. If the raw value is valid it is returned
    in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field).
    If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause
    as appropriate).
    """

    name: str
    raw_name: str
    added: _MetadataVersion

    def __init__(
        self,
        *,
        added: _MetadataVersion = "1.0",
    ) -> None:
        self.added = added

    def __set_name__(self, _owner: Metadata, name: str) -> None:
        self.name = name
        self.raw_name = _RAW_TO_EMAIL_MAPPING[name]

    def __get__(self, instance: Metadata, _owner: type[Metadata]) -> T:
        # With Python 3.8, the caching can be replaced with functools.cached_property().
        # No need to check the cache as attribute lookup will resolve into the
        # instance's __dict__ before __get__ is called.
        cache = instance.__dict__
        value = instance._raw.get(self.name)

        # To make the _process_* methods easier, we'll check if the value is None
        # and if this field is NOT a required attribute, and if both of those
        # things are true, we'll skip the converter. This will mean that the
        # converters never have to deal with the None union.
        if self.name in _REQUIRED_ATTRS or value is not None:
            try:
                converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}")
            except AttributeError:
                pass
            else:
                value = converter(value)

        cache[self.name] = value
        try:
            del instance._raw[self.name]  # type: ignore[misc]
        except KeyError:
            pass

        return cast(T, value)
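
    # Illustrative behaviour (not part of the module): the first attribute
    # access on a Metadata instance runs the matching _process_* converter and
    # stores the result in the instance's __dict__, so later accesses bypass
    # this descriptor entirely.
    #
    #   meta = Metadata.from_raw(
    #       {"metadata_version": "2.1", "name": "example", "version": "1.0"},
    #       validate=False,
    #   )
    #   meta.version                 # Version('1.0'), computed and cached here
    #   "version" in meta.__dict__   # True after the first access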

    def _invalid_metadata(
        self, msg: str, cause: Exception | None = None
    ) -> InvalidMetadata:
        exc = InvalidMetadata(
            self.raw_name, msg.format_map({"field": repr(self.raw_name)})
        )
        exc.__cause__ = cause
        return exc

    def _process_metadata_version(self, value: str) -> _MetadataVersion:
        # Implicitly makes Metadata-Version required.
        if value not in _VALID_METADATA_VERSIONS:
            raise self._invalid_metadata(f"{value!r} is not a valid metadata version")
        return cast(_MetadataVersion, value)

    def _process_name(self, value: str) -> str:
        if not value:
            raise self._invalid_metadata("{field} is a required field")
        # Validate the name as a side-effect.
        try:
            utils.canonicalize_name(value, validate=True)
        except utils.InvalidName as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            ) from exc
        else:
            return value

    def _process_version(self, value: str) -> version_module.Version:
        if not value:
            raise self._invalid_metadata("{field} is a required field")
        try:
            return version_module.parse(value)
        except version_module.InvalidVersion as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            ) from exc

    def _process_summary(self, value: str) -> str:
        """Check the field contains no newlines."""
        if "\n" in value:
            raise self._invalid_metadata("{field} must be a single line")
        return value

    def _process_description_content_type(self, value: str) -> str:
        content_types = {"text/plain", "text/x-rst", "text/markdown"}
        message = email.message.EmailMessage()
        message["content-type"] = value

        content_type, parameters = (
            # Defaults to `text/plain` if parsing failed.
            message.get_content_type().lower(),
            message["content-type"].params,
        )
        # Check if content-type is valid or defaulted to `text/plain` and thus was
        # not parseable.
        if content_type not in content_types or content_type not in value.lower():
            raise self._invalid_metadata(
                f"{{field}} must be one of {list(content_types)}, not {value!r}"
            )
        charset = parameters.get("charset", "UTF-8")
        if charset != "UTF-8":
            raise self._invalid_metadata(
                f"{{field}} can only specify the UTF-8 charset, not {charset!r}"
            )

        markdown_variants = {"GFM", "CommonMark"}
        variant = parameters.get("variant", "GFM")  # Use an acceptable default.
        if content_type == "text/markdown" and variant not in markdown_variants:
            raise self._invalid_metadata(
                f"valid Markdown variants for {{field}} are {list(markdown_variants)}, "
                f"not {variant!r}",
            )
        return value
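
    # Illustrative values (assumptions, not from the module):
    #
    #   "text/x-rst"                                 -> accepted
    #   "text/markdown; charset=UTF-8; variant=GFM"  -> accepted
    #   "text/html"                                  -> InvalidMetadata
    #   "text/markdown; charset=latin-1"             -> InvalidMetadata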

    def _process_dynamic(self, value: list[str]) -> list[str]:
        for dynamic_field in map(str.lower, value):
            if dynamic_field in {"name", "version", "metadata-version"}:
                raise self._invalid_metadata(
                    f"{dynamic_field!r} is not allowed as a dynamic field"
                )
            elif dynamic_field not in _EMAIL_TO_RAW_MAPPING:
                raise self._invalid_metadata(
                    f"{dynamic_field!r} is not a valid dynamic field"
                )
        return list(map(str.lower, value))

    def _process_provides_extra(
        self,
        value: list[str],
    ) -> list[utils.NormalizedName]:
        normalized_names = []
        try:
            for name in value:
                normalized_names.append(utils.canonicalize_name(name, validate=True))
        except utils.InvalidName as exc:
            raise self._invalid_metadata(
                f"{name!r} is invalid for {{field}}", cause=exc
            ) from exc
        else:
            return normalized_names

    def _process_requires_python(self, value: str) -> specifiers.SpecifierSet:
        try:
            return specifiers.SpecifierSet(value)
        except specifiers.InvalidSpecifier as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            ) from exc

    def _process_requires_dist(
        self,
        value: list[str],
    ) -> list[requirements.Requirement]:
        reqs = []
        try:
            for req in value:
                reqs.append(requirements.Requirement(req))
        except requirements.InvalidRequirement as exc:
            raise self._invalid_metadata(
                f"{req!r} is invalid for {{field}}", cause=exc
            ) from exc
        else:
            return reqs

    def _process_license_expression(
        self, value: str
    ) -> NormalizedLicenseExpression | None:
        try:
            return licenses.canonicalize_license_expression(value)
        except ValueError as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            ) from exc

    def _process_license_files(self, value: list[str]) -> list[str]:
        paths = []
        for path in value:
            if ".." in path:
                raise self._invalid_metadata(
                    f"{path!r} is invalid for {{field}}, "
                    "parent directory indicators are not allowed"
                )
            if "*" in path:
                raise self._invalid_metadata(
                    f"{path!r} is invalid for {{field}}, paths must be resolved"
                )
            if (
                pathlib.PurePosixPath(path).is_absolute()
                or pathlib.PureWindowsPath(path).is_absolute()
            ):
                raise self._invalid_metadata(
                    f"{path!r} is invalid for {{field}}, paths must be relative"
                )
            if pathlib.PureWindowsPath(path).as_posix() != path:
                raise self._invalid_metadata(
                    f"{path!r} is invalid for {{field}}, "
                    "paths must use '/' delimiter"
                )
            paths.append(path)
        return paths
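
    # Illustrative paths (assumptions, not from the module): "LICENSES/MIT.txt"
    # is accepted by the checks above, while "../LICENSE" (parent indicator),
    # "LICENSES/*" (unresolved glob), "/usr/share/LICENSE" (absolute), and
    # "LICENSES\\MIT.txt" (backslash delimiter) each raise InvalidMetadata.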


class Metadata:
    """Representation of distribution metadata.

    Compared to :class:`RawMetadata`, this class provides objects representing
    metadata fields instead of only using built-in types. Any invalid metadata
    will cause :exc:`InvalidMetadata` to be raised (with a
    :py:attr:`~BaseException.__cause__` attribute as appropriate).
    """

    _raw: RawMetadata

    @classmethod
    def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> Metadata:
        """Create an instance from :class:`RawMetadata`.

        If *validate* is true, all metadata will be validated. All exceptions
        related to validation will be gathered and raised as an :class:`ExceptionGroup`.
        """
        ins = cls()
        ins._raw = data.copy()  # Mutations occur due to caching enriched values.

        if validate:
            exceptions: list[Exception] = []
            try:
                metadata_version = ins.metadata_version
                metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version)
            except InvalidMetadata as metadata_version_exc:
                exceptions.append(metadata_version_exc)
                metadata_version = None

            # Make sure to check the fields that are present, plus the required
            # fields (so their absence can be reported).
            fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS
            # Remove fields that have already been checked.
            fields_to_check -= {"metadata_version"}

            for key in fields_to_check:
                try:
                    if metadata_version:
                        # Can't use getattr() as that triggers descriptor protocol which
                        # will fail due to no value for the instance argument.
                        try:
                            field_metadata_version = cls.__dict__[key].added
                        except KeyError:
                            exc = InvalidMetadata(key, f"unrecognized field: {key!r}")
                            exceptions.append(exc)
                            continue
                        field_age = _VALID_METADATA_VERSIONS.index(
                            field_metadata_version
                        )
                        if field_age > metadata_age:
                            field = _RAW_TO_EMAIL_MAPPING[key]
                            exc = InvalidMetadata(
                                field,
                                f"{field} introduced in metadata version "
                                f"{field_metadata_version}, not {metadata_version}",
                            )
                            exceptions.append(exc)
                            continue
                    getattr(ins, key)
                except InvalidMetadata as exc:
                    exceptions.append(exc)

            if exceptions:
                raise ExceptionGroup("invalid metadata", exceptions)

        return ins
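
    # Illustrative usage (not part of the module): building an instance from an
    # already-parsed dictionary; the field values are made up.
    #
    #   meta = Metadata.from_raw(
    #       {"metadata_version": "2.1", "name": "example", "version": "1.0"}
    #   )
    #   meta.version  # Version('1.0')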

    @classmethod
    def from_email(cls, data: bytes | str, *, validate: bool = True) -> Metadata:
        """Parse metadata from email headers.

        If *validate* is true, the metadata will be validated. All exceptions
        related to validation will be gathered and raised as an :class:`ExceptionGroup`.
        """
        raw, unparsed = parse_email(data)

        if validate:
            exceptions: list[Exception] = []
            for unparsed_key in unparsed:
                if unparsed_key in _EMAIL_TO_RAW_MAPPING:
                    message = f"{unparsed_key!r} has invalid data"
                else:
                    message = f"unrecognized field: {unparsed_key!r}"
                exceptions.append(InvalidMetadata(unparsed_key, message))

            if exceptions:
                raise ExceptionGroup("unparsed", exceptions)

        try:
            return cls.from_raw(raw, validate=validate)
        except ExceptionGroup as exc_group:
            raise ExceptionGroup(
                "invalid or unparsed metadata", exc_group.exceptions
            ) from None

    metadata_version: _Validator[_MetadataVersion] = _Validator()
    """:external:ref:`core-metadata-metadata-version`
    (required; validated to be a valid metadata version)"""
    # `name` is not normalized/typed to NormalizedName so as to provide access to
    # the original/raw name.
    name: _Validator[str] = _Validator()
    """:external:ref:`core-metadata-name`
    (required; validated using :func:`~packaging.utils.canonicalize_name` and its
    *validate* parameter)"""
    version: _Validator[version_module.Version] = _Validator()
    """:external:ref:`core-metadata-version` (required)"""
    dynamic: _Validator[list[str] | None] = _Validator(
        added="2.2",
    )
    """:external:ref:`core-metadata-dynamic`
    (validated against core metadata field names and lowercased)"""
    platforms: _Validator[list[str] | None] = _Validator()
    """:external:ref:`core-metadata-platform`"""
    supported_platforms: _Validator[list[str] | None] = _Validator(added="1.1")
    """:external:ref:`core-metadata-supported-platform`"""
    summary: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-summary` (validated to contain no newlines)"""
    description: _Validator[str | None] = _Validator()  # TODO 2.1: can be in body
    """:external:ref:`core-metadata-description`"""
    description_content_type: _Validator[str | None] = _Validator(added="2.1")
    """:external:ref:`core-metadata-description-content-type` (validated)"""
    keywords: _Validator[list[str] | None] = _Validator()
    """:external:ref:`core-metadata-keywords`"""
    home_page: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-home-page`"""
    download_url: _Validator[str | None] = _Validator(added="1.1")
    """:external:ref:`core-metadata-download-url`"""
    author: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-author`"""
    author_email: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-author-email`"""
    maintainer: _Validator[str | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-maintainer`"""
    maintainer_email: _Validator[str | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-maintainer-email`"""
    license: _Validator[str | None] = _Validator()
    """:external:ref:`core-metadata-license`"""
    license_expression: _Validator[NormalizedLicenseExpression | None] = _Validator(
        added="2.4"
    )
    """:external:ref:`core-metadata-license-expression`"""
    license_files: _Validator[list[str] | None] = _Validator(added="2.4")
    """:external:ref:`core-metadata-license-file`"""
    classifiers: _Validator[list[str] | None] = _Validator(added="1.1")
    """:external:ref:`core-metadata-classifier`"""
    requires_dist: _Validator[list[requirements.Requirement] | None] = _Validator(
        added="1.2"
    )
    """:external:ref:`core-metadata-requires-dist`"""
    requires_python: _Validator[specifiers.SpecifierSet | None] = _Validator(
        added="1.2"
    )
    """:external:ref:`core-metadata-requires-python`"""
    # Because `Requires-External` allows for non-PEP 440 version specifiers, we
    # don't do any processing on the values.
    requires_external: _Validator[list[str] | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-requires-external`"""
    project_urls: _Validator[dict[str, str] | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-project-url`"""
    # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation
    # regardless of metadata version.
    provides_extra: _Validator[list[utils.NormalizedName] | None] = _Validator(
        added="2.1",
    )
    """:external:ref:`core-metadata-provides-extra`"""
    provides_dist: _Validator[list[str] | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-provides-dist`"""
    obsoletes_dist: _Validator[list[str] | None] = _Validator(added="1.2")
    """:external:ref:`core-metadata-obsoletes-dist`"""
    requires: _Validator[list[str] | None] = _Validator(added="1.1")
    """``Requires`` (deprecated)"""
    provides: _Validator[list[str] | None] = _Validator(added="1.1")
    """``Provides`` (deprecated)"""
    obsoletes: _Validator[list[str] | None] = _Validator(added="1.1")
    """``Obsoletes`` (deprecated)"""