Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/email/message.py @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 # Copyright (C) 2001-2007 Python Software Foundation | |
2 # Author: Barry Warsaw | |
3 # Contact: email-sig@python.org | |
4 | |
5 """Basic message object for the email package object model.""" | |
6 | |
7 __all__ = ['Message', 'EmailMessage'] | |
8 | |
9 import re | |
10 import uu | |
11 import quopri | |
12 from io import BytesIO, StringIO | |
13 | |
14 # Intrapackage imports | |
15 from email import utils | |
16 from email import errors | |
17 from email._policybase import Policy, compat32 | |
18 from email import charset as _charset | |
19 from email._encoded_words import decode_b | |
20 Charset = _charset.Charset | |
21 | |
22 SEMISPACE = '; ' | |
23 | |
24 # Regular expression that matches `special' characters in parameters, the | |
25 # existence of which force quoting of the parameter value. | |
26 tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') | |
27 | |
28 | |
def _splitparam(param):
    """Split a header value at its first semicolon.

    Returns a 2-tuple ``(main, rest)`` with surrounding whitespace removed
    from both parts.  ``rest`` is None when the value holds no semicolon.
    This is a simple split, not a full RFC 2045 (section 5.1) parser.
    """
    # The argument may be a Header object rather than a str; stringify it.
    head, semicolon, tail = str(param).partition(';')
    rest = tail.strip() if semicolon else None
    return head.strip(), rest
38 | |
def _formatparam(param, value=None, quote=True):
    """Format a header parameter as ``key=value`` and return it.

    A missing or empty value yields the bare key.  A 3-tuple value of
    (charset, language, value) is encoded per RFC 2231; a string holding
    non-ASCII characters is likewise RFC 2231 encoded using the utf-8
    charset and an empty language.  Any other value is wrapped in double
    quotes when quote is true or when it contains a `special' character.
    """
    # Nothing to attach: hand back just the key.
    if value is None or len(value) == 0:
        return param
    # A tuple carries (charset, language, value); charset is a plain string,
    # not a Charset instance.  RFC 2231 encoded values are never quoted.
    if isinstance(value, tuple):
        encoded = utils.encode_rfc2231(value[2], value[0], value[1])
        return '%s=%s' % (param + '*', encoded)
    try:
        value.encode('ascii')
    except UnicodeEncodeError:
        encoded = utils.encode_rfc2231(value, 'utf-8', '')
        return '%s=%s' % (param + '*', encoded)
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    if quote or tspecials.search(value):
        return '%s="%s"' % (param, utils.quote(value))
    return '%s=%s' % (param, value)
72 | |
def _parseparam(s):
    """Split a parameterised header value into a list of its pieces.

    Pieces are separated by semicolons; a semicolon that sits inside an
    open double-quoted string does not separate.  For a piece containing
    '=', the key is lower-cased and whitespace around key and value is
    dropped.
    """
    # The argument might be a Header, so stringify it.  The leading ';'
    # lets the loop treat the first piece like every other one.
    remaining = ';' + str(s)
    pieces = []
    while remaining.startswith(';'):
        remaining = remaining[1:]
        cut = remaining.find(';')
        # An odd number of unescaped quotes before the semicolon means it
        # lies inside a quoted string; look for the next candidate.
        while cut > 0 and (remaining.count('"', 0, cut)
                           - remaining.count('\\"', 0, cut)) % 2:
            cut = remaining.find(';', cut + 1)
        if cut < 0:
            cut = len(remaining)
        field = remaining[:cut]
        key, equals, val = field.partition('=')
        if equals:
            field = key.strip().lower() + '=' + val.strip()
        pieces.append(field.strip())
        remaining = remaining[cut:]
    return pieces
91 | |
92 | |
def _unquotevalue(value):
    """Unquote a parameter value, keeping any RFC 2231 tuple shape.

    Unlike utils.collapse_rfc2231_value() this does not try to convert the
    value to unicode: a 3-tuple comes back as a 3-tuple with only its last
    item unquoted, and any other value is unquoted directly.
    """
    if not isinstance(value, tuple):
        return utils.unquote(value)
    return value[0], value[1], utils.unquote(value[2])
102 | |
103 | |
104 | |
105 class Message: | |
106 """Basic message object. | |
107 | |
108 A message object is defined as something that has a bunch of RFC 2822 | |
109 headers and a payload. It may optionally have an envelope header | |
110 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a | |
111 multipart or a message/rfc822), then the payload is a list of Message | |
112 objects, otherwise it is a string. | |
113 | |
114 Message objects implement part of the `mapping' interface, which assumes | |
115 there is exactly one occurrence of the header per message. Some headers | |
116 do in fact appear multiple times (e.g. Received) and for those headers, | |
117 you must use the explicit API to set or get all the headers. Not all of | |
118 the mapping methods are implemented. | |
119 """ | |
def __init__(self, policy=compat32):
    """Set up an empty message that uses the given policy."""
    self.policy = policy
    # Header list starts empty; envelope line, body and charset are unset.
    self._headers = []
    self._unixfrom = None
    self._payload = None
    self._charset = None
    # Defaults for multipart messages
    self.preamble = None
    self.epilogue = None
    self.defects = []
    # Content type reported while there is no Content-Type header
    self._default_type = 'text/plain'
131 | |
def __str__(self):
    """Return the whole formatted message as a string (see as_string())."""
    text = self.as_string()
    return text
136 | |
def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
    """Return the entire formatted message as a string.

    Optional 'unixfrom', when true, means include the Unix From_ envelope
    header.  For backward compatibility reasons, maxheaderlen defaults to
    0, so it must be overridden explicitly to get a different value.
    'policy' is passed to the Generator instance used to serialize the
    message; when it is not given, the policy associated with the message
    instance is used.

    If the message object contains binary data that is not encoded
    according to RFC standards, the non-compliant data will be replaced by
    unicode "unknown character" code points.
    """
    # Imported here rather than at module level.
    from email.generator import Generator
    if policy is None:
        policy = self.policy
    buffer = StringIO()
    generator = Generator(buffer,
                          mangle_from_=False,
                          maxheaderlen=maxheaderlen,
                          policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
160 | |
def __bytes__(self):
    """Return the whole formatted message as bytes (see as_bytes())."""
    data = self.as_bytes()
    return data
165 | |
def as_bytes(self, unixfrom=False, policy=None):
    """Return the entire formatted message as a bytes object.

    Optional 'unixfrom', when true, means include the Unix From_ envelope
    header.  'policy' is passed to the BytesGenerator instance used to
    serialize the message; when it is not given, the policy associated
    with the message instance is used.
    """
    # Imported here rather than at module level.
    from email.generator import BytesGenerator
    if policy is None:
        policy = self.policy
    buffer = BytesIO()
    generator = BytesGenerator(buffer, mangle_from_=False, policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
180 | |
def is_multipart(self):
    """Return True if the payload is a list, i.e. the message has parts."""
    payload = self._payload
    return isinstance(payload, list)
184 | |
185 # | |
186 # Unix From_ line | |
187 # | |
def set_unixfrom(self, unixfrom):
    """Store the Unix From_ envelope line for this message."""
    self._unixfrom = unixfrom
190 | |
def get_unixfrom(self):
    """Return the stored Unix From_ envelope line (None if never set)."""
    envelope = self._unixfrom
    return envelope
193 | |
194 # | |
195 # Payload manipulation. | |
196 # | |
def attach(self, payload):
    """Append the given payload to the current payload.

    After this call the current payload is always a list.  To set the
    payload to a scalar object, use set_payload() instead.

    Raises TypeError when the existing payload cannot be appended to.
    """
    # First attachment: start a fresh list.
    if self._payload is None:
        self._payload = [payload]
        return
    try:
        self._payload.append(payload)
    except AttributeError:
        raise TypeError("Attach is not valid on a message with a"
                        " non-multipart payload")
212 | |
def get_payload(self, i=None, decode=False):
    """Return a reference to the payload.

    The payload will either be a list object or a string.  If you mutate
    the list object, you modify the message's payload in place.  Optional
    i returns that index into the payload.

    Optional decode is a flag indicating whether the payload should be
    decoded or not, according to the Content-Transfer-Encoding header
    (default is False).

    When True and the message is not a multipart, the payload will be
    decoded if this header's value is `quoted-printable', `base64' or one
    of the uuencode names.  If some other encoding is used, or the header
    is missing, or if the payload has bogus uuencoded data, the payload is
    returned as-is (as bytes when it was a string).  A payload that is
    neither a string nor bytes (for instance None) is returned unchanged.

    If the message is a multipart and the decode flag is True, then None
    is returned.

    Raises TypeError if i is given for a payload that is not a list.
    """
    # Here is the logic table for this code, based on the email5.0.0 code:
    #   i     decode  is_multipart  result
    # ------  ------  ------------  ------------------------------
    #  None   True    True          None
    #   i     True    True          None
    #  None   False   True          _payload (a list)
    #   i     False   True          _payload element i (a Message)
    #   i     False   False         error (not a list)
    #   i     True    False         error (not a list)
    #  None   False   False         _payload
    #  None   True    False         _payload decoded (bytes)
    if self.is_multipart():
        if decode:
            return None
        if i is None:
            return self._payload
        return self._payload[i]
    # For backward compatibility, use isinstance and this error message
    # instead of the more logical is_multipart test.
    if i is not None and not isinstance(self._payload, list):
        raise TypeError('Expected list, got %s' % type(self._payload))
    payload = self._payload
    # cte might be a Header, so for now stringify it.
    cte = str(self.get('content-transfer-encoding', '')).lower()
    # Bytes form of the payload; stays None when there is nothing that can
    # be fed to a decoder.
    bpayload = None
    if isinstance(payload, str):
        if utils._has_surrogates(payload):
            bpayload = payload.encode('ascii', 'surrogateescape')
            if not decode:
                try:
                    payload = bpayload.decode(
                        self.get_param('charset', 'ascii'), 'replace')
                except LookupError:
                    payload = bpayload.decode('ascii', 'replace')
        elif decode:
            try:
                bpayload = payload.encode('ascii')
            except UnicodeError:
                # This won't happen for RFC compliant messages (messages
                # containing only ASCII code points in the unicode input).
                # If it does happen, turn the string into bytes in a way
                # guaranteed not to fail.
                bpayload = payload.encode('raw-unicode-escape')
    elif isinstance(payload, (bytes, bytearray)):
        # A bytes payload previously left bpayload unbound, so decoding
        # failed with UnboundLocalError.
        bpayload = bytes(payload)
    if not decode:
        return payload
    if bpayload is None:
        # Neither text nor bytes (e.g. no payload was ever set): there is
        # nothing to decode, so hand the payload back as-is.
        return payload
    if cte == 'quoted-printable':
        return quopri.decodestring(bpayload)
    elif cte == 'base64':
        # XXX: this is a bit of a hack; decode_b should probably be factored
        # out somewhere, but I haven't figured out where yet.
        value, defects = decode_b(b''.join(bpayload.splitlines()))
        for defect in defects:
            self.policy.handle_defect(self, defect)
        return value
    elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        in_file = BytesIO(bpayload)
        out_file = BytesIO()
        try:
            uu.decode(in_file, out_file, quiet=True)
            return out_file.getvalue()
        except uu.Error:
            # Some decoding problem
            return bpayload
    if isinstance(payload, str):
        return bpayload
    return payload
302 | |
def set_payload(self, payload, charset=None):
    """Set the payload to the given value.

    Optional charset sets the message's default character set.  See
    set_charset() for details.
    """
    is_text = hasattr(payload, 'encode')
    # Text without a charset is stored untouched and nothing else happens.
    if is_text and charset is None:
        self._payload = payload
        return
    if is_text:
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        payload = payload.encode(charset.output_charset)
    # Bytes are kept internally as str, with non-ASCII bytes carried as
    # surrogate escapes.
    if hasattr(payload, 'decode'):
        stored = payload.decode('ascii', 'surrogateescape')
    else:
        stored = payload
    self._payload = stored
    if charset is not None:
        self.set_charset(charset)
322 | |
def set_charset(self, charset):
    """Set the charset of the payload to a given character set.

    charset can be a Charset instance, a string naming a character set, or
    None.  A string is converted to a Charset instance.  If charset is
    None, the charset parameter is removed from the Content-Type field.

    The message will be assumed to be of type text/* encoded with
    charset.input_charset.  It will be converted to charset.output_charset
    and encoded properly, if needed, when generating the plain text
    representation of the message.  MIME headers (MIME-Version,
    Content-Type, Content-Transfer-Encoding) will be added as needed.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if not isinstance(charset, Charset):
        charset = Charset(charset)
    self._charset = charset
    if 'MIME-Version' not in self:
        self.add_header('MIME-Version', '1.0')
    if 'Content-Type' in self:
        self.set_param('charset', charset.get_output_charset())
    else:
        self.add_header('Content-Type', 'text/plain',
                        charset=charset.get_output_charset())
    if charset != charset.get_output_charset():
        self._payload = charset.body_encode(self._payload)
    # An existing Content-Transfer-Encoding header is left alone.
    if 'Content-Transfer-Encoding' in self:
        return
    encoder = charset.get_body_encoding()
    try:
        # Try calling it on the message; a TypeError means it is not
        # callable that way and is handled below.
        encoder(self)
    except TypeError:
        # This 'if' is for backward compatibility, it allows unicode
        # through even though that won't work correctly if the
        # message is serialized.
        body = self._payload
        if body:
            try:
                body = body.encode('ascii', 'surrogateescape')
            except UnicodeError:
                body = body.encode(charset.output_charset)
        self._payload = charset.body_encode(body)
        self.add_header('Content-Transfer-Encoding', encoder)
369 | |
def get_charset(self):
    """Return the Charset instance stored for the payload (None if unset)."""
    current = self._charset
    return current
374 | |
375 # | |
376 # MAPPING INTERFACE (partial) | |
377 # | |
def __len__(self):
    """Return the number of stored headers, duplicates included."""
    headers = self._headers
    return len(headers)
381 | |
def __getitem__(self, name):
    """Get a header value.

    A missing header yields None rather than an exception.

    Note that if the header appeared multiple times, exactly which
    occurrence gets returned is undefined.  Use get_all() to get all
    the values matching a header field name.
    """
    value = self.get(name)
    return value
392 | |
def __setitem__(self, name, val):
    """Set the value of a header.

    Note: this does not overwrite an existing header with the same field
    name.  Use __delitem__() first to delete any existing headers.

    Raises ValueError when the policy reports a maximum count for this
    header and the message already holds that many.
    """
    limit = self.policy.header_max_count(name)
    if limit:
        wanted = name.lower()
        seen = 0
        for existing, _value in self._headers:
            if existing.lower() != wanted:
                continue
            seen += 1
            if seen >= limit:
                raise ValueError("There may be at most {} {} headers "
                                 "in a message".format(limit, name))
    self._headers.append(self.policy.header_store_parse(name, val))
410 | |
def __delitem__(self, name):
    """Delete all occurrences of a header, if present.

    Does not raise an exception if the header is missing.
    """
    target = name.lower()
    # Rebuild the list without any header matching case-insensitively.
    self._headers = [(key, value) for key, value in self._headers
                     if key.lower() != target]
422 | |
def __contains__(self, name):
    """Return True if a header named name exists (case-insensitive)."""
    wanted = name.lower()
    present = [key.lower() for key, _value in self._headers]
    return wanted in present
425 | |
def __iter__(self):
    """Yield each stored header field name, in stored order."""
    for name, _value in self._headers:
        yield name
429 | |
def keys(self):
    """Return a list of all the message's header field names.

    Names come back in the order the headers are stored, which is the
    order they appeared in the original message or were added to it, and
    may contain duplicates.  Any fields deleted and re-inserted are always
    appended to the header list.
    """
    names = []
    for name, _value in self._headers:
        names.append(name)
    return names
439 | |
def values(self):
    """Return a list of all the message's header values.

    Values come back in the order the headers are stored, which is the
    order they appeared in the original message or were added to it, and
    may contain duplicates.  Any fields deleted and re-inserted are always
    appended to the header list.
    """
    fetch = self.policy.header_fetch_parse
    parsed = []
    for name, raw in self._headers:
        parsed.append(fetch(name, raw))
    return parsed
450 | |
def items(self):
    """Get all the message's header fields and values.

    The (name, value) pairs come back in the order the headers are stored,
    which is the order they appeared in the original message or were added
    to it, and may contain duplicates.  Any fields deleted and re-inserted
    are always appended to the header list.
    """
    fetch = self.policy.header_fetch_parse
    pairs = []
    for name, raw in self._headers:
        pairs.append((name, fetch(name, raw)))
    return pairs
461 | |
def get(self, name, failobj=None):
    """Get a header value.

    Like __getitem__() but return failobj instead of None when the field
    is missing.  The first stored header matching name case-insensitively
    is the one returned.
    """
    wanted = name.lower()
    matches = ((key, raw) for key, raw in self._headers
               if key.lower() == wanted)
    first = next(matches, None)
    if first is None:
        return failobj
    key, raw = first
    return self.policy.header_fetch_parse(key, raw)
473 | |
474 # | |
475 # "Internal" methods (public API, but only intended for use by a parser | |
476 # or generator, not normal application code. | |
477 # | |
478 | |
def set_raw(self, name, value):
    """Store name and value in the model without modification.

    This is an "internal" API, intended only for use by a parser.
    """
    entry = (name, value)
    self._headers.append(entry)
485 | |
def raw_items(self):
    """Return the (name, value) header pairs without modification.

    The iterator walks a copy of the header list taken at call time.

    This is an "internal" API, intended only for use by a generator.
    """
    snapshot = self._headers.copy()
    return iter(snapshot)
492 | |
493 # | |
494 # Additional useful stuff | |
495 # | |
496 | |
def get_all(self, name, failobj=None):
    """Return a list of all the values for the named field.

    Values come back in the order the headers are stored, which is the
    order they appeared in the original message, and may contain
    duplicates.  Any fields deleted and re-inserted are always appended to
    the header list.

    If no such fields exist, failobj is returned (defaults to None).
    """
    wanted = name.lower()
    fetch = self.policy.header_fetch_parse
    found = [fetch(key, raw) for key, raw in self._headers
             if key.lower() == wanted]
    return found if found else failobj
514 | |
def add_header(self, _name, _value, **_params):
    """Extended header setting.

    _name is the header field to add.  Keyword arguments can be used to set
    additional parameters for the header field, with underscores converted
    to dashes.  Normally the parameter will be added as key="value" unless
    value is None, in which case only the key will be added.  If a
    parameter value contains non-ASCII characters it can be specified as a
    three-tuple of (charset, language, value), in which case it will be
    encoded according to RFC2231 rules.  Otherwise it will be encoded using
    the utf-8 charset and a language of ''.

    Examples:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    msg.add_header('content-disposition', 'attachment',
                   filename=('utf-8', '', 'Fußballer.ppt'))
    msg.add_header('content-disposition', 'attachment',
                   filename='Fußballer.ppt')
    """
    # The main value, when given, always leads the parameter list.
    parts = [] if _value is None else [_value]
    for key, val in _params.items():
        dashed = key.replace('_', '-')
        parts.append(dashed if val is None else _formatparam(dashed, val))
    self[_name] = SEMISPACE.join(parts)
544 | |
def replace_header(self, _name, _value):
    """Replace a header.

    Replace the first matching header found in the message, retaining
    header order and case.  If no matching header was found, a KeyError is
    raised (carrying the lower-cased name).
    """
    wanted = _name.lower()
    for index, (key, _old) in enumerate(self._headers):
        if key.lower() == wanted:
            # Reuse the stored spelling of the name so its case survives.
            self._headers[index] = self.policy.header_store_parse(key, _value)
            return
    raise KeyError(wanted)
559 | |
560 # | |
561 # Use these three methods instead of the three above. | |
562 # | |
563 | |
def get_content_type(self):
    """Return the message's content type.

    The returned string is coerced to lower case of the form
    `maintype/subtype'.  If there was no Content-Type header in the
    message, the default type as given by get_default_type() will be
    returned.  A header value that does not contain exactly one '/' is
    reported as 'text/plain'.

    RFC 2045 defines a message's default type to be text/plain unless it
    appears inside a multipart/digest container, in which case it would be
    message/rfc822.
    """
    sentinel = object()
    header = self.get('content-type', sentinel)
    if header is sentinel:
        # This should have no parameters
        return self.get_default_type()
    ctype = _splitparam(header)[0].lower()
    # RFC 2045, section 5.2 says if it's invalid, use text/plain
    return ctype if ctype.count('/') == 1 else 'text/plain'
587 | |
def get_content_maintype(self):
    """Return the message's main content type.

    This is the `maintype' part of the string returned by
    get_content_type().
    """
    maintype, _slash, _subtype = self.get_content_type().partition('/')
    return maintype
596 | |
def get_content_subtype(self):
    """Return the message's sub-content type.

    This is the `subtype' part of the string returned by
    get_content_type().
    """
    pieces = self.get_content_type().split('/')
    return pieces[1]
605 | |
def get_default_type(self):
    """Return the `default' content type.

    Most messages have a default content type of text/plain, except for
    messages that are subparts of multipart/digest containers.  Such
    subparts have a default content type of message/rfc822.
    """
    default = self._default_type
    return default
614 | |
def set_default_type(self, ctype):
    """Set the `default' content type.

    ctype should be either "text/plain" or "message/rfc822", although this
    is not enforced.  The value is kept on the instance only; it is not
    stored in the Content-Type header.
    """
    self._default_type = ctype
623 | |
def _get_params_preserve(self, failobj, header):
    """Return the parameters of header as (name, value) pairs.

    Like get_params() but the values are not unquoted here.  Returns
    failobj when the header is absent.  A bare attribute (no '=') gets
    the empty string as its value before the list is passed through
    utils.decode_params().
    """
    # BAW: should this be part of the public interface?
    sentinel = object()
    raw = self.get(header, sentinel)
    if raw is sentinel:
        return failobj
    pairs = []
    for item in _parseparam(raw):
        key, equals, val = item.partition('=')
        if equals:
            pairs.append((key.strip(), val.strip()))
        else:
            # Must have been a bare attribute
            pairs.append((item.strip(), ''))
    return utils.decode_params(pairs)
644 | |
def get_params(self, failobj=None, header='content-type', unquote=True):
    """Return the message's Content-Type parameters, as a list.

    The elements of the returned list are 2-tuples of key/value pairs, as
    split on the `=' sign.  The left hand side of the `=' is the key,
    while the right hand side is the value.  If there is no `=' sign in
    the parameter the value is the empty string.  The value is as
    described in the get_param() method.

    Optional failobj is the object to return if there is no Content-Type
    header.  Optional header is the header to search instead of
    Content-Type.  If unquote is True, the value is unquoted.
    """
    sentinel = object()
    params = self._get_params_preserve(sentinel, header)
    if params is sentinel:
        return failobj
    if not unquote:
        return params
    return [(key, _unquotevalue(val)) for key, val in params]
666 | |
def get_param(self, param, failobj=None, header='content-type',
              unquote=True):
    """Return the parameter value if found in the Content-Type header.

    Optional failobj is the object to return if there is no Content-Type
    header, or the Content-Type header has no such parameter.  Optional
    header is the header to search instead of Content-Type.

    Parameter keys are always compared case insensitively.  The return
    value can either be a string, or a 3-tuple if the parameter was RFC
    2231 encoded.  When it's a 3-tuple, the elements of the value are of
    the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
    LANGUAGE can be None, in which case you should consider VALUE to be
    encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
    The parameter value (either the returned string, or the VALUE item in
    the 3-tuple) is always unquoted, unless unquote is set to False.

    If your application doesn't care whether the parameter was RFC 2231
    encoded, it can turn the return value into a string as follows:

        rawparam = msg.get_param('foo')
        param = email.utils.collapse_rfc2231_value(rawparam)

    """
    if header not in self:
        return failobj
    for key, val in self._get_params_preserve(failobj, header):
        if key.lower() != param.lower():
            continue
        return _unquotevalue(val) if unquote else val
    return failobj
700 | |
def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language='', replace=False):
    """Set a parameter in the Content-Type header.

    If the parameter already exists in the header, its value will be
    replaced with the new value.

    If header is Content-Type and has not yet been defined for this
    message, it will be set to "text/plain" and the new parameter and
    value will be appended as per RFC 2045.

    An alternate header can be specified in the header argument, and all
    parameters will be quoted as necessary unless requote is False.

    If charset is specified, the parameter will be encoded according to RFC
    2231.  Optional language specifies the RFC 2231 language, defaulting
    to the empty string.  Both charset and language should be strings.

    When replace is true the rewritten header is stored via
    replace_header(); otherwise the header is deleted and re-added.
    """
    if charset and not isinstance(value, tuple):
        value = (charset, language, value)

    if header not in self and header.lower() == 'content-type':
        current = 'text/plain'
    else:
        current = self.get(header)

    if self.get_param(param, header=header):
        # The parameter is already present with a non-empty value: rebuild
        # the header piece by piece, swapping in the new value.
        rebuilt = ''
        for old_key, old_val in self.get_params(header=header,
                                                unquote=requote):
            if old_key.lower() == param.lower():
                piece = _formatparam(param, value, requote)
            else:
                piece = _formatparam(old_key, old_val, requote)
            rebuilt = SEMISPACE.join([rebuilt, piece]) if rebuilt else piece
        current = rebuilt
    else:
        # Otherwise tack the new parameter onto the existing value.
        formatted = _formatparam(param, value, requote)
        current = (SEMISPACE.join([current, formatted])
                   if current else formatted)

    # Leave the header alone when nothing changed.
    if current == self.get(header):
        return
    if replace:
        self.replace_header(header, current)
    else:
        del self[header]
        self[header] = current
751 | |
def del_param(self, param, header='content-type', requote=True):
    """Remove the given parameter completely from the Content-Type header.

    The header is rebuilt without the parameter or its value and, if that
    changed it, deleted and re-added.  All values will be quoted as
    necessary unless requote is False.  Optional header specifies an
    alternative to the Content-Type header.  Nothing happens when the
    header is absent.
    """
    if header not in self:
        return
    rebuilt = ''
    for key, val in self.get_params(header=header, unquote=requote):
        if key.lower() == param.lower():
            continue
        piece = _formatparam(key, val, requote)
        rebuilt = SEMISPACE.join([rebuilt, piece]) if rebuilt else piece
    if rebuilt != self.get(header):
        del self[header]
        self[header] = rebuilt
773 | |
def set_type(self, type, header='Content-Type', requote=True):
    """Set the main type and subtype for the Content-Type header.

    type must be a string in the form "maintype/subtype", otherwise a
    ValueError is raised.

    This method replaces the Content-Type header, keeping all the
    parameters in place.  If requote is False, this leaves the existing
    header's quoting as is.  Otherwise, the parameters will be quoted (the
    default).

    An alternative header can be specified in the header argument.  When
    the Content-Type header is set, we'll always also add a MIME-Version
    header.
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError
    # Set the Content-Type, you get a MIME-Version
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if header not in self:
        self[header] = type
        return
    old_params = self.get_params(header=header, unquote=requote)
    del self[header]
    self[header] = type
    # Entry 0 is the old type itself; re-apply only the real parameters.
    for key, val in old_params[1:]:
        self.set_param(key, val, header, requote)
805 | |
def get_filename(self, failobj=None):
    """Return the filename associated with the payload if present.

    The filename is extracted from the Content-Disposition header's
    `filename' parameter, and it is unquoted.  If that header is missing
    the `filename' parameter, this method falls back to looking for the
    `name' parameter of the Content-Type header.  failobj is returned
    when neither is found.
    """
    sentinel = object()
    name = self.get_param('filename', sentinel, 'content-disposition')
    if name is sentinel:
        name = self.get_param('name', sentinel, 'content-type')
        if name is sentinel:
            return failobj
    return utils.collapse_rfc2231_value(name).strip()
821 | |
def get_boundary(self, failobj=None):
    """Return the Content-Type `boundary' parameter, or failobj if absent.

    The value is unquoted, collapsed from its RFC 2231 form (if any), and
    has trailing whitespace removed.
    """
    sentinel = object()
    raw = self.get_param('boundary', sentinel)
    if raw is not sentinel:
        # RFC 2046 says that boundaries may begin but not end in w/s
        return utils.collapse_rfc2231_value(raw).rstrip()
    return failobj
834 | |
def set_boundary(self, boundary):
    """Install boundary as the Content-Type header's `boundary' parameter.

    Unlike deleting the Content-Type header and adding a fresh one, this
    rewrites the header in place, so its position among the message's
    headers is preserved.  An existing boundary parameter is replaced where
    it stands; otherwise the new one is appended after the other
    parameters.  The boundary is always written wrapped in double quotes.

    HeaderParseError is raised if the message has no Content-Type header.
    """
    sentinel = object()
    old_params = self._get_params_preserve(sentinel, 'content-type')
    if old_params is sentinel:
        # Without a Content-Type header there is no type to attach the
        # boundary to, and we cannot guess one.
        raise errors.HeaderParseError('No Content-Type header found')
    quoted = '"%s"' % boundary
    rebuilt = []
    replaced = False
    for key, value in old_params:
        if key.lower() == 'boundary':
            rebuilt.append(('boundary', quoted))
            replaced = True
        else:
            rebuilt.append((key, value))
    if not replaced:
        # The original Content-Type header had no boundary attribute.
        # Tack one on the end.  BAW: should we raise an exception
        # instead???
        rebuilt.append(('boundary', quoted))
    # Value-less entries (such as the leading type) are emitted bare.
    new_value = SEMISPACE.join(
        key if value == '' else '%s=%s' % (key, value)
        for key, value in rebuilt)
    # Swap the new value into every Content-Type slot, leaving all other
    # headers exactly where they were.
    self._headers = [
        self.policy.header_store_parse(name, new_value)
        if name.lower() == 'content-type' else (name, old_value)
        for name, old_value in self._headers]
880 | |
def get_content_charset(self, failobj=None):
    """Return the Content-Type header's charset parameter in lower case.

    failobj is returned when there is no Content-Type header, when the
    header carries no charset parameter, or when the charset name contains
    characters outside the us-ascii range.
    """
    sentinel = object()
    value = self.get_param('charset', sentinel)
    if value is sentinel:
        return failobj
    if isinstance(value, tuple):
        # RFC 2231 encoded, so decode it, and it better end up as ascii.
        encoding = value[0] or 'us-ascii'
        try:
            # LookupError: the declared encoding is unknown to Python.
            # UnicodeError: the text does not fit the declared encoding.
            value = str(value[2].encode('raw-unicode-escape'), encoding)
        except (LookupError, UnicodeError):
            # Fall back to the undecoded text.
            value = value[2]
    # charset characters must be in us-ascii range
    try:
        value.encode('us-ascii')
    except UnicodeError:
        return failobj
    # RFC 2046, $4.1.2 says charsets are not case sensitive
    return value.lower()
910 | |
def get_charsets(self, failobj=None):
    """Return the charset of every part of this message, as a list.

    The message is traversed with walk(), so the list holds one entry for
    the container message itself followed by one per subpart; a
    non-multipart message therefore yields a list of length 1.

    Each entry is whatever get_content_charset(failobj) returns for that
    part: the charset name, or failobj (None by default) when the part
    declares no usable charset.
    """
    charsets = []
    for part in self.walk():
        charsets.append(part.get_content_charset(failobj))
    return charsets
928 | |
def get_content_disposition(self):
    """Return the lower-cased Content-Disposition value, or None.

    Only the leading token of the header (the text before the first ';')
    is returned; parameters are ignored.  Per RFC 2183 the expected values
    are 'inline' and 'attachment'.  None is returned when the message has
    no Content-Disposition header.
    """
    header = self.get('content-disposition')
    if header is not None:
        return _splitparam(header)[0].lower()
    return None
940 | |
941 # I.e. def walk(self): ... | |
942 from email.iterators import walk | |
943 | |
944 | |
class MIMEPart(Message):
    """Message subclass that defaults to email.policy.default.

    On top of Message it adds helpers for locating the body, iterating
    attachments and subparts, getting/setting content through the policy's
    content manager, and converting a part into a multipart container.
    """

    def __init__(self, policy=None):
        """Create an empty part, using email.policy.default if policy is None."""
        if policy is None:
            from email.policy import default
            policy = default
        Message.__init__(self, policy)


    def as_string(self, unixfrom=False, maxheaderlen=None, policy=None):
        """Return the entire formatted message as a string.

        Optional 'unixfrom', when true, means include the Unix From_ envelope
        header.  maxheaderlen is retained for backward compatibility with the
        base Message class, but defaults to None, meaning that the policy value
        for max_line_length controls the header maximum length.  'policy' is
        passed to the Generator instance used to serialize the message; if it
        is not specified the policy associated with the message instance is
        used.
        """
        policy = self.policy if policy is None else policy
        if maxheaderlen is None:
            maxheaderlen = policy.max_line_length
        # unixfrom must be forwarded explicitly; otherwise the caller's
        # request for the From_ envelope line is silently ignored.
        return super().as_string(unixfrom=unixfrom,
                                 maxheaderlen=maxheaderlen, policy=policy)

    def __str__(self):
        """Serialize with a clone of the message's policy that has utf8=True."""
        return self.as_string(policy=self.policy.clone(utf8=True))

    def is_attachment(self):
        """Return True if the Content-Disposition header says 'attachment'.

        Returns False when there is no Content-Disposition header.
        """
        # NOTE(review): this reads the `content_disposition` attribute of the
        # header value, so it presumably requires a policy that returns
        # structured header objects rather than plain strings -- confirm.
        c_d = self.get('content-disposition')
        return False if c_d is None else c_d.content_disposition == 'attachment'

    def _find_body(self, part, preferencelist):
        """Yield (priority, part) pairs for body candidates under part.

        priority is the index of the matching entry in preferencelist, so a
        lower number is a better match.  Attachments are never candidates.
        """
        if part.is_attachment():
            return
        maintype, subtype = part.get_content_type().split('/')
        if maintype == 'text':
            if subtype in preferencelist:
                yield (preferencelist.index(subtype), part)
            return
        if maintype != 'multipart':
            return
        if subtype != 'related':
            for subpart in part.iter_parts():
                yield from self._find_body(subpart, preferencelist)
            return
        if 'related' in preferencelist:
            yield (preferencelist.index('related'), part)
        # Look inside the multipart/related for its root part: the subpart
        # whose Content-ID matches 'start', else the first subpart.
        candidate = None
        start = part.get_param('start')
        if start:
            for subpart in part.iter_parts():
                if subpart['content-id'] == start:
                    candidate = subpart
                    break
        if candidate is None:
            subparts = part.get_payload()
            # A malformed multipart may carry a non-list payload (e.g. a
            # string); indexing it would hand back a character, not a part.
            if isinstance(subparts, list) and subparts:
                candidate = subparts[0]
        if candidate is not None:
            yield from self._find_body(candidate, preferencelist)

    def get_body(self, preferencelist=('related', 'html', 'plain')):
        """Return best candidate mime part for display as 'body' of message.

        Do a depth first search, starting with self, looking for the first part
        matching each of the items in preferencelist, and return the part
        corresponding to the first item that has a match, or None if no items
        have a match.  If 'related' is not included in preferencelist, consider
        the root part of any multipart/related encountered as a candidate
        match.  Ignore parts with 'Content-Disposition: attachment'.
        """
        best_prio = len(preferencelist)
        body = None
        for prio, part in self._find_body(self, preferencelist):
            if prio < best_prio:
                best_prio = prio
                body = part
                # Priority 0 cannot be beaten, so stop searching.
                if prio == 0:
                    break
        return body

    # Content types that iter_attachments() treats as body candidates.
    _body_types = {('text', 'plain'),
                   ('text', 'html'),
                   ('multipart', 'related'),
                   ('multipart', 'alternative')}

    def iter_attachments(self):
        """Return an iterator over the non-main parts of a multipart.

        Skip the first of each occurrence of text/plain, text/html,
        multipart/related, or multipart/alternative in the multipart (unless
        they have a 'Content-Disposition: attachment' header) and include all
        remaining subparts in the returned iterator.  When applied to a
        multipart/related, return all parts except the root part.  Return an
        empty iterator when applied to a multipart/alternative or a
        non-multipart.
        """
        maintype, subtype = self.get_content_type().split('/')
        if maintype != 'multipart' or subtype == 'alternative':
            return
        payload = self.get_payload()
        # Certain malformed messages can have content type set to `multipart/*`
        # but still have single part body, in which case payload.copy() can
        # fail with AttributeError.
        try:
            parts = payload.copy()
        except AttributeError:
            # payload is not a list, it is most probably a string.
            return

        if maintype == 'multipart' and subtype == 'related':
            # For related, we treat everything but the root as an attachment.
            # The root may be indicated by 'start'; if there's no start or we
            # can't find the named start, treat the first subpart as the root.
            start = self.get_param('start')
            if start:
                found = False
                attachments = []
                for part in parts:
                    if part.get('content-id') == start:
                        found = True
                    else:
                        attachments.append(part)
                if found:
                    yield from attachments
                    return
            # Slice rather than pop(0): an empty multipart/related has no
            # root to drop and must simply yield nothing.
            yield from parts[1:]
            return
        # Otherwise we more or less invert the remaining logic in get_body.
        # This only really works in edge cases (ex: non-text related or
        # alternatives) if the sending agent sets content-disposition.
        seen = []   # Only skip the first example of each candidate type.
        for part in parts:
            maintype, subtype = part.get_content_type().split('/')
            if ((maintype, subtype) in self._body_types and
                    not part.is_attachment() and subtype not in seen):
                seen.append(subtype)
                continue
            yield part

    def iter_parts(self):
        """Return an iterator over all immediate subparts of a multipart.

        Return an empty iterator for a non-multipart, and also for a message
        whose Content-Type claims multipart but whose payload is not a list
        of parts.
        """
        if self.get_content_maintype() != 'multipart':
            return
        payload = self.get_payload()
        # Only a list holds real subparts; a string payload would otherwise
        # be iterated character by character and None would raise TypeError.
        if isinstance(payload, list):
            yield from payload

    def get_content(self, *args, content_manager=None, **kw):
        """Return the content via content_manager.get_content(self, ...).

        The policy's content_manager is used when none is supplied; all
        other arguments are passed through unchanged.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        return content_manager.get_content(self, *args, **kw)

    def set_content(self, *args, content_manager=None, **kw):
        """Set the content via content_manager.set_content(self, ...).

        The policy's content_manager is used when none is supplied; all
        other arguments are passed through unchanged.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        content_manager.set_content(self, *args, **kw)

    def _make_multipart(self, subtype, disallowed_subtypes, boundary):
        """Turn this part into a multipart/<subtype> container.

        Existing Content-* headers and the payload are moved into a new
        first subpart; all other headers stay on the container.  If boundary
        is not None it is set as the boundary parameter.

        Raises ValueError if the part is already a multipart whose subtype
        is subtype itself or one of disallowed_subtypes.
        """
        if self.get_content_maintype() == 'multipart':
            existing_subtype = self.get_content_subtype()
            disallowed_subtypes = disallowed_subtypes + (subtype,)
            if existing_subtype in disallowed_subtypes:
                raise ValueError("Cannot convert {} to {}".format(
                    existing_subtype, subtype))
        keep_headers = []
        part_headers = []
        for name, value in self._headers:
            if name.lower().startswith('content-'):
                part_headers.append((name, value))
            else:
                keep_headers.append((name, value))
        if part_headers:
            # There is existing content, move it to the first subpart.
            part = type(self)(policy=self.policy)
            part._headers = part_headers
            part._payload = self._payload
            self._payload = [part]
        else:
            self._payload = []
        self._headers = keep_headers
        self['Content-Type'] = 'multipart/' + subtype
        if boundary is not None:
            self.set_param('boundary', boundary)

    def make_related(self, boundary=None):
        """Convert to multipart/related.

        Raises ValueError if already multipart/alternative, multipart/mixed
        or multipart/related.
        """
        self._make_multipart('related', ('alternative', 'mixed'), boundary)

    def make_alternative(self, boundary=None):
        """Convert to multipart/alternative.

        Raises ValueError if already multipart/mixed or
        multipart/alternative.
        """
        self._make_multipart('alternative', ('mixed',), boundary)

    def make_mixed(self, boundary=None):
        """Convert to multipart/mixed.

        Raises ValueError if already multipart/mixed.
        """
        self._make_multipart('mixed', (), boundary)

    def _add_multipart(self, _subtype, *args, _disp=None, **kw):
        """Attach a new subpart, converting self to multipart/_subtype first.

        The new part is built with set_content(*args, **kw).  If _disp is
        given and the new part has no Content-Disposition header yet, one is
        added with that value.
        """
        if (self.get_content_maintype() != 'multipart' or
                self.get_content_subtype() != _subtype):
            getattr(self, 'make_' + _subtype)()
        part = type(self)(policy=self.policy)
        part.set_content(*args, **kw)
        if _disp and 'content-disposition' not in part:
            part['Content-Disposition'] = _disp
        self.attach(part)

    def add_related(self, *args, **kw):
        """Add a multipart/related subpart, defaulting its disposition to inline."""
        self._add_multipart('related', *args, _disp='inline', **kw)

    def add_alternative(self, *args, **kw):
        """Add a multipart/alternative subpart built from the given content."""
        self._add_multipart('alternative', *args, **kw)

    def add_attachment(self, *args, **kw):
        """Add a multipart/mixed subpart, defaulting its disposition to attachment."""
        self._add_multipart('mixed', *args, _disp='attachment', **kw)

    def clear(self):
        """Remove all headers and the payload."""
        self._headers = []
        self._payload = None

    def clear_content(self):
        """Remove the payload and every header whose name starts with Content-."""
        self._headers = [(n, v) for n, v in self._headers
                         if not n.lower().startswith('content-')]
        self._payload = None
1166 | |
1167 | |
class EmailMessage(MIMEPart):
    """MIMEPart variant that adds a MIME-Version header when content is set."""

    def set_content(self, *args, **kw):
        """Set the content, then add "MIME-Version: 1.0" if none is present."""
        super().set_content(*args, **kw)
        if 'MIME-Version' in self:
            return
        self['MIME-Version'] = '1.0'