annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/email/message.py @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 # Copyright (C) 2001-2007 Python Software Foundation
jpayne@68 2 # Author: Barry Warsaw
jpayne@68 3 # Contact: email-sig@python.org
jpayne@68 4
jpayne@68 5 """Basic message object for the email package object model."""
jpayne@68 6
jpayne@68 7 __all__ = ['Message', 'EmailMessage']
jpayne@68 8
jpayne@68 9 import re
jpayne@68 10 import uu
jpayne@68 11 import quopri
jpayne@68 12 from io import BytesIO, StringIO
jpayne@68 13
jpayne@68 14 # Intrapackage imports
jpayne@68 15 from email import utils
jpayne@68 16 from email import errors
jpayne@68 17 from email._policybase import Policy, compat32
jpayne@68 18 from email import charset as _charset
jpayne@68 19 from email._encoded_words import decode_b
jpayne@68 20 Charset = _charset.Charset
jpayne@68 21
jpayne@68 22 SEMISPACE = '; '
jpayne@68 23
jpayne@68 24 # Regular expression that matches `special' characters in parameters, the
jpayne@68 25 # existence of which force quoting of the parameter value.
jpayne@68 26 tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
jpayne@68 27
jpayne@68 28
jpayne@68 29 def _splitparam(param):
jpayne@68 30 # Split header parameters. BAW: this may be too simple. It isn't
jpayne@68 31 # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
jpayne@68 32 # found in the wild. We may eventually need a full fledged parser.
jpayne@68 33 # RDM: we might have a Header here; for now just stringify it.
jpayne@68 34 a, sep, b = str(param).partition(';')
jpayne@68 35 if not sep:
jpayne@68 36 return a.strip(), None
jpayne@68 37 return a.strip(), b.strip()
jpayne@68 38
jpayne@68 39 def _formatparam(param, value=None, quote=True):
jpayne@68 40 """Convenience function to format and return a key=value pair.
jpayne@68 41
jpayne@68 42 This will quote the value if needed or if quote is true. If value is a
jpayne@68 43 three tuple (charset, language, value), it will be encoded according
jpayne@68 44 to RFC2231 rules. If it contains non-ascii characters it will likewise
jpayne@68 45 be encoded according to RFC2231 rules, using the utf-8 charset and
jpayne@68 46 a null language.
jpayne@68 47 """
jpayne@68 48 if value is not None and len(value) > 0:
jpayne@68 49 # A tuple is used for RFC 2231 encoded parameter values where items
jpayne@68 50 # are (charset, language, value). charset is a string, not a Charset
jpayne@68 51 # instance. RFC 2231 encoded values are never quoted, per RFC.
jpayne@68 52 if isinstance(value, tuple):
jpayne@68 53 # Encode as per RFC 2231
jpayne@68 54 param += '*'
jpayne@68 55 value = utils.encode_rfc2231(value[2], value[0], value[1])
jpayne@68 56 return '%s=%s' % (param, value)
jpayne@68 57 else:
jpayne@68 58 try:
jpayne@68 59 value.encode('ascii')
jpayne@68 60 except UnicodeEncodeError:
jpayne@68 61 param += '*'
jpayne@68 62 value = utils.encode_rfc2231(value, 'utf-8', '')
jpayne@68 63 return '%s=%s' % (param, value)
jpayne@68 64 # BAW: Please check this. I think that if quote is set it should
jpayne@68 65 # force quoting even if not necessary.
jpayne@68 66 if quote or tspecials.search(value):
jpayne@68 67 return '%s="%s"' % (param, utils.quote(value))
jpayne@68 68 else:
jpayne@68 69 return '%s=%s' % (param, value)
jpayne@68 70 else:
jpayne@68 71 return param
jpayne@68 72
jpayne@68 73 def _parseparam(s):
jpayne@68 74 # RDM This might be a Header, so for now stringify it.
jpayne@68 75 s = ';' + str(s)
jpayne@68 76 plist = []
jpayne@68 77 while s[:1] == ';':
jpayne@68 78 s = s[1:]
jpayne@68 79 end = s.find(';')
jpayne@68 80 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
jpayne@68 81 end = s.find(';', end + 1)
jpayne@68 82 if end < 0:
jpayne@68 83 end = len(s)
jpayne@68 84 f = s[:end]
jpayne@68 85 if '=' in f:
jpayne@68 86 i = f.index('=')
jpayne@68 87 f = f[:i].strip().lower() + '=' + f[i+1:].strip()
jpayne@68 88 plist.append(f.strip())
jpayne@68 89 s = s[end:]
jpayne@68 90 return plist
jpayne@68 91
jpayne@68 92
jpayne@68 93 def _unquotevalue(value):
jpayne@68 94 # This is different than utils.collapse_rfc2231_value() because it doesn't
jpayne@68 95 # try to convert the value to a unicode. Message.get_param() and
jpayne@68 96 # Message.get_params() are both currently defined to return the tuple in
jpayne@68 97 # the face of RFC 2231 parameters.
jpayne@68 98 if isinstance(value, tuple):
jpayne@68 99 return value[0], value[1], utils.unquote(value[2])
jpayne@68 100 else:
jpayne@68 101 return utils.unquote(value)
jpayne@68 102
jpayne@68 103
jpayne@68 104
jpayne@68 105 class Message:
jpayne@68 106 """Basic message object.
jpayne@68 107
jpayne@68 108 A message object is defined as something that has a bunch of RFC 2822
jpayne@68 109 headers and a payload. It may optionally have an envelope header
jpayne@68 110 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
jpayne@68 111 multipart or a message/rfc822), then the payload is a list of Message
jpayne@68 112 objects, otherwise it is a string.
jpayne@68 113
jpayne@68 114 Message objects implement part of the `mapping' interface, which assumes
jpayne@68 115 there is exactly one occurrence of the header per message. Some headers
jpayne@68 116 do in fact appear multiple times (e.g. Received) and for those headers,
jpayne@68 117 you must use the explicit API to set or get all the headers. Not all of
jpayne@68 118 the mapping methods are implemented.
jpayne@68 119 """
jpayne@68 120 def __init__(self, policy=compat32):
jpayne@68 121 self.policy = policy
jpayne@68 122 self._headers = []
jpayne@68 123 self._unixfrom = None
jpayne@68 124 self._payload = None
jpayne@68 125 self._charset = None
jpayne@68 126 # Defaults for multipart messages
jpayne@68 127 self.preamble = self.epilogue = None
jpayne@68 128 self.defects = []
jpayne@68 129 # Default content type
jpayne@68 130 self._default_type = 'text/plain'
jpayne@68 131
jpayne@68 132 def __str__(self):
jpayne@68 133 """Return the entire formatted message as a string.
jpayne@68 134 """
jpayne@68 135 return self.as_string()
jpayne@68 136
def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
    """Return the entire formatted message as a string.

    Optional 'unixfrom', when true, means include the Unix From_ envelope
    header.  For backward compatibility reasons 'maxheaderlen' defaults
    to 0, so you must pass it explicitly if you want a different value.
    'policy' is handed to the Generator instance used to serialize the
    message; if it is None the policy stored on the message is used.

    If the message object contains binary data that is not encoded
    according to RFC standards, the non-compliant data will be replaced by
    unicode "unknown character" code points.
    """
    # Imported here rather than at module level.
    from email.generator import Generator
    if policy is None:
        policy = self.policy
    buffer = StringIO()
    generator = Generator(buffer, mangle_from_=False,
                          maxheaderlen=maxheaderlen, policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
jpayne@68 160
jpayne@68 161 def __bytes__(self):
jpayne@68 162 """Return the entire formatted message as a bytes object.
jpayne@68 163 """
jpayne@68 164 return self.as_bytes()
jpayne@68 165
def as_bytes(self, unixfrom=False, policy=None):
    """Return the entire formatted message as a bytes object.

    Optional 'unixfrom', when true, means include the Unix From_ envelope
    header.  'policy' is handed to the BytesGenerator instance used to
    serialize the message; if it is None the policy stored on the message
    is used.
    """
    # Imported here rather than at module level.
    from email.generator import BytesGenerator
    if policy is None:
        policy = self.policy
    buffer = BytesIO()
    generator = BytesGenerator(buffer, mangle_from_=False, policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
jpayne@68 180
def is_multipart(self):
    """Return True if the payload is a list, i.e. the message has parts."""
    payload = self._payload
    return isinstance(payload, list)
jpayne@68 184
jpayne@68 185 #
jpayne@68 186 # Unix From_ line
jpayne@68 187 #
def set_unixfrom(self, unixfrom):
    """Store the Unix From_ envelope line for this message."""
    self._unixfrom = unixfrom
jpayne@68 190
def get_unixfrom(self):
    """Return the stored Unix From_ envelope line (None if never set)."""
    return self._unixfrom
jpayne@68 193
jpayne@68 194 #
jpayne@68 195 # Payload manipulation.
jpayne@68 196 #
def attach(self, payload):
    """Add the given payload to the current payload.

    When there is no payload yet, a new one-element list is created.
    Otherwise the object is appended to the existing payload, so the
    payload is always a list after a successful call.  To set the payload
    to a scalar object, use set_payload() instead.

    Raises TypeError when the existing payload cannot be appended to.
    """
    current = self._payload
    if current is None:
        self._payload = [payload]
        return
    try:
        current.append(payload)
    except AttributeError:
        raise TypeError("Attach is not valid on a message with a"
                        " non-multipart payload")
jpayne@68 212
def get_payload(self, i=None, decode=False):
    """Return a reference to the payload.

    The payload will either be a list object or a string.  If you mutate
    the list object, you modify the message's payload in place.  Optional
    i returns that index into the payload.

    Optional decode is a flag indicating whether the payload should be
    decoded or not, according to the Content-Transfer-Encoding header
    (default is False).

    When True and the message is not a multipart, the payload will be
    decoded if this header's value is `quoted-printable', `base64' or one
    of the uuencode names.  If some other encoding is used, or the header
    is missing, the payload is returned as bytes without transfer
    decoding.  Undecodable uuencoded data is returned undecoded.

    A payload that is neither a string nor bytes (for example None, when
    no payload was ever set) is returned unchanged, whatever the
    Content-Transfer-Encoding header says.

    If the message is a multipart and the decode flag is True, then None
    is returned.

    Raises TypeError when i is given but the payload is not a list.
    """
    # Here is the logic table for this code, based on the email5.0.0 code:
    #   i     decode  is_multipart  result
    # ------  ------  ------------  ------------------------------
    #  None   True    True          None
    #   i     True    True          None
    #  None   False   True          _payload (a list)
    #   i     False   True          _payload element i (a Message)
    #   i     False   False         error (not a list)
    #   i     True    False         error (not a list)
    #  None   False   False         _payload
    #  None   True    False         _payload decoded (bytes)
    # Note that Barry planned to factor out the 'decode' case, but that
    # isn't so easy now that we handle the 8 bit data, which needs to be
    # converted in both the decode and non-decode path.
    if self.is_multipart():
        if decode:
            return None
        if i is None:
            return self._payload
        else:
            return self._payload[i]
    # For backward compatibility, Use isinstance and this error message
    # instead of the more logical is_multipart test.
    if i is not None and not isinstance(self._payload, list):
        raise TypeError('Expected list, got %s' % type(self._payload))
    payload = self._payload
    # cte might be a Header, so for now stringify it.
    cte = str(self.get('content-transfer-encoding', '')).lower()
    # Bytes form of the payload; stays None when no bytes form exists.
    bpayload = None
    if isinstance(payload, str):
        if utils._has_surrogates(payload):
            bpayload = payload.encode('ascii', 'surrogateescape')
            if not decode:
                try:
                    payload = bpayload.decode(
                        self.get_param('charset', 'ascii'), 'replace')
                except LookupError:
                    payload = bpayload.decode('ascii', 'replace')
        elif decode:
            try:
                bpayload = payload.encode('ascii')
            except UnicodeError:
                # This won't happen for RFC compliant messages (messages
                # containing only ASCII code points in the unicode input).
                # If it does happen, turn the string into bytes in a way
                # guaranteed not to fail.
                bpayload = payload.encode('raw-unicode-escape')
    elif isinstance(payload, (bytes, bytearray)):
        # The payload may already be bytes; decode those directly.
        bpayload = bytes(payload)
    if not decode:
        return payload
    if bpayload is None:
        # Nothing that can be transfer-decoded (e.g. payload is None).
        # Without this guard the branches below would hit an unbound
        # local instead of returning the payload as-is.
        return payload
    if cte == 'quoted-printable':
        return quopri.decodestring(bpayload)
    elif cte == 'base64':
        # XXX: this is a bit of a hack; decode_b should probably be factored
        # out somewhere, but I haven't figured out where yet.
        value, defects = decode_b(b''.join(bpayload.splitlines()))
        for defect in defects:
            self.policy.handle_defect(self, defect)
        return value
    elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        in_file = BytesIO(bpayload)
        out_file = BytesIO()
        try:
            uu.decode(in_file, out_file, quiet=True)
            return out_file.getvalue()
        except uu.Error:
            # Some decoding problem
            return bpayload
    if isinstance(payload, str):
        return bpayload
    return payload
jpayne@68 302
def set_payload(self, payload, charset=None):
    """Set the payload to the given value.

    A payload with an 'encode' attribute is stored untouched when no
    charset is given; with a charset it is first encoded to the charset's
    output charset.  A payload with a 'decode' attribute is stored
    decoded as ASCII with the 'surrogateescape' error handler.  Anything
    else is stored as-is.

    Optional charset sets the message's default character set.  See
    set_charset() for details.
    """
    is_text = hasattr(payload, 'encode')
    if is_text and charset is None:
        self._payload = payload
        return
    if is_text:
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        payload = payload.encode(charset.output_charset)
    if hasattr(payload, 'decode'):
        payload = payload.decode('ascii', 'surrogateescape')
    self._payload = payload
    if charset is not None:
        self.set_charset(charset)
jpayne@68 322
def set_charset(self, charset):
    """Set the charset of the payload to a given character set.

    charset can be a Charset instance, a string naming a character set, or
    None.  If it is a string it will be converted to a Charset instance.
    If charset is None, the charset parameter will be removed from the
    Content-Type field and the stored Charset is cleared.

    The message will be assumed to be of type text/* encoded with
    charset.input_charset.  It will be converted to charset.output_charset
    and encoded properly, if needed, when generating the plain text
    representation of the message.  MIME headers (MIME-Version,
    Content-Type, Content-Transfer-Encoding) will be added as needed.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if not isinstance(charset, Charset):
        charset = Charset(charset)
    self._charset = charset

    if 'MIME-Version' not in self:
        self.add_header('MIME-Version', '1.0')
    if 'Content-Type' in self:
        self.set_param('charset', charset.get_output_charset())
    else:
        self.add_header('Content-Type', 'text/plain',
                        charset=charset.get_output_charset())

    if charset != charset.get_output_charset():
        self._payload = charset.body_encode(self._payload)

    # An existing Content-Transfer-Encoding header is left alone.
    if 'Content-Transfer-Encoding' in self:
        return
    cte = charset.get_body_encoding()
    try:
        # The body encoding may be a callable that encodes the message
        # in place.
        cte(self)
    except TypeError:
        # Otherwise encode the payload here and record cte as the header
        # value.  The 'if' below is for backward compatibility, it allows
        # unicode through even though that won't work correctly if the
        # message is serialized.
        body = self._payload
        if body:
            try:
                body = body.encode('ascii', 'surrogateescape')
            except UnicodeError:
                body = body.encode(charset.output_charset)
        self._payload = charset.body_encode(body)
        self.add_header('Content-Transfer-Encoding', cte)
jpayne@68 369
def get_charset(self):
    """Return the Charset instance stored for the message's payload.

    This is None until a charset has been set.
    """
    return self._charset
jpayne@68 374
jpayne@68 375 #
jpayne@68 376 # MAPPING INTERFACE (partial)
jpayne@68 377 #
jpayne@68 378 def __len__(self):
jpayne@68 379 """Return the total number of headers, including duplicates."""
jpayne@68 380 return len(self._headers)
jpayne@68 381
jpayne@68 382 def __getitem__(self, name):
jpayne@68 383 """Get a header value.
jpayne@68 384
jpayne@68 385 Return None if the header is missing instead of raising an exception.
jpayne@68 386
jpayne@68 387 Note that if the header appeared multiple times, exactly which
jpayne@68 388 occurrence gets returned is undefined. Use get_all() to get all
jpayne@68 389 the values matching a header field name.
jpayne@68 390 """
jpayne@68 391 return self.get(name)
jpayne@68 392
jpayne@68 393 def __setitem__(self, name, val):
jpayne@68 394 """Set the value of a header.
jpayne@68 395
jpayne@68 396 Note: this does not overwrite an existing header with the same field
jpayne@68 397 name. Use __delitem__() first to delete any existing headers.
jpayne@68 398 """
jpayne@68 399 max_count = self.policy.header_max_count(name)
jpayne@68 400 if max_count:
jpayne@68 401 lname = name.lower()
jpayne@68 402 found = 0
jpayne@68 403 for k, v in self._headers:
jpayne@68 404 if k.lower() == lname:
jpayne@68 405 found += 1
jpayne@68 406 if found >= max_count:
jpayne@68 407 raise ValueError("There may be at most {} {} headers "
jpayne@68 408 "in a message".format(max_count, name))
jpayne@68 409 self._headers.append(self.policy.header_store_parse(name, val))
jpayne@68 410
jpayne@68 411 def __delitem__(self, name):
jpayne@68 412 """Delete all occurrences of a header, if present.
jpayne@68 413
jpayne@68 414 Does not raise an exception if the header is missing.
jpayne@68 415 """
jpayne@68 416 name = name.lower()
jpayne@68 417 newheaders = []
jpayne@68 418 for k, v in self._headers:
jpayne@68 419 if k.lower() != name:
jpayne@68 420 newheaders.append((k, v))
jpayne@68 421 self._headers = newheaders
jpayne@68 422
jpayne@68 423 def __contains__(self, name):
jpayne@68 424 return name.lower() in [k.lower() for k, v in self._headers]
jpayne@68 425
jpayne@68 426 def __iter__(self):
jpayne@68 427 for field, value in self._headers:
jpayne@68 428 yield field
jpayne@68 429
def keys(self):
    """Return a list of all the message's header field names.

    The names come back in the order the headers are stored, which is the
    order they appeared in the original message or were added to it, and
    may contain duplicates.  Any fields deleted and re-inserted are always
    appended to the header list.
    """
    names = []
    for field, _ in self._headers:
        names.append(field)
    return names
jpayne@68 439
def values(self):
    """Return a list of all the message's header values.

    Each stored value is passed through the policy's header_fetch_parse()
    before being returned.  The values come back in the order the headers
    are stored, which is the order they appeared in the original message
    or were added to it, and may contain duplicates.  Any fields deleted
    and re-inserted are always appended to the header list.
    """
    fetch = self.policy.header_fetch_parse
    return [fetch(field, raw) for field, raw in self._headers]
jpayne@68 450
def items(self):
    """Get all the message's header fields and values.

    Returns a list of (name, value) pairs where each value has been passed
    through the policy's header_fetch_parse().  The pairs come back in the
    order the headers are stored, which is the order they appeared in the
    original message or were added to it, and may contain duplicates.  Any
    fields deleted and re-inserted are always appended to the header list.
    """
    fetch = self.policy.header_fetch_parse
    return [(field, fetch(field, raw)) for field, raw in self._headers]
jpayne@68 461
def get(self, name, failobj=None):
    """Get a header value.

    Returns the value of the first stored header whose name matches
    case-insensitively, after passing it through the policy's
    header_fetch_parse().  Like __getitem__() but returns failobj instead
    of None when the field is missing.
    """
    wanted = name.lower()
    for field, raw in self._headers:
        if field.lower() == wanted:
            return self.policy.header_fetch_parse(field, raw)
    return failobj
jpayne@68 473
jpayne@68 474 #
jpayne@68 475 # "Internal" methods (public API, but only intended for use by a parser
jpayne@68 476 # or generator, not normal application code).
jpayne@68 477 #
jpayne@68 478
def set_raw(self, name, value):
    """Store name and value in the model without modification.

    The pair is appended to the header list as-is; the policy is not
    consulted.  This is an "internal" API, intended only for use by a
    parser.
    """
    pair = (name, value)
    self._headers.append(pair)
jpayne@68 485
def raw_items(self):
    """Return the (name, value) header pairs without modification.

    The result is an iterator over a copy of the header list, so headers
    added or removed afterwards do not affect it.  This is an "internal"
    API, intended only for use by a generator.
    """
    snapshot = self._headers.copy()
    return iter(snapshot)
jpayne@68 492
jpayne@68 493 #
jpayne@68 494 # Additional useful stuff
jpayne@68 495 #
jpayne@68 496
def get_all(self, name, failobj=None):
    """Return a list of all the values for the named field.

    Names are compared case-insensitively and each value is passed
    through the policy's header_fetch_parse().  The values come back in
    the order the headers are stored and may contain duplicates.  Any
    fields deleted and re-inserted are always appended to the header list.

    If no such fields exist, failobj is returned (defaults to None).
    """
    wanted = name.lower()
    matches = [self.policy.header_fetch_parse(field, raw)
               for field, raw in self._headers
               if field.lower() == wanted]
    return matches if matches else failobj
jpayne@68 514
def add_header(self, _name, _value, **_params):
    """Extended header setting.

    _name is the header field to add and _value its primary value (left
    out of the header when None).  Keyword arguments can be used to set
    additional parameters for the header field, with underscores in their
    names converted to dashes.  Normally the parameter will be added as
    key="value" unless value is None, in which case only the key will be
    added.  If a parameter value contains non-ASCII characters it can be
    specified as a three-tuple of (charset, language, value), in which
    case it will be encoded according to RFC2231 rules.  Otherwise it will
    be encoded using the utf-8 charset and a language of ''.

    Examples:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    msg.add_header('content-disposition', 'attachment',
                   filename=('utf-8', '', 'Fußballer.ppt'))
    msg.add_header('content-disposition', 'attachment',
                   filename='Fußballer.ppt')
    """
    parts = [] if _value is None else [_value]
    for key, val in _params.items():
        dashed = key.replace('_', '-')
        parts.append(dashed if val is None else _formatparam(dashed, val))
    self[_name] = SEMISPACE.join(parts)
jpayne@68 544
def replace_header(self, _name, _value):
    """Replace a header.

    Replace the first matching header found in the message, retaining
    header order and the stored spelling of the field name.  The new
    value is run through the policy's header_store_parse().  If no
    matching header was found, a KeyError carrying the lower-cased name
    is raised.
    """
    wanted = _name.lower()
    for index, (field, _) in enumerate(self._headers):
        if field.lower() == wanted:
            self._headers[index] = self.policy.header_store_parse(
                field, _value)
            return
    raise KeyError(wanted)
jpayne@68 559
jpayne@68 560 #
jpayne@68 561 # Use these three methods instead of the three above.
jpayne@68 562 #
jpayne@68 563
def get_content_type(self):
    """Return the message's content type.

    The returned string is coerced to lower case of the form
    `maintype/subtype'.  If there was no Content-Type header in the
    message, the default type as given by get_default_type() will be
    returned.  A header value that does not contain exactly one slash is
    treated as invalid and 'text/plain' is returned instead, so this
    always returns a value.

    RFC 2045 defines a message's default type to be text/plain unless it
    appears inside a multipart/digest container, in which case it would be
    message/rfc822.
    """
    sentinel = object()
    raw = self.get('content-type', sentinel)
    if raw is sentinel:
        # This should have no parameters
        return self.get_default_type()
    main_value = _splitparam(raw)[0]
    ctype = main_value.lower()
    # RFC 2045, section 5.2 says if its invalid, use text/plain
    return ctype if ctype.count('/') == 1 else 'text/plain'
jpayne@68 587
def get_content_maintype(self):
    """Return the message's main content type.

    This is the `maintype' part, i.e. everything before the first slash,
    of the string returned by get_content_type().
    """
    maintype, _, _ = self.get_content_type().partition('/')
    return maintype
jpayne@68 596
def get_content_subtype(self):
    """Return the message's sub-content type.

    This is the `subtype' part, i.e. the piece after the first slash, of
    the string returned by get_content_type().
    """
    pieces = self.get_content_type().split('/')
    return pieces[1]
jpayne@68 605
def get_default_type(self):
    """Return the `default' content type stored on the message.

    Most messages have a default content type of text/plain, except for
    messages that are subparts of multipart/digest containers.  Such
    subparts have a default content type of message/rfc822.
    """
    default = self._default_type
    return default
jpayne@68 614
def set_default_type(self, ctype):
    """Set the `default' content type.

    ctype should be either "text/plain" or "message/rfc822", although this
    is not enforced: whatever is passed is stored.  The default content
    type is kept on the message object and is not stored in the
    Content-Type header.
    """
    self._default_type = ctype
jpayne@68 623
def _get_params_preserve(self, failobj, header):
    """Return the parameters of header with value quoting left intact.

    Like get_params() but preserves the quoting of values.  Returns
    failobj when the header is missing.  Otherwise the header is split
    into items, each item becomes a (name, value) pair (a bare attribute
    gets the empty string as its value), and the list of pairs is passed
    through utils.decode_params() before being returned.
    """
    # BAW: should this be part of the public interface?
    sentinel = object()
    raw = self.get(header, sentinel)
    if raw is sentinel:
        return failobj
    pairs = []
    for item in _parseparam(raw):
        key, equals, val = item.partition('=')
        if equals:
            pairs.append((key.strip(), val.strip()))
        else:
            # Must have been a bare attribute
            pairs.append((item.strip(), ''))
    return utils.decode_params(pairs)
jpayne@68 644
def get_params(self, failobj=None, header='content-type', unquote=True):
    """Return the message's Content-Type parameters, as a list.

    The elements of the returned list are 2-tuples of key/value pairs, as
    split on the `=' sign.  The left hand side of the `=' is the key,
    while the right hand side is the value.  If there is no `=' sign in
    the parameter the value is the empty string.  The value is as
    described in the get_param() method.

    Optional failobj is the object to return if the header is missing.
    Optional header is the header to search instead of Content-Type.  If
    unquote is True, the value is unquoted.
    """
    sentinel = object()
    found = self._get_params_preserve(sentinel, header)
    if found is sentinel:
        return failobj
    if not unquote:
        return found
    return [(key, _unquotevalue(val)) for key, val in found]
jpayne@68 666
jpayne@68 667 def get_param(self, param, failobj=None, header='content-type',
jpayne@68 668 unquote=True):
jpayne@68 669 """Return the parameter value if found in the Content-Type header.
jpayne@68 670
jpayne@68 671 Optional failobj is the object to return if there is no Content-Type
jpayne@68 672 header, or the Content-Type header has no such parameter. Optional
jpayne@68 673 header is the header to search instead of Content-Type.
jpayne@68 674
jpayne@68 675 Parameter keys are always compared case insensitively. The return
jpayne@68 676 value can either be a string, or a 3-tuple if the parameter was RFC
jpayne@68 677 2231 encoded. When it's a 3-tuple, the elements of the value are of
jpayne@68 678 the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
jpayne@68 679 LANGUAGE can be None, in which case you should consider VALUE to be
jpayne@68 680 encoded in the us-ascii charset. You can usually ignore LANGUAGE.
jpayne@68 681 The parameter value (either the returned string, or the VALUE item in
jpayne@68 682 the 3-tuple) is always unquoted, unless unquote is set to False.
jpayne@68 683
jpayne@68 684 If your application doesn't care whether the parameter was RFC 2231
jpayne@68 685 encoded, it can turn the return value into a string as follows:
jpayne@68 686
jpayne@68 687 rawparam = msg.get_param('foo')
jpayne@68 688 param = email.utils.collapse_rfc2231_value(rawparam)
jpayne@68 689
jpayne@68 690 """
jpayne@68 691 if header not in self:
jpayne@68 692 return failobj
jpayne@68 693 for k, v in self._get_params_preserve(failobj, header):
jpayne@68 694 if k.lower() == param.lower():
jpayne@68 695 if unquote:
jpayne@68 696 return _unquotevalue(v)
jpayne@68 697 else:
jpayne@68 698 return v
jpayne@68 699 return failobj
jpayne@68 700
def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language='', replace=False):
    """Add or overwrite a parameter on a header (Content-Type by default).

    An existing parameter of the same name (compared case insensitively)
    has its value replaced; otherwise the parameter is appended.

    When header is Content-Type and the message has no such header yet,
    the header is created as "text/plain" with the new parameter appended
    as per RFC 2045.

    Parameter values are quoted as necessary unless requote is False.

    If charset is given, the value is encoded according to RFC 2231;
    language is the RFC 2231 language and defaults to the empty string.
    Both should be strings.

    With replace true the header keeps its position in the header list;
    otherwise it is deleted and re-added at the end.
    """
    if charset and not isinstance(value, tuple):
        value = (charset, language, value)

    header_missing = header not in self
    if header_missing and header.lower() == 'content-type':
        ctype = 'text/plain'
    else:
        ctype = self.get(header)

    if self.get_param(param, header=header):
        # Parameter is present with a non-empty value: rebuild the whole
        # header, swapping in the new value where the name matches.
        ctype = ''
        for old_name, old_value in self.get_params(header=header,
                                                   unquote=requote):
            if old_name.lower() == param.lower():
                piece = _formatparam(param, value, requote)
            else:
                piece = _formatparam(old_name, old_value, requote)
            ctype = SEMISPACE.join([ctype, piece]) if ctype else piece
    else:
        # Not found (or found with an empty value): append to what is there.
        piece = _formatparam(param, value, requote)
        ctype = SEMISPACE.join([ctype, piece]) if ctype else piece

    if ctype == self.get(header):
        return
    if replace:
        self.replace_header(header, ctype)
    else:
        del self[header]
        self[header] = ctype
jpayne@68 751
def del_param(self, param, header='content-type', requote=True):
    """Strip a parameter (name and value) from a header.

    The header, Content-Type unless another name is given, is rebuilt
    from its remaining parameters.  Values are quoted as necessary unless
    requote is False.  Nothing happens when the header is absent.
    """
    if header not in self:
        return
    rebuilt = ''
    for name, val in self.get_params(header=header, unquote=requote):
        if name.lower() == param.lower():
            continue
        piece = _formatparam(name, val, requote)
        rebuilt = SEMISPACE.join([rebuilt, piece]) if rebuilt else piece
    # Only touch the header list when the text actually changed.
    if rebuilt != self.get(header):
        del self[header]
        self[header] = rebuilt
jpayne@68 773
def set_type(self, type, header='Content-Type', requote=True):
    """Replace the "maintype/subtype" value of a header, keeping parameters.

    type must contain exactly one '/'; otherwise ValueError is raised.

    The header (Content-Type unless another name is given) is rewritten
    with the new type and its existing parameters are re-added.  With
    requote False the parameters are passed through without requoting;
    by default they are quoted.

    Whenever the header being set is Content-Type, a MIME-Version: 1.0
    header is (re)written as well.
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError
    # Touching Content-Type always refreshes MIME-Version.
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if header in self:
        old_params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # Entry 0 is the previous type itself, not a real parameter.
        for name, val in old_params[1:]:
            self.set_param(name, val, header, requote)
    else:
        self[header] = type
jpayne@68 805
def get_filename(self, failobj=None):
    """Return the payload's filename, or failobj when none is declared.

    The Content-Disposition header's `filename' parameter is preferred;
    when it is absent the Content-Type header's `name' parameter is used
    instead.  The value is unquoted, RFC 2231 collapsed and stripped of
    surrounding whitespace.
    """
    sentinel = object()
    candidates = (('filename', 'content-disposition'),
                  ('name', 'content-type'))
    for pname, hname in candidates:
        raw = self.get_param(pname, sentinel, hname)
        if raw is not sentinel:
            return utils.collapse_rfc2231_value(raw).strip()
    return failobj
jpayne@68 821
def get_boundary(self, failobj=None):
    """Return the Content-Type `boundary' parameter, or failobj if absent.

    The value is unquoted and has trailing whitespace removed.
    """
    sentinel = object()
    raw = self.get_param('boundary', sentinel)
    if raw is not sentinel:
        # RFC 2046 says that boundaries may begin but not end in w/s
        return utils.collapse_rfc2231_value(raw).rstrip()
    return failobj
jpayne@68 834
jpayne@68 835 def set_boundary(self, boundary):
jpayne@68 836 """Set the boundary parameter in Content-Type to 'boundary'.
jpayne@68 837
jpayne@68 838 This is subtly different than deleting the Content-Type header and
jpayne@68 839 adding a new one with a new boundary parameter via add_header(). The
jpayne@68 840 main difference is that using the set_boundary() method preserves the
jpayne@68 841 order of the Content-Type header in the original message.
jpayne@68 842
jpayne@68 843 HeaderParseError is raised if the message has no Content-Type header.
jpayne@68 844 """
jpayne@68 845 missing = object()
jpayne@68 846 params = self._get_params_preserve(missing, 'content-type')
jpayne@68 847 if params is missing:
jpayne@68 848 # There was no Content-Type header, and we don't know what type
jpayne@68 849 # to set it to, so raise an exception.
jpayne@68 850 raise errors.HeaderParseError('No Content-Type header found')
jpayne@68 851 newparams = []
jpayne@68 852 foundp = False
jpayne@68 853 for pk, pv in params:
jpayne@68 854 if pk.lower() == 'boundary':
jpayne@68 855 newparams.append(('boundary', '"%s"' % boundary))
jpayne@68 856 foundp = True
jpayne@68 857 else:
jpayne@68 858 newparams.append((pk, pv))
jpayne@68 859 if not foundp:
jpayne@68 860 # The original Content-Type header had no boundary attribute.
jpayne@68 861 # Tack one on the end. BAW: should we raise an exception
jpayne@68 862 # instead???
jpayne@68 863 newparams.append(('boundary', '"%s"' % boundary))
jpayne@68 864 # Replace the existing Content-Type header with the new value
jpayne@68 865 newheaders = []
jpayne@68 866 for h, v in self._headers:
jpayne@68 867 if h.lower() == 'content-type':
jpayne@68 868 parts = []
jpayne@68 869 for k, v in newparams:
jpayne@68 870 if v == '':
jpayne@68 871 parts.append(k)
jpayne@68 872 else:
jpayne@68 873 parts.append('%s=%s' % (k, v))
jpayne@68 874 val = SEMISPACE.join(parts)
jpayne@68 875 newheaders.append(self.policy.header_store_parse(h, val))
jpayne@68 876
jpayne@68 877 else:
jpayne@68 878 newheaders.append((h, v))
jpayne@68 879 self._headers = newheaders
jpayne@68 880
def get_content_charset(self, failobj=None):
    """Return the Content-Type `charset' parameter, lower-cased.

    failobj is returned when there is no Content-Type header, when the
    header has no charset parameter, or when the charset name contains
    characters outside us-ascii.
    """
    sentinel = object()
    charset = self.get_param('charset', sentinel)
    if charset is sentinel:
        return failobj
    if isinstance(charset, tuple):
        # RFC 2231 encoded, so decode it, and it better end up as ascii.
        codec = charset[0] or 'us-ascii'
        text = charset[2]
        try:
            # LookupError: the codec is unknown to Python.
            # UnicodeError: the text has a character outside that codec.
            charset = str(text.encode('raw-unicode-escape'), codec)
        except (LookupError, UnicodeError):
            charset = text
    # charset characters must be in us-ascii range
    try:
        charset.encode('us-ascii')
    except UnicodeError:
        return failobj
    # RFC 2046, $4.1.2 says charsets are not case sensitive
    return charset.lower()
jpayne@68 910
def get_charsets(self, failobj=None):
    """Return the charset of this message and of every part it contains.

    The list holds one entry per part visited by walk(), this message
    included, so a non-multipart message still yields a list of length 1.
    Each entry is whatever get_content_charset(failobj) returns for that
    part: the charset parameter of its Content-Type header, or failobj
    (None by default) when no charset is declared.
    """
    charsets = []
    for part in self.walk():
        charsets.append(part.get_content_charset(failobj))
    return charsets
jpayne@68 928
def get_content_disposition(self):
    """Return the lower-cased Content-Disposition value, or None if absent.

    Only the part of the header before the first ';' is returned, without
    parameters.  RFC 2183 defines 'inline' and 'attachment', but whatever
    token the header carries is returned as is (lower-cased).
    """
    value = self.get('content-disposition')
    if value is not None:
        return _splitparam(value)[0].lower()
    return None
jpayne@68 940
jpayne@68 941 # I.e. def walk(self): ...
jpayne@68 942 from email.iterators import walk
jpayne@68 943
jpayne@68 944
class MIMEPart(Message):
    """Message subclass that defaults to the email.policy.default policy
    and adds helpers for locating the body, iterating attachments and
    building multipart structures.
    """

    def __init__(self, policy=None):
        """Create the part, using email.policy.default when policy is None."""
        if policy is None:
            # Imported lazily, as in the original code.
            from email.policy import default
            policy = default
        Message.__init__(self, policy)

    def as_string(self, unixfrom=False, maxheaderlen=None, policy=None):
        """Return the entire formatted message as a string.

        Optional 'unixfrom', when true, means include the Unix From_ envelope
        header.  maxheaderlen is retained for backward compatibility with the
        base Message class, but defaults to None, meaning that the policy value
        for max_line_length controls the header maximum length.  'policy' is
        passed to the Generator instance used to serialize the message; if it
        is not specified the policy associated with the message instance is
        used.
        """
        policy = self.policy if policy is None else policy
        if maxheaderlen is None:
            maxheaderlen = policy.max_line_length
        # unixfrom must be forwarded explicitly; omitting it made the
        # argument a silent no-op.
        return super().as_string(unixfrom=unixfrom,
                                 maxheaderlen=maxheaderlen,
                                 policy=policy)

    def __str__(self):
        """Return as_string() rendered with a utf8=True clone of the policy."""
        return self.as_string(policy=self.policy.clone(utf8=True))

    def is_attachment(self):
        """Return True if Content-Disposition is 'attachment', else False.

        Relies on the header object exposing a content_disposition
        attribute; a missing header means "not an attachment".
        """
        c_d = self.get('content-disposition')
        return False if c_d is None else c_d.content_disposition == 'attachment'

    def _find_body(self, part, preferencelist):
        """Yield (priority, part) candidates for the body, depth first.

        priority is the index in preferencelist of the matching entry (a
        text subtype, or 'related').  Parts marked as attachments are
        skipped entirely.
        """
        if part.is_attachment():
            return
        maintype, subtype = part.get_content_type().split('/')
        if maintype == 'text':
            if subtype in preferencelist:
                yield (preferencelist.index(subtype), part)
            return
        if maintype != 'multipart':
            return
        if subtype != 'related':
            # Generic multipart: every direct subpart is searched in turn.
            for subpart in part.iter_parts():
                yield from self._find_body(subpart, preferencelist)
            return
        if 'related' in preferencelist:
            yield (preferencelist.index('related'), part)
        # Descend into the root of the multipart/related: the subpart whose
        # Content-ID equals the 'start' parameter, else the first subpart.
        candidate = None
        start = part.get_param('start')
        if start:
            for subpart in part.iter_parts():
                if subpart['content-id'] == start:
                    candidate = subpart
                    break
        if candidate is None:
            subparts = part.get_payload()
            candidate = subparts[0] if subparts else None
        if candidate is not None:
            yield from self._find_body(candidate, preferencelist)

    def get_body(self, preferencelist=('related', 'html', 'plain')):
        """Return best candidate mime part for display as 'body' of message.

        Do a depth first search, starting with self, looking for the first part
        matching each of the items in preferencelist, and return the part
        corresponding to the first item that has a match, or None if no items
        have a match.  If 'related' is not included in preferencelist, consider
        the root part of any multipart/related encountered as a candidate
        match.  Ignore parts with 'Content-Disposition: attachment'.
        """
        best_prio = len(preferencelist)
        body = None
        for prio, part in self._find_body(self, preferencelist):
            if prio < best_prio:
                best_prio = prio
                body = part
                if prio == 0:
                    # Nothing can beat the first preference; stop early.
                    break
        return body

    # (maintype, subtype) pairs that iter_attachments() treats as body
    # candidates rather than attachments.
    _body_types = {('text', 'plain'),
                   ('text', 'html'),
                   ('multipart', 'related'),
                   ('multipart', 'alternative')}

    def iter_attachments(self):
        """Return an iterator over the non-main parts of a multipart.

        Skip the first of each occurrence of text/plain, text/html,
        multipart/related, or multipart/alternative in the multipart (unless
        they have a 'Content-Disposition: attachment' header) and include all
        remaining subparts in the returned iterator.  When applied to a
        multipart/related, return all parts except the root part.  Return an
        empty iterator when applied to a multipart/alternative or a
        non-multipart.
        """
        maintype, subtype = self.get_content_type().split('/')
        if maintype != 'multipart' or subtype == 'alternative':
            return
        payload = self.get_payload()
        # Certain malformed messages can have content type set to `multipart/*`
        # but still have single part body, in which case payload.copy() can
        # fail with AttributeError.
        try:
            parts = payload.copy()
        except AttributeError:
            # payload is not a list, it is most probably a string.
            return

        if maintype == 'multipart' and subtype == 'related':
            # For related, we treat everything but the root as an attachment.
            # The root may be indicated by 'start'; if there's no start or we
            # can't find the named start, treat the first subpart as the root.
            start = self.get_param('start')
            if start:
                found = False
                attachments = []
                for part in parts:
                    if part.get('content-id') == start:
                        found = True
                    else:
                        attachments.append(part)
                if found:
                    yield from attachments
                    return
            # Slice rather than pop(0) so an empty multipart/related yields
            # nothing instead of raising IndexError.
            yield from parts[1:]
            return
        # Otherwise we more or less invert the remaining logic in get_body.
        # This only really works in edge cases (ex: non-text related or
        # alternatives) if the sending agent sets content-disposition.
        seen = []   # Only skip the first example of each candidate type.
        for part in parts:
            maintype, subtype = part.get_content_type().split('/')
            if ((maintype, subtype) in self._body_types and
                    not part.is_attachment() and subtype not in seen):
                seen.append(subtype)
                continue
            yield part

    def iter_parts(self):
        """Return an iterator over all immediate subparts of a multipart.

        Return an empty iterator for a non-multipart.
        """
        if self.get_content_maintype() == 'multipart':
            yield from self.get_payload()

    def get_content(self, *args, content_manager=None, **kw):
        """Delegate to content_manager.get_content(self, *args, **kw).

        The policy's content_manager is used when none is supplied.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        return content_manager.get_content(self, *args, **kw)

    def set_content(self, *args, content_manager=None, **kw):
        """Delegate to content_manager.set_content(self, *args, **kw).

        The policy's content_manager is used when none is supplied.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        content_manager.set_content(self, *args, **kw)

    def _make_multipart(self, subtype, disallowed_subtypes, boundary):
        """Turn this part into a multipart/<subtype> container.

        Raise ValueError if the part is already a multipart whose subtype
        is <subtype> itself or one of disallowed_subtypes.  Existing
        Content-* headers and the current payload are moved into a new
        first subpart; all other headers stay on this part.
        """
        if self.get_content_maintype() == 'multipart':
            existing_subtype = self.get_content_subtype()
            disallowed_subtypes = disallowed_subtypes + (subtype,)
            if existing_subtype in disallowed_subtypes:
                raise ValueError("Cannot convert {} to {}".format(
                    existing_subtype, subtype))
        keep_headers = []
        part_headers = []
        for name, value in self._headers:
            if name.lower().startswith('content-'):
                part_headers.append((name, value))
            else:
                keep_headers.append((name, value))
        if part_headers:
            # There is existing content, move it to the first subpart.
            part = type(self)(policy=self.policy)
            part._headers = part_headers
            part._payload = self._payload
            self._payload = [part]
        else:
            self._payload = []
        self._headers = keep_headers
        self['Content-Type'] = 'multipart/' + subtype
        if boundary is not None:
            self.set_param('boundary', boundary)

    def make_related(self, boundary=None):
        """Convert to multipart/related (not from alternative or mixed)."""
        self._make_multipart('related', ('alternative', 'mixed'), boundary)

    def make_alternative(self, boundary=None):
        """Convert to multipart/alternative (not from mixed)."""
        self._make_multipart('alternative', ('mixed',), boundary)

    def make_mixed(self, boundary=None):
        """Convert to multipart/mixed."""
        self._make_multipart('mixed', (), boundary)

    def _add_multipart(self, _subtype, *args, _disp=None, **kw):
        """Attach a new subpart, converting to multipart/<_subtype> first.

        The new part is filled via set_content(*args, **kw).  When _disp
        is given and set_content did not add a Content-Disposition header,
        _disp is used as its value.
        """
        if (self.get_content_maintype() != 'multipart' or
                self.get_content_subtype() != _subtype):
            getattr(self, 'make_' + _subtype)()
        part = type(self)(policy=self.policy)
        part.set_content(*args, **kw)
        if _disp and 'content-disposition' not in part:
            part['Content-Disposition'] = _disp
        self.attach(part)

    def add_related(self, *args, **kw):
        """Attach new content as an 'inline' part of a multipart/related."""
        self._add_multipart('related', *args, _disp='inline', **kw)

    def add_alternative(self, *args, **kw):
        """Attach new content as a part of a multipart/alternative."""
        self._add_multipart('alternative', *args, **kw)

    def add_attachment(self, *args, **kw):
        """Attach new content as an 'attachment' part of a multipart/mixed."""
        self._add_multipart('mixed', *args, _disp='attachment', **kw)

    def clear(self):
        """Remove all headers and the payload."""
        self._headers = []
        self._payload = None

    def clear_content(self):
        """Remove the payload and every Content-* header; keep other headers."""
        self._headers = [(n, v) for n, v in self._headers
                         if not n.lower().startswith('content-')]
        self._payload = None
jpayne@68 1167
class EmailMessage(MIMEPart):
    """MIMEPart whose set_content() also ensures a MIME-Version header."""

    def set_content(self, *args, **kw):
        """Set the content as MIMEPart does, then add MIME-Version if absent."""
        super().set_content(*args, **kw)
        if 'MIME-Version' in self:
            return
        self['MIME-Version'] = '1.0'