jpayne@68
|
1 # Copyright (C) 2001-2007 Python Software Foundation
|
jpayne@68
|
2 # Author: Barry Warsaw
|
jpayne@68
|
3 # Contact: email-sig@python.org
|
jpayne@68
|
4
|
jpayne@68
|
5 """Basic message object for the email package object model."""
|
jpayne@68
|
6
|
jpayne@68
|
7 __all__ = ['Message', 'EmailMessage']
|
jpayne@68
|
8
|
jpayne@68
|
9 import re
|
jpayne@68
|
10 import uu
|
jpayne@68
|
11 import quopri
|
jpayne@68
|
12 from io import BytesIO, StringIO
|
jpayne@68
|
13
|
jpayne@68
|
14 # Intrapackage imports
|
jpayne@68
|
15 from email import utils
|
jpayne@68
|
16 from email import errors
|
jpayne@68
|
17 from email._policybase import Policy, compat32
|
jpayne@68
|
18 from email import charset as _charset
|
jpayne@68
|
19 from email._encoded_words import decode_b
|
jpayne@68
|
# Module-level alias for the Charset class from the charset module, so it
# can be referenced below by its bare name.
Charset = _charset.Charset

# Separator inserted between a header value and each of its parameters when
# a header is assembled by add_header().
SEMISPACE = '; '

# Regular expression that matches `special' characters in parameter values;
# the presence of any of them forces the value to be quoted.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
|
jpayne@68
|
27
|
jpayne@68
|
28
|
jpayne@68
|
def _splitparam(param):
    """Split a header value at its first semicolon.

    Returns a 2-tuple of the stripped text before the semicolon and the
    stripped text after it.  When the value holds no semicolon at all the
    second item is None.

    BAW: this may be too simple.  It isn't strictly RFC 2045 (section 5.1)
    compliant, but it catches most headers found in the wild.
    """
    # RDM: the argument might be a Header object; for now just stringify it.
    head, found, tail = str(param).partition(';')
    rest = tail.strip() if found else None
    return head.strip(), rest
|
jpayne@68
|
38
|
jpayne@68
|
def _formatparam(param, value=None, quote=True):
    """Format a header parameter as a key=value string and return it.

    A value of None, or an empty value, yields just the parameter name.

    A tuple value is taken to be (charset, language, value) and is encoded
    according to RFC 2231 rules; the parameter name then gets a trailing
    '*' and the encoded value is never quoted.  A string value containing
    non-ASCII characters is encoded the same way, using the utf-8 charset
    and an empty language.

    Any other value is wrapped in double quotes when quote is true or when
    it contains one of the tspecials characters; otherwise it is emitted
    bare.
    """
    if value is None or len(value) == 0:
        return param

    if isinstance(value, tuple):
        # Items are (charset, language, value); charset is a string, not a
        # Charset instance.
        encoded = utils.encode_rfc2231(value[2], value[0], value[1])
        return '%s=%s' % (param + '*', encoded)

    try:
        value.encode('ascii')
    except UnicodeEncodeError:
        encoded = utils.encode_rfc2231(value, 'utf-8', '')
        return '%s=%s' % (param + '*', encoded)

    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    if not quote and not tspecials.search(value):
        return '%s=%s' % (param, value)
    return '%s="%s"' % (param, utils.quote(value))
|
jpayne@68
|
72
|
jpayne@68
|
def _parseparam(s):
    """Split a header value into its semicolon separated pieces.

    Returns a list of stripped strings.  For a piece that contains an '='
    the text before the first '=' is lower-cased and both sides of it are
    stripped.  Semicolons that fall inside a double-quoted string do not
    end a piece.
    """
    # RDM: this might be a Header, so for now stringify it.
    remaining = ';' + str(s)
    pieces = []
    while remaining.startswith(';'):
        remaining = remaining[1:]
        cut = remaining.find(';')
        # An odd number of unescaped double quotes before the semicolon
        # means it sits inside a quoted string: move on to the next one.
        while cut > 0:
            quotes = (remaining.count('"', 0, cut)
                      - remaining.count('\\"', 0, cut))
            if quotes % 2 == 0:
                break
            cut = remaining.find(';', cut + 1)
        if cut < 0:
            cut = len(remaining)
        field = remaining[:cut]
        key, equals, val = field.partition('=')
        if equals:
            field = key.strip().lower() + '=' + val.strip()
        pieces.append(field.strip())
        remaining = remaining[cut:]
    return pieces
|
jpayne@68
|
91
|
jpayne@68
|
92
|
jpayne@68
|
def _unquotevalue(value):
    """Strip the quoting from a parameter value.

    A plain value is returned unquoted.  A 3-tuple keeps its first two
    items untouched and only has its third item unquoted.

    This differs from utils.collapse_rfc2231_value() in that it does not
    try to convert the value to unicode; Message.get_param() and
    Message.get_params() are both defined to return the tuple in the face
    of RFC 2231 parameters.
    """
    if not isinstance(value, tuple):
        return utils.unquote(value)
    charset, language, text = value[0], value[1], value[2]
    return charset, language, utils.unquote(text)
|
jpayne@68
|
102
|
jpayne@68
|
103
|
jpayne@68
|
104
|
jpayne@68
|
105 class Message:
|
jpayne@68
|
106 """Basic message object.
|
jpayne@68
|
107
|
jpayne@68
|
108 A message object is defined as something that has a bunch of RFC 2822
|
jpayne@68
|
109 headers and a payload. It may optionally have an envelope header
|
jpayne@68
|
110 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
|
jpayne@68
|
111 multipart or a message/rfc822), then the payload is a list of Message
|
jpayne@68
|
112 objects, otherwise it is a string.
|
jpayne@68
|
113
|
jpayne@68
|
114 Message objects implement part of the `mapping' interface, which assumes
|
jpayne@68
|
115 there is exactly one occurrence of the header per message. Some headers
|
jpayne@68
|
116 do in fact appear multiple times (e.g. Received) and for those headers,
|
jpayne@68
|
117 you must use the explicit API to set or get all the headers. Not all of
|
jpayne@68
|
118 the mapping methods are implemented.
|
jpayne@68
|
119 """
|
jpayne@68
|
def __init__(self, policy=compat32):
    """Create an empty message that uses the given policy."""
    self.policy = policy
    # Header storage: a list of (name, value) pairs in insertion order.
    self._headers = []
    self._unixfrom = None
    # No payload and no charset until one is set.
    self._payload = None
    self._charset = None
    # Defaults for multipart messages
    self.preamble = None
    self.epilogue = None
    self.defects = []
    # Default content type
    self._default_type = 'text/plain'
|
jpayne@68
|
131
|
jpayne@68
|
def __str__(self):
    """Return the whole message, as formatted by as_string()."""
    return self.as_string()
|
jpayne@68
|
136
|
jpayne@68
|
def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
    """Return the entire formatted message as a string.

    Optional 'unixfrom', when true, means include the Unix From_ envelope
    header.  For backward compatibility reasons 'maxheaderlen' defaults to
    0, so it must be overridden explicitly if a different value is wanted.
    'policy' is passed to the Generator instance used to serialize the
    message; if it is not specified the policy associated with the message
    instance is used.

    If the message object contains binary data that is not encoded
    according to RFC standards, the non-compliant data will be replaced by
    unicode "unknown character" code points.
    """
    # Imported here rather than at module level.
    from email.generator import Generator
    if policy is None:
        policy = self.policy
    buffer = StringIO()
    generator = Generator(buffer, mangle_from_=False,
                          maxheaderlen=maxheaderlen, policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
|
jpayne@68
|
160
|
jpayne@68
|
def __bytes__(self):
    """Return the whole message, as formatted by as_bytes()."""
    return self.as_bytes()
|
jpayne@68
|
165
|
jpayne@68
|
def as_bytes(self, unixfrom=False, policy=None):
    """Return the entire formatted message as a bytes object.

    Optional 'unixfrom', when true, means include the Unix From_ envelope
    header.  'policy' is passed to the BytesGenerator instance used to
    serialize the message; if not specified the policy associated with
    the message instance is used.
    """
    # Imported here rather than at module level.
    from email.generator import BytesGenerator
    if policy is None:
        policy = self.policy
    buffer = BytesIO()
    generator = BytesGenerator(buffer, mangle_from_=False, policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
|
jpayne@68
|
180
|
jpayne@68
|
def is_multipart(self):
    """Return True if the payload is a list (of subparts), else False."""
    payload = self._payload
    return isinstance(payload, list)
|
jpayne@68
|
184
|
jpayne@68
|
185 #
|
jpayne@68
|
186 # Unix From_ line
|
jpayne@68
|
187 #
|
jpayne@68
|
def set_unixfrom(self, unixfrom):
    """Store the Unix From_ envelope line for this message."""
    self._unixfrom = unixfrom
|
jpayne@68
|
190
|
jpayne@68
|
def get_unixfrom(self):
    """Return the stored Unix From_ envelope line (None if never set)."""
    return self._unixfrom
|
jpayne@68
|
193
|
jpayne@68
|
194 #
|
jpayne@68
|
195 # Payload manipulation.
|
jpayne@68
|
196 #
|
jpayne@68
|
def attach(self, payload):
    """Add the given payload to the current payload.

    A message without a payload gets a new one-element list; otherwise
    the payload is appended to the existing one.  The current payload is
    therefore always a list of objects after this method succeeds.  To
    set the payload to a scalar object, use set_payload() instead.

    Raises TypeError when the existing payload cannot be appended to.
    """
    if self._payload is None:
        self._payload = [payload]
        return
    try:
        self._payload.append(payload)
    except AttributeError:
        raise TypeError("Attach is not valid on a message with a"
                        " non-multipart payload")
|
jpayne@68
|
212
|
jpayne@68
|
def get_payload(self, i=None, decode=False):
    """Return a reference to the payload.

    The payload will either be a list object or a string.  If you mutate
    the list object, you modify the message's payload in place.  Optional
    i returns that index into the payload.

    Optional decode is a flag indicating whether the payload should be
    decoded or not, according to the Content-Transfer-Encoding header
    (default is False).

    When True and the message is not a multipart, the payload is returned
    as bytes, decoded if the header's value is `quoted-printable',
    `base64' or one of the uuencode names.  If some other encoding is
    used, or the header is missing, the undecoded bytes are returned; the
    same happens for uuencoded data that fails to decode.  A message
    without a payload yields None.

    If the message is a multipart and the decode flag is True, then None
    is returned.

    Raises TypeError when i is given but the payload is not a list.
    """
    # Here is the logic table for this code, based on the email5.0.0 code:
    #   i     decode  is_multipart  result
    # ------  ------  ------------  ------------------------------
    #  None   True    True          None
    #   i     True    True          None
    #  None   False   True          _payload (a list)
    #   i     False   True          _payload element i (a Message)
    #   i     False   False         error (not a list)
    #   i     True    False         error (not a list)
    #  None   False   False         _payload
    #  None   True    False         _payload decoded (bytes)
    if self.is_multipart():
        if decode:
            return None
        if i is None:
            return self._payload
        return self._payload[i]
    # For backward compatibility, use isinstance and this error message
    # instead of the more logical is_multipart test.
    if i is not None and not isinstance(self._payload, list):
        raise TypeError('Expected list, got %s' % type(self._payload))
    payload = self._payload

    if not decode:
        if isinstance(payload, str) and utils._has_surrogates(payload):
            try:
                bpayload = payload.encode('ascii', 'surrogateescape')
            except UnicodeEncodeError:
                # The string mixes surrogates with real non-ASCII
                # characters, so it cannot be turned back into the
                # original bytes; hand it back untouched instead of
                # raising.
                return payload
            charset = self.get_param('charset', 'ascii')
            if isinstance(charset, tuple):
                # RFC 2231 encoded parameter: (CHARSET, LANGUAGE, VALUE).
                charset = charset[2]
            try:
                payload = bpayload.decode(charset, 'replace')
            except LookupError:
                payload = bpayload.decode('ascii', 'replace')
        return payload

    if payload is None:
        # Nothing to decode.
        return None
    if isinstance(payload, str):
        try:
            bpayload = payload.encode('ascii', 'surrogateescape')
        except UnicodeEncodeError:
            # This won't happen for RFC compliant messages (messages
            # containing only ASCII code points in the unicode input).
            # If it does happen, turn the string into bytes in a way
            # guaranteed not to fail.
            bpayload = payload.encode('raw-unicode-escape')
    else:
        # The payload may already be bytes here.
        bpayload = payload

    # cte might be a Header, so for now stringify it.  Surrounding
    # whitespace is ignored so a value such as 'base64 ' is recognised.
    cte = str(self.get('content-transfer-encoding', '')).strip().lower()
    if cte == 'quoted-printable':
        return quopri.decodestring(bpayload)
    if cte == 'base64':
        # XXX: this is a bit of a hack; decode_b should probably be
        # factored out somewhere, but I haven't figured out where yet.
        value, defects = decode_b(b''.join(bpayload.splitlines()))
        for defect in defects:
            self.policy.handle_defect(self, defect)
        return value
    if cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        in_file = BytesIO(bpayload)
        out_file = BytesIO()
        try:
            uu.decode(in_file, out_file, quiet=True)
            return out_file.getvalue()
        except uu.Error:
            # Some decoding problem
            return bpayload
    return bpayload
|
jpayne@68
|
302
|
jpayne@68
|
def set_payload(self, payload, charset=None):
    """Set the payload to the given value.

    Optional charset sets the message's default character set.  See
    set_charset() for details.

    A payload with an encode() method is stored unchanged when no charset
    is given; with a charset it is first encoded to that charset's output
    charset.  A payload with a decode() method is stored decoded as ASCII
    using the surrogateescape error handler.
    """
    if hasattr(payload, 'encode'):
        if charset is None:
            self._payload = payload
            return
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        payload = payload.encode(charset.output_charset)
    is_bytes_like = hasattr(payload, 'decode')
    self._payload = (payload.decode('ascii', 'surrogateescape')
                     if is_bytes_like else payload)
    if charset is not None:
        self.set_charset(charset)
|
jpayne@68
|
322
|
jpayne@68
|
def set_charset(self, charset):
    """Set the charset of the payload to a given character set.

    charset can be a Charset instance, a string naming a character set, or
    None.  If it is a string it will be converted to a Charset instance.
    If charset is None, the charset parameter will be removed from the
    Content-Type field.  Anything else will generate a TypeError.

    The message will be assumed to be of type text/* encoded with
    charset.input_charset.  It will be converted to charset.output_charset
    and encoded properly, if needed, when generating the plain text
    representation of the message.  MIME headers (MIME-Version,
    Content-Type, Content-Transfer-Encoding) will be added as needed.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if not isinstance(charset, Charset):
        charset = Charset(charset)
    self._charset = charset

    if 'MIME-Version' not in self:
        self.add_header('MIME-Version', '1.0')
    if 'Content-Type' in self:
        self.set_param('charset', charset.get_output_charset())
    else:
        self.add_header('Content-Type', 'text/plain',
                        charset=charset.get_output_charset())
    if charset != charset.get_output_charset():
        self._payload = charset.body_encode(self._payload)

    # An existing Content-Transfer-Encoding header is left alone.
    if 'Content-Transfer-Encoding' in self:
        return
    encoder = charset.get_body_encoding()
    try:
        # Works when the body encoding is a callable taking the message.
        encoder(self)
    except TypeError:
        # Not callable: encode the body here and record the value in the
        # header.  The 'if' below is for backward compatibility; it lets
        # unicode through even though that won't work correctly if the
        # message is serialized.
        body = self._payload
        if body:
            try:
                body = body.encode('ascii', 'surrogateescape')
            except UnicodeError:
                body = body.encode(charset.output_charset)
        self._payload = charset.body_encode(body)
        self.add_header('Content-Transfer-Encoding', encoder)
|
jpayne@68
|
369
|
jpayne@68
|
def get_charset(self):
    """Return the Charset instance recorded for the payload, or None."""
    return self._charset
|
jpayne@68
|
374
|
jpayne@68
|
375 #
|
jpayne@68
|
376 # MAPPING INTERFACE (partial)
|
jpayne@68
|
377 #
|
jpayne@68
|
def __len__(self):
    """Return the number of stored headers, counting duplicates."""
    headers = self._headers
    return len(headers)
|
jpayne@68
|
381
|
jpayne@68
|
def __getitem__(self, name):
    """Get a header value.

    A missing header yields None rather than an exception.

    Note that if the header appeared multiple times, exactly which
    occurrence gets returned is undefined.  Use get_all() to get all
    the values matching a header field name.
    """
    value = self.get(name)
    return value
|
jpayne@68
|
392
|
jpayne@68
|
def __setitem__(self, name, val):
    """Set the value of a header.

    Note: this does not overwrite an existing header with the same field
    name.  Use __delitem__() first to delete any existing headers.

    Raises ValueError when the policy limits how often the header may
    occur and that many headers of this name are already present.
    """
    limit = self.policy.header_max_count(name)
    if limit:
        wanted = name.lower()
        matches = (k for k, _ in self._headers if k.lower() == wanted)
        # Count the matching headers one at a time so the error is raised
        # as soon as the limit is reached.
        for seen, _ in enumerate(matches, start=1):
            if seen >= limit:
                raise ValueError("There may be at most {} {} headers "
                                 "in a message".format(limit, name))
    self._headers.append(self.policy.header_store_parse(name, val))
|
jpayne@68
|
410
|
jpayne@68
|
def __delitem__(self, name):
    """Delete all occurrences of a header, if present.

    Header names are compared case insensitively.  Does not raise an
    exception if the header is missing.
    """
    unwanted = name.lower()
    self._headers = [(key, value) for key, value in self._headers
                     if key.lower() != unwanted]
|
jpayne@68
|
422
|
jpayne@68
|
def __contains__(self, name):
    """Return True if a header with this name exists (case insensitive)."""
    wanted = name.lower()
    # Stop at the first match instead of building a list of every
    # lower-cased header name on each membership test.
    return any(key.lower() == wanted for key, _ in self._headers)
|
jpayne@68
|
425
|
jpayne@68
|
def __iter__(self):
    """Yield each stored header field name, in order, with duplicates."""
    yield from (name for name, _ in self._headers)
|
jpayne@68
|
429
|
jpayne@68
|
def keys(self):
    """Return a list of all the message's header field names.

    The names come back in the order the headers are stored, which is the
    order they appeared in the original message or were added to it, and
    may contain duplicates.  Any fields deleted and re-inserted are always
    appended to the header list.
    """
    return [name for name, _value in self._headers]
|
jpayne@68
|
439
|
jpayne@68
|
def values(self):
    """Return a list of all the message's header values.

    Each stored value is passed through the policy's header_fetch_parse()
    first.  The values come back in the order the headers are stored,
    which is the order they appeared in the original message or were added
    to it, and may contain duplicates.  Any fields deleted and re-inserted
    are always appended to the header list.
    """
    return [self.policy.header_fetch_parse(name, raw)
            for name, raw in self._headers]
|
jpayne@68
|
450
|
jpayne@68
|
def items(self):
    """Get all the message's header fields and values.

    Returns a list of (name, value) pairs, each value having been passed
    through the policy's header_fetch_parse().  The pairs come back in the
    order the headers are stored, which is the order they appeared in the
    original message or were added to it, and may contain duplicates.  Any
    fields deleted and re-inserted are always appended to the header list.
    """
    return [(name, self.policy.header_fetch_parse(name, raw))
            for name, raw in self._headers]
|
jpayne@68
|
461
|
jpayne@68
|
def get(self, name, failobj=None):
    """Get a header value.

    Like __getitem__() but return failobj instead of None when the field
    is missing.  The first header whose name matches case insensitively
    is used, and its value is passed through the policy's
    header_fetch_parse().
    """
    wanted = name.lower()
    hit = next(((key, raw) for key, raw in self._headers
                if key.lower() == wanted), None)
    if hit is None:
        return failobj
    return self.policy.header_fetch_parse(*hit)
|
jpayne@68
|
473
|
jpayne@68
|
474 #
|
jpayne@68
|
475 # "Internal" methods (public API, but only intended for use by a parser
|
jpayne@68
|
476 # or generator, not normal application code.
|
jpayne@68
|
477 #
|
jpayne@68
|
478
|
jpayne@68
|
def set_raw(self, name, value):
    """Store name and value in the model without modification.

    The pair is appended to the header list as is; the policy is not
    consulted.  This is an "internal" API, intended only for use by a
    parser.
    """
    pair = (name, value)
    self._headers.append(pair)
|
jpayne@68
|
485
|
jpayne@68
|
def raw_items(self):
    """Return the (name, value) header pairs without modification.

    An iterator over a copy of the header list is returned, so headers
    added or removed afterwards do not affect the iteration.  This is an
    "internal" API, intended only for use by a generator.
    """
    snapshot = self._headers.copy()
    return iter(snapshot)
|
jpayne@68
|
492
|
jpayne@68
|
493 #
|
jpayne@68
|
494 # Additional useful stuff
|
jpayne@68
|
495 #
|
jpayne@68
|
496
|
jpayne@68
|
def get_all(self, name, failobj=None):
    """Return a list of all the values for the named field.

    Names are compared case insensitively and every value is passed
    through the policy's header_fetch_parse().  The values come back in
    the order the headers are stored, and may contain duplicates.  Any
    fields deleted and re-inserted are always appended to the header list.

    If no such fields exist, failobj is returned (defaults to None).
    """
    wanted = name.lower()
    found = [self.policy.header_fetch_parse(key, raw)
             for key, raw in self._headers
             if key.lower() == wanted]
    return found if found else failobj
|
jpayne@68
|
514
|
jpayne@68
|
def add_header(self, _name, _value, **_params):
    """Extended header setting.

    _name is the header field to add.  Keyword arguments can be used to
    set additional parameters for the header field, with underscores
    converted to dashes.  Normally the parameter will be added as
    key="value" unless value is None, in which case only the key will be
    added.  If a parameter value contains non-ASCII characters it can be
    specified as a three-tuple of (charset, language, value), in which
    case it will be encoded according to RFC2231 rules.  Otherwise it will
    be encoded using the utf-8 charset and a language of ''.

    When _value is None the header consists of the parameters alone.

    Examples:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    msg.add_header('content-disposition', 'attachment',
                   filename=('utf-8', '', 'Fußballer.ppt'))
    msg.add_header('content-disposition', 'attachment',
                   filename='Fußballer.ppt')
    """
    pieces = [] if _value is None else [_value]
    for key, val in _params.items():
        pname = key.replace('_', '-')
        pieces.append(pname if val is None else _formatparam(pname, val))
    self[_name] = SEMISPACE.join(pieces)
|
jpayne@68
|
544
|
jpayne@68
|
def replace_header(self, _name, _value):
    """Replace a header.

    Replace the first matching header found in the message, retaining
    header order and case.  The new value is stored through the policy's
    header_store_parse() under the existing header's name.

    Raises KeyError (carrying the lower-cased name) if no matching header
    was found.
    """
    _name = _name.lower()
    # enumerate() supplies the position directly; no need to zip the list
    # with a range over its own length.
    for index, (key, _) in enumerate(self._headers):
        if key.lower() == _name:
            self._headers[index] = self.policy.header_store_parse(key, _value)
            break
    else:
        raise KeyError(_name)
|
jpayne@68
|
559
|
jpayne@68
|
560 #
|
jpayne@68
|
561 # Use these three methods instead of the three above.
|
jpayne@68
|
562 #
|
jpayne@68
|
563
|
jpayne@68
|
def get_content_type(self):
    """Return the message's content type.

    The returned string is coerced to lower case of the form
    `maintype/subtype'.  If there was no Content-Type header in the
    message, the default type as given by get_default_type() will be
    returned.  A Content-Type value that does not contain exactly one
    slash yields text/plain.

    RFC 2045 defines a message's default type to be text/plain unless it
    appears inside a multipart/digest container, in which case it would be
    message/rfc822.
    """
    # A private sentinel tells a missing header apart from any real value.
    absent = object()
    header = self.get('content-type', absent)
    if header is absent:
        # This should have no parameters
        return self.get_default_type()
    ctype = _splitparam(header)[0].lower()
    # RFC 2045, section 5.2 says if its invalid, use text/plain
    return ctype if ctype.count('/') == 1 else 'text/plain'
|
jpayne@68
|
587
|
jpayne@68
|
def get_content_maintype(self):
    """Return the message's main content type.

    This is the text before the first slash in the string returned by
    get_content_type(), i.e. its `maintype' part.
    """
    maintype, _, _ = self.get_content_type().partition('/')
    return maintype
|
jpayne@68
|
596
|
jpayne@68
|
def get_content_subtype(self):
    """Return the message's sub-content type.

    This is the `subtype' part of the string returned by
    get_content_type(): the second of its slash separated pieces.
    """
    pieces = self.get_content_type().split('/')
    return pieces[1]
|
jpayne@68
|
605
|
jpayne@68
|
def get_default_type(self):
    """Return the `default' content type.

    Most messages have a default content type of text/plain, except for
    messages that are subparts of multipart/digest containers.  Such
    subparts have a default content type of message/rfc822.  The value
    returned is whatever was last stored by set_default_type(), or
    text/plain for a freshly created message.
    """
    return self._default_type
|
jpayne@68
|
614
|
jpayne@68
|
def set_default_type(self, ctype):
    """Set the `default' content type.

    ctype should be either "text/plain" or "message/rfc822", although this
    is not enforced.  The value is kept on the message object only; it is
    not stored in the Content-Type header.
    """
    self._default_type = ctype
|
jpayne@68
|
623
|
jpayne@68
|
def _get_params_preserve(self, failobj, header):
    """Return the parameters of a header with their quoting preserved.

    Like get_params() but the values keep their quotes.  failobj is
    returned when the header is missing.  Otherwise the header is split
    into (name, value) pairs, a piece without an '=' getting an empty
    value, and the pairs are passed through utils.decode_params().

    BAW: should this be part of the public interface?
    """
    # A private sentinel tells a missing header apart from any real value.
    absent = object()
    raw = self.get(header, absent)
    if raw is absent:
        return failobj
    pairs = []
    for piece in _parseparam(raw):
        key, equals, val = piece.partition('=')
        if equals:
            pairs.append((key.strip(), val.strip()))
        else:
            # Must have been a bare attribute
            pairs.append((piece.strip(), ''))
    return utils.decode_params(pairs)
|
jpayne@68
|
644
|
jpayne@68
|
def get_params(self, failobj=None, header='content-type', unquote=True):
    """Return the message's Content-Type parameters, as a list.

    The elements of the returned list are 2-tuples of key/value pairs, as
    split on the `=' sign.  The left hand side of the `=' is the key,
    while the right hand side is the value.  If there is no `=' sign in
    the parameter the value is the empty string.  The value is as
    described in the get_param() method.

    Optional failobj is the object to return if there is no Content-Type
    header.  Optional header is the header to search instead of
    Content-Type.  If unquote is True, the value is unquoted.
    """
    # A private sentinel tells a missing header apart from any real result.
    absent = object()
    pairs = self._get_params_preserve(absent, header)
    if pairs is absent:
        return failobj
    if not unquote:
        return pairs
    return [(key, _unquotevalue(val)) for key, val in pairs]
|
jpayne@68
|
666
|
jpayne@68
|
def get_param(self, param, failobj=None, header='content-type',
              unquote=True):
    """Look up a single parameter of a header (Content-Type by default).

    failobj is returned when the header is missing or when it carries no
    parameter named param.  Parameter names are matched without regard to
    case.

    The result is either a string or, for an RFC 2231 encoded parameter,
    a 3-tuple (CHARSET, LANGUAGE, VALUE).  CHARSET and LANGUAGE may both
    be None, in which case VALUE should be treated as us-ascii text;
    LANGUAGE can usually be ignored.  The string (or the VALUE element of
    the tuple) is unquoted unless unquote is false.

    Callers that do not care about RFC 2231 encoding can flatten the
    result to a plain string:

        rawparam = msg.get_param('foo')
        param = email.utils.collapse_rfc2231_value(rawparam)

    """
    if header not in self:
        return failobj
    wanted = param.lower()
    for key, val in self._get_params_preserve(failobj, header):
        if key.lower() != wanted:
            continue
        return _unquotevalue(val) if unquote else val
    return failobj
|
jpayne@68
|
700
|
jpayne@68
|
def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language='', replace=False):
    """Set a parameter in the Content-Type header.

    If the parameter already exists in the header, its value will be
    replaced with the new value.  A parameter that is present with an
    empty value (`foo=""' or a bare `foo') counts as existing.

    If header is Content-Type and has not yet been defined for this
    message, it will be set to "text/plain" and the new parameter and
    value will be appended as per RFC 2045.

    An alternate header can be specified in the header argument, and all
    parameters will be quoted as necessary unless requote is False.

    If charset is specified, the parameter will be encoded according to RFC
    2231.  Optional language specifies the RFC 2231 language, defaulting
    to the empty string.  Both charset and language should be strings.

    If replace is true the changed header is written back with
    replace_header(); otherwise the old header is deleted and the new
    value is added again.
    """
    if not isinstance(value, tuple) and charset:
        value = (charset, language, value)

    if header not in self and header.lower() == 'content-type':
        ctype = 'text/plain'
    else:
        ctype = self.get(header)

    # Use a sentinel rather than truthiness: an existing parameter whose
    # value is the empty string must be replaced, not duplicated.
    missing = object()
    if self.get_param(param, missing, header) is missing:
        new_param = _formatparam(param, value, requote)
        ctype = SEMISPACE.join([ctype, new_param]) if ctype else new_param
    else:
        ctype = ''
        for old_param, old_value in self.get_params(header=header,
                                                    unquote=requote):
            if old_param.lower() == param.lower():
                append_param = _formatparam(param, value, requote)
            else:
                append_param = _formatparam(old_param, old_value, requote)
            ctype = (SEMISPACE.join([ctype, append_param])
                     if ctype else append_param)
    if ctype != self.get(header):
        if replace:
            self.replace_header(header, ctype)
        else:
            del self[header]
            self[header] = ctype
|
jpayne@68
|
751
|
jpayne@68
|
def del_param(self, param, header='content-type', requote=True):
    """Strip one parameter, name and value, from a header.

    The header (Content-Type unless header says otherwise) is rebuilt
    from its remaining parameters.  The values are quoted as needed
    unless requote is False.  Nothing happens when the header is absent.
    """
    if header not in self:
        return
    rebuilt = ''
    for name, val in self.get_params(header=header, unquote=requote):
        if name.lower() == param.lower():
            continue
        piece = _formatparam(name, val, requote)
        rebuilt = SEMISPACE.join([rebuilt, piece]) if rebuilt else piece
    if rebuilt != self.get(header):
        del self[header]
        self[header] = rebuilt
|
jpayne@68
|
773
|
jpayne@68
|
def set_type(self, type, header='Content-Type', requote=True):
    """Replace the maintype/subtype of the Content-Type header.

    type has to look like "maintype/subtype" (exactly one slash);
    anything else raises ValueError.

    The parameters already present on the header are kept.  With requote
    left true they are re-quoted when written back; with requote false
    their existing quoting is left alone.

    header selects a different header to rewrite.  When the header is
    Content-Type, a MIME-Version header of 1.0 is (re)set as well.
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError
    # Touching Content-Type always refreshes MIME-Version.
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if header not in self:
        self[header] = type
        return
    old_params = self.get_params(header=header, unquote=requote)
    del self[header]
    self[header] = type
    # The first entry is the previous type itself, so start at the second.
    for name, val in old_params[1:]:
        self.set_param(name, val, header, requote)
|
jpayne@68
|
805
|
jpayne@68
|
def get_filename(self, failobj=None):
    """Return the file name attached to the payload, or failobj.

    The `filename' parameter of Content-Disposition is tried first; when
    it is not available the `name' parameter of Content-Type is used
    instead.  The result is unquoted and stripped of surrounding
    whitespace.
    """
    sentinel = object()
    found = self.get_param('filename', sentinel, 'content-disposition')
    if found is sentinel:
        found = self.get_param('name', sentinel, 'content-type')
        if found is sentinel:
            return failobj
    return utils.collapse_rfc2231_value(found).strip()
|
jpayne@68
|
821
|
jpayne@68
|
def get_boundary(self, failobj=None):
    """Return the unquoted `boundary' parameter of Content-Type.

    failobj is returned when there is no such parameter.
    """
    sentinel = object()
    found = self.get_param('boundary', sentinel)
    if found is sentinel:
        return failobj
    # RFC 2046 says that boundaries may begin but not end in w/s
    collapsed = utils.collapse_rfc2231_value(found)
    return collapsed.rstrip()
|
jpayne@68
|
834
|
jpayne@68
|
def set_boundary(self, boundary):
    """Set the boundary parameter in Content-Type to 'boundary'.

    Unlike deleting the Content-Type header and adding a fresh one with
    add_header(), this rewrites the header where it already sits, so the
    order of the message's headers is preserved.

    HeaderParseError is raised if the message has no Content-Type header.
    """
    sentinel = object()
    params = self._get_params_preserve(sentinel, 'content-type')
    if params is sentinel:
        # Without a Content-Type header there is no type to attach the
        # boundary to, so refuse.
        raise errors.HeaderParseError('No Content-Type header found')
    quoted = '"%s"' % boundary
    updated = [('boundary', quoted) if key.lower() == 'boundary'
               else (key, val)
               for key, val in params]
    if not any(key.lower() == 'boundary' for key, _ in params):
        # The original Content-Type header had no boundary attribute.
        # Tack one on the end.  BAW: should we raise an exception
        # instead???
        updated.append(('boundary', quoted))
    # Render the new header value once; a bare attribute has no '=' part.
    rendered = SEMISPACE.join(
        key if val == '' else '%s=%s' % (key, val)
        for key, val in updated)
    rebuilt = []
    for name, current in self._headers:
        if name.lower() == 'content-type':
            rebuilt.append(self.policy.header_store_parse(name, rendered))
        else:
            rebuilt.append((name, current))
    self._headers = rebuilt
|
jpayne@68
|
880
|
jpayne@68
|
def get_content_charset(self, failobj=None):
    """Return the lower-cased charset parameter of Content-Type.

    failobj is returned when the header or its charset parameter is
    missing, and also when the charset name contains characters outside
    the us-ascii range.
    """
    sentinel = object()
    charset = self.get_param('charset', sentinel)
    if charset is sentinel:
        return failobj
    if isinstance(charset, tuple):
        # RFC 2231 encoded, so decode it, and it better end up as ascii.
        declared = charset[0] or 'us-ascii'
        raw_value = charset[2]
        try:
            # LookupError: the declared charset is unknown to Python.
            # UnicodeError: the text does not fit the declared charset.
            charset = str(raw_value.encode('raw-unicode-escape'), declared)
        except (LookupError, UnicodeError):
            charset = raw_value
    # charset characters must be in us-ascii range
    try:
        charset.encode('us-ascii')
    except UnicodeError:
        return failobj
    # RFC 2046, $4.1.2 says charsets are not case sensitive
    return charset.lower()
|
jpayne@68
|
910
|
jpayne@68
|
def get_charsets(self, failobj=None):
    """Return the charsets of this message and of every part inside it.

    The message is walked with walk(), and for each part visited the
    result of its get_content_charset(failobj) is collected: a charset
    name, or failobj when that part declares none.

    The container itself (self) is included, so a message that is not
    multipart still yields a list with one entry.
    """
    found = []
    for part in self.walk():
        found.append(part.get_content_charset(failobj))
    return found
|
jpayne@68
|
928
|
jpayne@68
|
def get_content_disposition(self):
    """Return the lower-cased content disposition, or None if unset.

    This is the leading token of the Content-Disposition header (the
    part before any parameters); rfc2183 defines 'inline' and
    'attachment'.
    """
    raw = self.get('content-disposition')
    if raw is None:
        return None
    disposition, _ = _splitparam(raw)
    return disposition.lower()
|
jpayne@68
|
940
|
jpayne@68
|
941 # I.e. def walk(self): ...
|
jpayne@68
|
942 from email.iterators import walk
|
jpayne@68
|
943
|
jpayne@68
|
944
|
jpayne@68
|
class MIMEPart(Message):
    """Message subclass with helpers for finding and building MIME content.

    When no policy is given, email.policy.default is used.
    """

    def __init__(self, policy=None):
        if policy is None:
            # Imported here rather than at module level.
            from email.policy import default
            policy = default
        Message.__init__(self, policy)


    def as_string(self, unixfrom=False, maxheaderlen=None, policy=None):
        """Return the entire formatted message as a string.

        Optional 'unixfrom', when true, means include the Unix From_ envelope
        header.  maxheaderlen is retained for backward compatibility with the
        base Message class, but defaults to None, meaning that the policy value
        for max_line_length controls the header maximum length.  'policy' is
        passed to the Generator instance used to serialize the message; if it
        is not specified the policy associated with the message instance is
        used.
        """
        policy = self.policy if policy is None else policy
        if maxheaderlen is None:
            maxheaderlen = policy.max_line_length
        # unixfrom must be forwarded, otherwise the argument is ignored.
        return super().as_string(unixfrom=unixfrom,
                                 maxheaderlen=maxheaderlen,
                                 policy=policy)

    def __str__(self):
        return self.as_string(policy=self.policy.clone(utf8=True))

    def is_attachment(self):
        """Return True if Content-Disposition is 'attachment'.

        Reads the header object's content_disposition attribute, so the
        message's policy must produce header objects that have one.
        """
        c_d = self.get('content-disposition')
        return False if c_d is None else c_d.content_disposition == 'attachment'

    def _find_body(self, part, preferencelist):
        # Generate (priority, part) pairs for body candidates found at or
        # below 'part'; priority is the index of the match in preferencelist.
        if part.is_attachment():
            return
        maintype, subtype = part.get_content_type().split('/')
        if maintype == 'text':
            if subtype in preferencelist:
                yield (preferencelist.index(subtype), part)
            return
        if maintype != 'multipart':
            return
        if subtype != 'related':
            for subpart in part.iter_parts():
                yield from self._find_body(subpart, preferencelist)
            return
        if 'related' in preferencelist:
            yield (preferencelist.index('related'), part)
        candidate = None
        start = part.get_param('start')
        if start:
            for subpart in part.iter_parts():
                if subpart['content-id'] == start:
                    candidate = subpart
                    break
        if candidate is None:
            # A malformed multipart can carry a string payload; indexing it
            # would hand a single character to the recursive call.
            subparts = part.get_payload() if part.is_multipart() else None
            candidate = subparts[0] if subparts else None
        if candidate is not None:
            yield from self._find_body(candidate, preferencelist)

    def get_body(self, preferencelist=('related', 'html', 'plain')):
        """Return best candidate mime part for display as 'body' of message.

        Do a depth first search, starting with self, looking for the first part
        matching each of the items in preferencelist, and return the part
        corresponding to the first item that has a match, or None if no items
        have a match.  If 'related' is not included in preferencelist, consider
        the root part of any multipart/related encountered as a candidate
        match.  Ignore parts with 'Content-Disposition: attachment'.
        """
        best_prio = len(preferencelist)
        body = None
        for prio, part in self._find_body(self, preferencelist):
            if prio < best_prio:
                best_prio = prio
                body = part
                if prio == 0:
                    # Nothing can beat the first preference.
                    break
        return body

    _body_types = {('text', 'plain'),
                   ('text', 'html'),
                   ('multipart', 'related'),
                   ('multipart', 'alternative')}

    def iter_attachments(self):
        """Return an iterator over the non-main parts of a multipart.

        Skip the first of each occurrence of text/plain, text/html,
        multipart/related, or multipart/alternative in the multipart (unless
        they have a 'Content-Disposition: attachment' header) and include all
        remaining subparts in the returned iterator.  When applied to a
        multipart/related, return all parts except the root part.  Return an
        empty iterator when applied to a multipart/alternative or a
        non-multipart.
        """
        maintype, subtype = self.get_content_type().split('/')
        if maintype != 'multipart' or subtype == 'alternative':
            return
        payload = self.get_payload()
        # Certain malformed messages can have content type set to `multipart/*`
        # but still have single part body, in which case payload.copy() can
        # fail with AttributeError.
        try:
            parts = payload.copy()
        except AttributeError:
            # payload is not a list, it is most probably a string.
            return

        if subtype == 'related':
            # For related, we treat everything but the root as an attachment.
            # The root may be indicated by 'start'; if there's no start or we
            # can't find the named start, treat the first subpart as the root.
            start = self.get_param('start')
            if start:
                found = False
                attachments = []
                for part in parts:
                    if part.get('content-id') == start:
                        found = True
                    else:
                        attachments.append(part)
                if found:
                    yield from attachments
                    return
            # Slicing, unlike pop(0), is safe when there are no parts at all.
            yield from parts[1:]
            return
        # Otherwise we more or less invert the remaining logic in get_body.
        # This only really works in edge cases (ex: non-text related or
        # alternatives) if the sending agent sets content-disposition.
        seen = set()   # Only skip the first example of each candidate type.
        for part in parts:
            maintype, subtype = part.get_content_type().split('/')
            if ((maintype, subtype) in self._body_types and
                    not part.is_attachment() and subtype not in seen):
                seen.add(subtype)
                continue
            yield part

    def iter_parts(self):
        """Return an iterator over all immediate subparts of a multipart.

        Return an empty iterator for a non-multipart, and also for a
        message that is labelled multipart but whose payload is not a list
        of parts.
        """
        if self.get_content_maintype() == 'multipart' and self.is_multipart():
            yield from self.get_payload()

    def get_content(self, *args, content_manager=None, **kw):
        """Return the content via content_manager.get_content().

        The policy's content_manager is used when none is passed; all
        other arguments are forwarded unchanged.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        return content_manager.get_content(self, *args, **kw)

    def set_content(self, *args, content_manager=None, **kw):
        """Set the content via content_manager.set_content().

        The policy's content_manager is used when none is passed; all
        other arguments are forwarded unchanged.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        content_manager.set_content(self, *args, **kw)

    def _make_multipart(self, subtype, disallowed_subtypes, boundary):
        # Turn this message into multipart/<subtype>.  Existing content-*
        # headers and the payload move into a new first subpart.
        if self.get_content_maintype() == 'multipart':
            existing_subtype = self.get_content_subtype()
            disallowed_subtypes = disallowed_subtypes + (subtype,)
            if existing_subtype in disallowed_subtypes:
                raise ValueError("Cannot convert {} to {}".format(
                    existing_subtype, subtype))
        keep_headers = []
        part_headers = []
        for name, value in self._headers:
            if name.lower().startswith('content-'):
                part_headers.append((name, value))
            else:
                keep_headers.append((name, value))
        if part_headers:
            # There is existing content, move it to the first subpart.
            part = type(self)(policy=self.policy)
            part._headers = part_headers
            part._payload = self._payload
            self._payload = [part]
        else:
            self._payload = []
        self._headers = keep_headers
        self['Content-Type'] = 'multipart/' + subtype
        if boundary is not None:
            self.set_param('boundary', boundary)

    def make_related(self, boundary=None):
        """Convert to multipart/related; alternative and mixed are refused."""
        self._make_multipart('related', ('alternative', 'mixed'), boundary)

    def make_alternative(self, boundary=None):
        """Convert to multipart/alternative; mixed is refused."""
        self._make_multipart('alternative', ('mixed',), boundary)

    def make_mixed(self, boundary=None):
        """Convert to multipart/mixed."""
        self._make_multipart('mixed', (), boundary)

    def _add_multipart(self, _subtype, *args, _disp=None, **kw):
        # Ensure self is multipart/<_subtype>, then attach a new part built
        # by set_content(*args, **kw).  _disp is the Content-Disposition to
        # use when set_content() did not set one.
        if (self.get_content_maintype() != 'multipart' or
                self.get_content_subtype() != _subtype):
            getattr(self, 'make_' + _subtype)()
        part = type(self)(policy=self.policy)
        part.set_content(*args, **kw)
        if _disp and 'content-disposition' not in part:
            part['Content-Disposition'] = _disp
        self.attach(part)

    def add_related(self, *args, **kw):
        """Attach a new part to a multipart/related, as 'inline' by default."""
        self._add_multipart('related', *args, _disp='inline', **kw)

    def add_alternative(self, *args, **kw):
        """Attach a new part to a multipart/alternative."""
        self._add_multipart('alternative', *args, **kw)

    def add_attachment(self, *args, **kw):
        """Attach a new part to a multipart/mixed, as 'attachment' by default."""
        self._add_multipart('mixed', *args, _disp='attachment', **kw)

    def clear(self):
        """Remove all headers and the payload."""
        self._headers = []
        self._payload = None

    def clear_content(self):
        """Remove the payload and every header whose name starts with content-."""
        self._headers = [(n, v) for n, v in self._headers
                         if not n.lower().startswith('content-')]
        self._payload = None
|
jpayne@68
|
1166
|
jpayne@68
|
1167
|
jpayne@68
|
class EmailMessage(MIMEPart):
    """MIMEPart whose set_content() also ensures a MIME-Version header."""

    def set_content(self, *args, **kw):
        """Set the content as MIMEPart does, then add MIME-Version if absent."""
        super().set_content(*args, **kw)
        if 'MIME-Version' in self:
            return
        self['MIME-Version'] = '1.0'
|