Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/email/generator.py @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 # Copyright (C) 2001-2010 Python Software Foundation | |
2 # Author: Barry Warsaw | |
3 # Contact: email-sig@python.org | |
4 | |
5 """Classes to generate plain text from a message object tree.""" | |
6 | |
7 __all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator'] | |
8 | |
9 import re | |
10 import sys | |
11 import time | |
12 import random | |
13 | |
14 from copy import deepcopy | |
15 from io import StringIO, BytesIO | |
16 from email.utils import _has_surrogates | |
17 | |
# Joins the MIME main type and subtype when building handler method names.
UNDERSCORE = '_'
# XXX: no longer used by the code below.
NL = '\n'

# Matches any single line ending: CRLF, bare CR, or bare LF.
NLCRE = re.compile(r'\r\n|\r|\n')
# Matches a "From " at the start of any line (Unix mbox From_ delimiter).
fcre = re.compile(r'^From ', re.MULTILINE)
23 | |
24 | |
25 | |
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, *,
                 policy=None):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default if policy
        is not set), escapes From_ lines in the body of the message by putting
        a `>' in front of them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.

        The policy keyword specifies a policy object that controls a number of
        aspects of the generator's operation.  If no policy is specified,
        the policy associated with the Message object passed to the
        flatten method is used.

        """

        if mangle_from_ is None:
            mangle_from_ = True if policy is None else policy.mangle_from_
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self.maxheaderlen = maxheaderlen
        self.policy = policy

    def write(self, s):
        """Write s to the current output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False, linesep=None):
        r"""Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.

        linesep specifies the characters used to indicate a new line in
        the output.  The default value is determined by the policy specified
        when the Generator instance was created or, if none was specified,
        from the policy associated with the msg.

        """
        # We use the _XXX constants for operating on data that comes directly
        # from the msg, and _encoded_XXX constants for operating on data that
        # has already been converted (to bytes in the BytesGenerator) and
        # inserted into a temporary buffer.
        policy = msg.policy if self.policy is None else self.policy
        if linesep is not None:
            policy = policy.clone(linesep=linesep)
        if self.maxheaderlen is not None:
            policy = policy.clone(max_line_length=self.maxheaderlen)
        self._NL = policy.linesep
        self._encoded_NL = self._encode(self._NL)
        self._EMPTY = ''
        self._encoded_EMPTY = self._encode(self._EMPTY)
        # Because we use clone (below) when we recursively process message
        # subparts, and because clone uses the computed policy (not None),
        # submessages will automatically get set to the computed policy when
        # they are processed by this code.
        old_gen_policy = self.policy
        old_msg_policy = msg.policy
        try:
            self.policy = policy
            msg.policy = policy
            if unixfrom:
                ufrom = msg.get_unixfrom()
                if not ufrom:
                    ufrom = 'From nobody ' + time.ctime(time.time())
                self.write(ufrom + self._NL)
            self._write(msg)
        finally:
            # Restore both policies even if writing failed part-way.
            self.policy = old_gen_policy
            msg.policy = old_msg_policy

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp,
                              self._mangle_from_,
                              None, # Use policy setting, which we've adjusted
                              policy=self.policy)

    #
    # Protected interface - undocumented ;/
    #

    # Note that we use 'self.write' when what we are writing is coming from
    # the source, and self._fp.write when what we are writing is coming from a
    # buffer (because the Bytes subclass has already had a chance to transform
    # the data in its write method in that case).  This is an entirely
    # pragmatic split determined by experiment; we could be more general by
    # always using write and having the Bytes subclass write method detect when
    # it has already transformed the input; but, since this whole thing is a
    # hack anyway this seems good enough.

    def _new_buffer(self):
        """Return a fresh temporary buffer for collecting output."""
        # BytesGenerator overrides this to return BytesIO.
        return StringIO()

    def _encode(self, s):
        """Return s in the form stored in the temporary buffers."""
        # BytesGenerator overrides this to encode strings to bytes.
        return s

    def _write_lines(self, lines):
        """Write the text lines with every line ending replaced by self._NL.

        Nothing is written for a false value (None or '').  Text after the
        last line ending is written without a line ending appended.
        """
        # We have to transform the line endings.
        if not lines:
            return
        lines = NLCRE.split(lines)
        for line in lines[:-1]:
            self.write(line)
            self.write(self._NL)
        if lines[-1]:
            self.write(lines[-1])
        # XXX logic tells me this else should be needed, but the tests fail
        # with it and pass without it.  (NLCRE.split ends with a blank element
        # if and only if there was a trailing newline.)
        #else:
        #    self.write(self._NL)

    def _write(self, msg):
        """Write the headers and body of msg to the current output file.

        The body is rendered into a temporary buffer first, then the headers
        are written, then the buffered body.
        """
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a buffer.  Then we write the
        # headers and the buffer contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._munge_cte = None
            self._fp = sfp = self._new_buffer()
            self._dispatch(msg)
        finally:
            self._fp = oldfp
            munge_cte = self._munge_cte
            del self._munge_cte
        # If we munged the cte, copy the message again and re-fix the CTE.
        if munge_cte:
            msg = deepcopy(msg)
            # replace_header() raises KeyError when the header is absent,
            # which is the case for a parsed 8bit text part that carries a
            # charset but no Content-Transfer-Encoding header.  Add the header
            # in that case; otherwise replace it in place to preserve the
            # original header order.
            if msg.get('content-transfer-encoding') is None:
                msg['Content-Transfer-Encoding'] = munge_cte[0]
            else:
                msg.replace_header('content-transfer-encoding', munge_cte[0])
            msg.replace_header('content-type', munge_cte[1])
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific body handler available for msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write each header of msg, folded by the policy, then a blank line."""
        for h, v in msg.raw_items():
            self.write(self.policy.fold(h, v))
        # A blank line always separates headers from body
        self.write(self._NL)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write the string payload of msg, mangling From_ lines if enabled.

        Raises TypeError if the payload is neither None nor a str.  When the
        raw payload contains surrogates and the message declares a charset,
        the payload is re-encoded on a copy of the message and the resulting
        Content-Transfer-Encoding and Content-Type values are recorded in
        self._munge_cte for _write() to apply to the headers.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, str):
            raise TypeError('string payload expected: %s' % type(payload))
        if _has_surrogates(msg._payload):
            charset = msg.get_param('charset')
            if charset is not None:
                # XXX: This copy stuff is an ugly hack to avoid modifying the
                # existing message.
                msg = deepcopy(msg)
                del msg['content-transfer-encoding']
                msg.set_payload(payload, charset)
                payload = msg.get_payload()
                self._munge_cte = (msg['content-transfer-encoding'],
                                   msg['content-type'])
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._write_lines(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write a multipart body: preamble, delimited parts, epilogue.

        If msg has no boundary, one that does not occur in the flattened
        parts is generated and set on msg.
        """
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, str):
            # e.g. a non-strict parse of a message with no starting boundary.
            self.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = self._new_buffer()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Create a boundary that doesn't appear in any of the
            # message texts.
            alltext = self._encoded_NL.join(msgtexts)
            boundary = self._make_boundary(alltext)
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            if self._mangle_from_:
                preamble = fcre.sub('>From ', msg.preamble)
            else:
                preamble = msg.preamble
            self._write_lines(preamble)
            self.write(self._NL)
        # dash-boundary transport-padding CRLF
        self.write('--' + boundary + self._NL)
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            self.write(self._NL + '--' + boundary + self._NL)
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self.write(self._NL + '--' + boundary + '--' + self._NL)
        if msg.epilogue is not None:
            if self._mangle_from_:
                epilogue = fcre.sub('>From ', msg.epilogue)
            else:
                epilogue = msg.epilogue
            self._write_lines(epilogue)

    def _handle_multipart_signed(self, msg):
        """Write a multipart/signed body with header wrapping disabled."""
        # The contents of signed parts has to stay unmodified in order to keep
        # the signature intact per RFC1847 2.1, so we disable header wrapping.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        p = self.policy
        self.policy = p.clone(max_line_length=0)
        try:
            self._handle_multipart(msg)
        finally:
            self.policy = p

    def _handle_message_delivery_status(self, msg):
        """Write the header blocks of a message/delivery-status payload."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = self._new_buffer()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            text = s.getvalue()
            lines = text.split(self._encoded_NL)
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == self._encoded_EMPTY:
                blocks.append(self._encoded_NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(self._encoded_NL.join(blocks))

    def _handle_message(self, msg):
        """Write the body of a message/* part."""
        s = self._new_buffer()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        # Except, it turns out, when it's a string instead, which happens when
        # and only when HeaderParser is used on a message of mime type
        # message/rfc822.  Such messages are generated by, for example,
        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
        # in that case we just emit the string body.
        payload = msg._payload
        if isinstance(payload, list):
            g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
            payload = s.getvalue()
        else:
            payload = self._encode(payload)
        self._fp.write(payload)

    # This used to be a module level function; we use a classmethod for this
    # and _compile_re so we can continue to provide the module level function
    # for backward compatibility by doing
    #   _make_boundary = Generator._make_boundary
    # at the end of the module.  It *is* internal, so we could drop that...
    @classmethod
    def _make_boundary(cls, text=None):
        """Return a random boundary string.

        If text is given, numeric suffixes are appended until the boundary
        does not occur as a delimiter line in text.
        """
        # Craft a random boundary.  If text is given, ensure that the chosen
        # boundary doesn't appear in the text.
        token = random.randrange(sys.maxsize)
        boundary = ('=' * 15) + (_fmt % token) + '=='
        if text is None:
            return boundary
        b = boundary
        counter = 0
        while True:
            cre = cls._compile_re('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
            if not cre.search(text):
                break
            b = boundary + '.' + str(counter)
            counter += 1
        return b

    @classmethod
    def _compile_re(cls, s, flags):
        """Compile the pattern s; BytesGenerator overrides this for bytes."""
        return re.compile(s, flags)
390 | |
391 | |
class BytesGenerator(Generator):
    """Generates a bytes version of a Message object tree.

    Functionally identical to the base Generator except that the output is
    bytes and not string.  When surrogates were used in the input to encode
    bytes, these are decoded back to bytes for output.  If the policy has
    cte_type set to 7bit, then the message is transformed such that the
    non-ASCII bytes are properly content transfer encoded, using the charset
    unknown-8bit.

    The outfp object must accept bytes in its write method.
    """

    def write(self, s):
        """Encode the str s to bytes and write it to the output file.

        surrogateescape turns surrogate code points back into the raw bytes
        they stand for.
        """
        self._fp.write(s.encode('ascii', 'surrogateescape'))

    def _new_buffer(self):
        """Return a fresh bytes buffer for collecting output."""
        return BytesIO()

    def _encode(self, s):
        """Return the str s encoded to ASCII bytes."""
        return s.encode('ascii')

    def _write_headers(self, msg):
        """Write each header of msg as bytes, then a blank line."""
        # This is almost the same as the string version, except for handling
        # strings with 8bit bytes.
        for h, v in msg.raw_items():
            # fold_binary already returns bytes, so bypass self.write().
            self._fp.write(self.policy.fold_binary(h, v))
        # A blank line always separates headers from body
        self.write(self._NL)

    def _handle_text(self, msg):
        """Write the text payload of msg as bytes.

        The message itself is never modified: From_ mangling is applied to a
        local copy of the payload.
        """
        # If the string has surrogates the original source was bytes, so
        # just write it back out.
        if msg._payload is None:
            return
        if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit':
            payload = msg._payload
            if self._mangle_from_:
                # Work on a local so flattening has no side effect on msg.
                payload = fcre.sub(">From ", payload)
            self._write_lines(payload)
        else:
            super()._handle_text(msg)

    # Default body handler
    _writeBody = _handle_text

    @classmethod
    def _compile_re(cls, s, flags):
        """Compile the pattern s as a bytes regular expression."""
        return re.compile(s.encode('ascii'), flags)
440 | |
441 | |
442 | |
# Placeholder text emitted for a non-text part when no fmt is supplied.
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'

class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, fmt=None, *,
                 policy=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the payload of the subpart as returned by
        get_payload(decode=False).

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen,
                           policy=policy)
        # Fall back to the module default only when no format was given.
        self._fmt = _FMT if fmt is None else fmt

    def _dispatch(self, msg):
        """Print every text part of msg and a placeholder for other parts."""
        for subpart in msg.walk():
            main = subpart.get_content_maintype()
            if main == 'multipart':
                # Containers produce no output; their children are walked.
                continue
            if main == 'text':
                print(subpart.get_payload(decode=False), file=self)
                continue
            # Any other part is replaced by the expanded format string.
            fields = {
                'type'       : subpart.get_content_type(),
                'maintype'   : subpart.get_content_maintype(),
                'subtype'    : subpart.get_content_subtype(),
                'filename'   : subpart.get_filename('[no filename]'),
                'description': subpart.get('Content-Description',
                                           '[no description]'),
                'encoding'   : subpart.get('Content-Transfer-Encoding',
                                           '[no encoding]'),
            }
            print(self._fmt % fields, file=self)
500 | |
501 | |
502 | |
# Helpers used by Generator._make_boundary: the random token is zero-padded
# to the number of digits in sys.maxsize - 1.
_width = len(repr(sys.maxsize - 1))
_fmt = '%%0%dd' % _width

# Backward compatibility: keep the old module-level function name available.
_make_boundary = Generator._make_boundary