# Copyright (C) 2001-2010 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Classes to generate plain text from a message object tree."""

__all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator']

import re
import sys
import time
import random

from copy import deepcopy
from io import StringIO, BytesIO
from email.utils import _has_surrogates

UNDERSCORE = '_'
NL = '\n'  # XXX: no longer used by the code below.

# Matches any of the three newline conventions; used to normalize line endings.
NLCRE = re.compile(r'\r\n|\r|\n')
# Matches a Unix "From " delimiter at the start of any line of a body.
fcre = re.compile(r'^From ', re.MULTILINE)



class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, *,
                 policy=None):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default if policy
        is not set), escapes From_ lines in the body of the message by putting
        a `>' in front of them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.

        The policy keyword specifies a policy object that controls a number of
        aspects of the generator's operation.  If no policy is specified,
        the policy associated with the Message object passed to the
        flatten method is used.

        """

        if mangle_from_ is None:
            mangle_from_ = True if policy is None else policy.mangle_from_
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self.maxheaderlen = maxheaderlen
        self.policy = policy

    def write(self, s):
        """Write s to the output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False, linesep=None):
        r"""Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.

        linesep specifies the characters used to indicate a new line in
        the output.  The default value is determined by the policy specified
        when the Generator instance was created or, if none was specified,
        from the policy associated with the msg.

        """
        # We use the _XXX constants for operating on data that comes directly
        # from the msg, and _encoded_XXX constants for operating on data that
        # has already been converted (to bytes in the BytesGenerator) and
        # inserted into a temporary buffer.
        policy = msg.policy if self.policy is None else self.policy
        if linesep is not None:
            policy = policy.clone(linesep=linesep)
        if self.maxheaderlen is not None:
            policy = policy.clone(max_line_length=self.maxheaderlen)
        self._NL = policy.linesep
        self._encoded_NL = self._encode(self._NL)
        self._EMPTY = ''
        self._encoded_EMPTY = self._encode(self._EMPTY)
        # Because we use clone (below) when we recursively process message
        # subparts, and because clone uses the computed policy (not None),
        # submessages will automatically get set to the computed policy when
        # they are processed by this code.
        old_gen_policy = self.policy
        old_msg_policy = msg.policy
        try:
            self.policy = policy
            msg.policy = policy
            if unixfrom:
                ufrom = msg.get_unixfrom()
                if not ufrom:
                    ufrom = 'From nobody ' + time.ctime(time.time())
                self.write(ufrom + self._NL)
            self._write(msg)
        finally:
            # Always restore the policies we temporarily replaced, even if
            # flattening raised.
            self.policy = old_gen_policy
            msg.policy = old_msg_policy

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp,
                              self._mangle_from_,
                              None, # Use policy setting, which we've adjusted
                              policy=self.policy)

    #
    # Protected interface - undocumented ;/
    #

    # Note that we use 'self.write' when what we are writing is coming from
    # the source, and self._fp.write when what we are writing is coming from a
    # buffer (because the Bytes subclass has already had a chance to transform
    # the data in its write method in that case).  This is an entirely
    # pragmatic split determined by experiment; we could be more general by
    # always using write and having the Bytes subclass write method detect when
    # it has already transformed the input; but, since this whole thing is a
    # hack anyway this seems good enough.

    def _new_buffer(self):
        """Return a fresh in-memory buffer of the generator's output type."""
        # BytesGenerator overrides this to return BytesIO.
        return StringIO()

    def _encode(self, s):
        """Convert the str s to the generator's output type."""
        # BytesGenerator overrides this to encode strings to bytes.
        return s

    def _write_lines(self, lines):
        """Write the text in lines, replacing each newline with self._NL."""
        # We have to transform the line endings.
        if not lines:
            return
        lines = NLCRE.split(lines)
        for line in lines[:-1]:
            self.write(line)
            self.write(self._NL)
        if lines[-1]:
            self.write(lines[-1])
        # XXX logic tells me this else should be needed, but the tests fail
        # with it and pass without it.  (NLCRE.split ends with a blank element
        # if and only if there was a trailing newline.)
        #else:
        #    self.write(self._NL)

    def _write(self, msg):
        """Write the headers and body of msg to the current output."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a buffer.  Then we write the
        # headers and the buffer contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._munge_cte = None
            self._fp = sfp = self._new_buffer()
            self._dispatch(msg)
        finally:
            self._fp = oldfp
            munge_cte = self._munge_cte
            del self._munge_cte
        # If we munged the cte, copy the message again and re-fix the CTE.
        if munge_cte:
            msg = deepcopy(msg)
            msg.replace_header('content-transfer-encoding', munge_cte[0])
            msg.replace_header('content-type', munge_cte[1])
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific _handle_*() method available for msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, folded by the policy, then a blank line."""
        for h, v in msg.raw_items():
            self.write(self.policy.fold(h, v))
        # A blank line always separates headers from body
        self.write(self._NL)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write the string payload of msg, mangling From_ lines if enabled.

        Raises TypeError if the payload is neither None nor a str.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, str):
            raise TypeError('string payload expected: %s' % type(payload))
        if _has_surrogates(msg._payload):
            charset = msg.get_param('charset')
            if charset is not None:
                # XXX: This copy stuff is an ugly hack to avoid modifying the
                # existing message.
                msg = deepcopy(msg)
                del msg['content-transfer-encoding']
                msg.set_payload(payload, charset)
                payload = msg.get_payload()
                # Remember the recomputed headers so _write() can apply them
                # to the copy of the message whose headers it writes.
                self._munge_cte = (msg['content-transfer-encoding'],
                                   msg['content-type'])
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._write_lines(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write the subparts of msg separated by MIME boundary lines."""
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, str):
            # e.g. a non-strict parse of a message with no starting boundary.
            self.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = self._new_buffer()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Create a boundary that doesn't appear in any of the
            # message texts.
            alltext = self._encoded_NL.join(msgtexts)
            boundary = self._make_boundary(alltext)
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            if self._mangle_from_:
                preamble = fcre.sub('>From ', msg.preamble)
            else:
                preamble = msg.preamble
            self._write_lines(preamble)
            self.write(self._NL)
        # dash-boundary transport-padding CRLF
        self.write('--' + boundary + self._NL)
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            self.write(self._NL + '--' + boundary + self._NL)
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self.write(self._NL + '--' + boundary + '--' + self._NL)
        if msg.epilogue is not None:
            if self._mangle_from_:
                epilogue = fcre.sub('>From ', msg.epilogue)
            else:
                epilogue = msg.epilogue
            self._write_lines(epilogue)

    def _handle_multipart_signed(self, msg):
        """Write a multipart/signed message with header wrapping disabled."""
        # The contents of signed parts has to stay unmodified in order to keep
        # the signature intact per RFC1847 2.1, so we disable header wrapping.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        p = self.policy
        self.policy = p.clone(max_line_length=0)
        try:
            self._handle_multipart(msg)
        finally:
            self.policy = p

    def _handle_message_delivery_status(self, msg):
        """Write the header blocks of a message/delivery-status payload."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = self._new_buffer()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            text = s.getvalue()
            lines = text.split(self._encoded_NL)
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == self._encoded_EMPTY:
                blocks.append(self._encoded_NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(self._encoded_NL.join(blocks))

    def _handle_message(self, msg):
        """Write the payload of a message/* part."""
        s = self._new_buffer()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        # Except, it turns out, when it's a string instead, which happens when
        # and only when HeaderParser is used on a message of mime type
        # message/rfc822.  Such messages are generated by, for example,
        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
        # in that case we just emit the string body.
        payload = msg._payload
        if isinstance(payload, list):
            g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
            payload = s.getvalue()
        else:
            payload = self._encode(payload)
        self._fp.write(payload)

    # This used to be a module level function; we use a classmethod for this
    # and _compile_re so we can continue to provide the module level function
    # for backward compatibility by doing
    #   _make_boundary = Generator._make_boundary
    # at the end of the module.  It *is* internal, so we could drop that...
    @classmethod
    def _make_boundary(cls, text=None):
        """Return a random MIME boundary string.

        If text is given, the returned boundary is guaranteed not to occur in
        it as a delimiter line ("--boundary") or close-delimiter line
        ("--boundary--"), whether the lines of text end in LF or CRLF.  On a
        collision a ".N" suffix with an increasing counter is appended until
        the candidate is unique.
        """
        # Craft a random boundary.  If text is given, ensure that the chosen
        # boundary doesn't appear in the text.
        token = random.randrange(sys.maxsize)
        boundary = ('=' * 15) + (_fmt % token) + '=='
        if text is None:
            return boundary
        b = boundary
        counter = 0
        while True:
            # In MULTILINE mode `$' only matches right before a "\n", so an
            # optional "\r" must be allowed for; otherwise delimiter lines in
            # CRLF-terminated text would never be detected.
            cre = cls._compile_re(
                '^--' + re.escape(b) + r'(--)?\r?$', re.MULTILINE)
            if not cre.search(text):
                break
            b = boundary + '.' + str(counter)
            counter += 1
        return b

    @classmethod
    def _compile_re(cls, s, flags):
        """Compile the pattern s for searching text of the output type."""
        return re.compile(s, flags)


class BytesGenerator(Generator):
    """Generates a bytes version of a Message object tree.

    Functionally identical to the base Generator except that the output is
    bytes and not string.  When surrogates were used in the input to encode
    bytes, these are decoded back to bytes for output.  If the policy has
    cte_type set to 7bit, then the message is transformed such that the
    non-ASCII bytes are properly content transfer encoded, using the charset
    unknown-8bit.

    The outfp object must accept bytes in its write method.
    """

    def write(self, s):
        """Encode the str s (surrogateescape) and write the resulting bytes."""
        self._fp.write(s.encode('ascii', 'surrogateescape'))

    def _new_buffer(self):
        """Return a fresh in-memory bytes buffer."""
        return BytesIO()

    def _encode(self, s):
        """Return the str s encoded to ASCII bytes."""
        return s.encode('ascii')

    def _write_headers(self, msg):
        """Write every header of msg as bytes, then a blank line."""
        # This is almost the same as the string version, except for handling
        # strings with 8bit bytes.
        for h, v in msg.raw_items():
            self._fp.write(self.policy.fold_binary(h, v))
        # A blank line always separates headers from body
        self.write(self._NL)

    def _handle_text(self, msg):
        """Write the text payload of msg without modifying msg itself."""
        # If the string has surrogates the original source was bytes, so
        # just write it back out.
        payload = msg._payload
        if payload is None:
            return
        if _has_surrogates(payload) and not self.policy.cte_type == '7bit':
            if self._mangle_from_:
                # Mangle a local copy only: flattening must not alter the
                # payload stored on the caller's message object.
                payload = fcre.sub(">From ", payload)
            self._write_lines(payload)
        else:
            super()._handle_text(msg)

    # Default body handler
    _writeBody = _handle_text

    @classmethod
    def _compile_re(cls, s, flags):
        """Compile the pattern s, encoded to bytes, for searching bytes."""
        return re.compile(s.encode('ascii'), flags)



_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'

class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, fmt=None, *,
                 policy=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen,
                           policy=policy)
        if fmt is None:
            self._fmt = _FMT
        else:
            self._fmt = fmt

    def _dispatch(self, msg):
        """Print each text subpart's payload and a placeholder for the rest."""
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'text':
                print(part.get_payload(decode=False), file=self)
            elif maintype == 'multipart':
                # Just skip this
                pass
            else:
                print(self._fmt % {
                    'type'       : part.get_content_type(),
                    'maintype'   : part.get_content_maintype(),
                    'subtype'    : part.get_content_subtype(),
                    'filename'   : part.get_filename('[no filename]'),
                    'description': part.get('Content-Description',
                                            '[no description]'),
                    'encoding'   : part.get('Content-Transfer-Encoding',
                                            '[no encoding]'),
                    }, file=self)



# Helper used by Generator._make_boundary
_width = len(repr(sys.maxsize-1))
_fmt = '%%0%dd' % _width

# Backward compatibility
_make_boundary = Generator._make_boundary