# Copyright (C) 2001-2010 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Classes to generate plain text from a message object tree."""

__all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator']

import re
import sys
import time
import random

from copy import deepcopy
from io import StringIO, BytesIO
from email.utils import _has_surrogates

UNDERSCORE = '_'
NL = '\n'  # XXX: no longer used by the code below.

# Matches any of the three newline conventions.
NLCRE = re.compile(r'\r\n|\r|\n')
# Matches a "From " at the start of any line (needs '>' escaping in mbox).
fcre = re.compile(r'^From ', re.MULTILINE)



class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, *,
                 policy=None):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default if policy
        is not set), escapes From_ lines in the body of the message by putting
        a `>' in front of them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.

        The policy keyword specifies a policy object that controls a number of
        aspects of the generator's operation.  If no policy is specified,
        the policy associated with the Message object passed to the
        flatten method is used.

        """

        if mangle_from_ is None:
            mangle_from_ = True if policy is None else policy.mangle_from_
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self.maxheaderlen = maxheaderlen
        self.policy = policy

    def write(self, s):
        """Write the string s to the current output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False, linesep=None):
        r"""Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.

        linesep specifies the characters used to indicate a new line in
        the output.  The default value is determined by the policy specified
        when the Generator instance was created or, if none was specified,
        from the policy associated with the msg.

        """
        # We use the _XXX constants for operating on data that comes directly
        # from the msg, and _encoded_XXX constants for operating on data that
        # has already been converted (to bytes in the BytesGenerator) and
        # inserted into a temporary buffer.
        policy = msg.policy if self.policy is None else self.policy
        if linesep is not None:
            policy = policy.clone(linesep=linesep)
        if self.maxheaderlen is not None:
            policy = policy.clone(max_line_length=self.maxheaderlen)
        self._NL = policy.linesep
        self._encoded_NL = self._encode(self._NL)
        self._EMPTY = ''
        self._encoded_EMPTY = self._encode(self._EMPTY)
        # Because we use clone (below) when we recursively process message
        # subparts, and because clone uses the computed policy (not None),
        # submessages will automatically get set to the computed policy when
        # they are processed by this code.
        old_gen_policy = self.policy
        old_msg_policy = msg.policy
        try:
            self.policy = policy
            msg.policy = policy
            if unixfrom:
                ufrom = msg.get_unixfrom()
                if not ufrom:
                    ufrom = 'From nobody ' + time.ctime(time.time())
                self.write(ufrom + self._NL)
            self._write(msg)
        finally:
            # Always restore the policies that were temporarily overridden.
            self.policy = old_gen_policy
            msg.policy = old_msg_policy

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp,
                              self._mangle_from_,
                              None,  # Use policy setting, which we've adjusted
                              policy=self.policy)

    #
    # Protected interface - undocumented ;/
    #

    # Note that we use 'self.write' when what we are writing is coming from
    # the source, and self._fp.write when what we are writing is coming from a
    # buffer (because the Bytes subclass has already had a chance to transform
    # the data in its write method in that case).  This is an entirely
    # pragmatic split determined by experiment; we could be more general by
    # always using write and having the Bytes subclass write method detect when
    # it has already transformed the input; but, since this whole thing is a
    # hack anyway this seems good enough.

    def _new_buffer(self):
        """Return a fresh temporary buffer for collecting output."""
        # BytesGenerator overrides this to return BytesIO.
        return StringIO()

    def _encode(self, s):
        """Return s in the form stored in buffers (unchanged for text)."""
        # BytesGenerator overrides this to encode strings to bytes.
        return s

    def _write_lines(self, lines):
        """Write the string lines with its newlines replaced by self._NL."""
        # We have to transform the line endings.
        if not lines:
            return
        lines = NLCRE.split(lines)
        for line in lines[:-1]:
            self.write(line)
            self.write(self._NL)
        if lines[-1]:
            self.write(lines[-1])
        # XXX logic tells me this else should be needed, but the tests fail
        # with it and pass without it.  (NLCRE.split ends with a blank element
        # if and only if there was a trailing newline.)
        #else:
        #    self.write(self._NL)

    def _write(self, msg):
        """Write the headers and body of msg to the current output."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a buffer.  Then we write the
        # headers and the buffer contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._munge_cte = None
            self._fp = sfp = self._new_buffer()
            self._dispatch(msg)
        finally:
            self._fp = oldfp
            munge_cte = self._munge_cte
            del self._munge_cte
        # If we munged the cte, copy the message again and re-fix the CTE.
        if munge_cte:
            msg = deepcopy(msg)
            # The original message may have no Content-Transfer-Encoding
            # header at all, in which case replace_header() would raise
            # KeyError; add the header then.  Otherwise replace it in place
            # so that the header order is preserved.
            if msg.get('content-transfer-encoding') is None:
                msg['Content-Transfer-Encoding'] = munge_cte[0]
            else:
                msg.replace_header('content-transfer-encoding', munge_cte[0])
            msg.replace_header('content-type', munge_cte[1])
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific body handler available for msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_MAINTYPE_SUBTYPE().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_MAINTYPE().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, folded by the policy, then a blank line."""
        for h, v in msg.raw_items():
            self.write(self.policy.fold(h, v))
        # A blank line always separates headers from body
        self.write(self._NL)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write a string payload, escaping From_ lines when configured.

        Raises TypeError if the payload is not a string.  When the stored
        payload contains surrogates and the message declares a charset, the
        payload is re-encoded on a copy of the message and the resulting
        Content-Transfer-Encoding / Content-Type values are recorded in
        self._munge_cte for _write() to apply to the headers.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, str):
            raise TypeError('string payload expected: %s' % type(payload))
        if _has_surrogates(msg._payload):
            charset = msg.get_param('charset')
            if charset is not None:
                # XXX: This copy stuff is an ugly hack to avoid modifying the
                # existing message.
                msg = deepcopy(msg)
                del msg['content-transfer-encoding']
                msg.set_payload(payload, charset)
                payload = msg.get_payload()
                self._munge_cte = (msg['content-transfer-encoding'],
                                   msg['content-type'])
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._write_lines(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write preamble, boundary-delimited subparts and epilogue of msg."""
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, str):
            # e.g. a non-strict parse of a message with no starting boundary.
            self.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = self._new_buffer()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Create a boundary that doesn't appear in any of the
            # message texts.
            alltext = self._encoded_NL.join(msgtexts)
            boundary = self._make_boundary(alltext)
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            if self._mangle_from_:
                preamble = fcre.sub('>From ', msg.preamble)
            else:
                preamble = msg.preamble
            self._write_lines(preamble)
            self.write(self._NL)
        # dash-boundary transport-padding CRLF
        self.write('--' + boundary + self._NL)
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            self.write(self._NL + '--' + boundary + self._NL)
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self.write(self._NL + '--' + boundary + '--' + self._NL)
        if msg.epilogue is not None:
            if self._mangle_from_:
                epilogue = fcre.sub('>From ', msg.epilogue)
            else:
                epilogue = msg.epilogue
            self._write_lines(epilogue)

    def _handle_multipart_signed(self, msg):
        """Write a multipart/signed message with header wrapping disabled."""
        # The contents of signed parts has to stay unmodified in order to keep
        # the signature intact per RFC1847 2.1, so we disable header wrapping.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        p = self.policy
        self.policy = p.clone(max_line_length=0)
        try:
            self._handle_multipart(msg)
        finally:
            self.policy = p

    def _handle_message_delivery_status(self, msg):
        """Write the header blocks of a message/delivery-status payload."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = self._new_buffer()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            text = s.getvalue()
            lines = text.split(self._encoded_NL)
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == self._encoded_EMPTY:
                blocks.append(self._encoded_NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(self._encoded_NL.join(blocks))

    def _handle_message(self, msg):
        """Write the body of a message/* part (nested message or raw string)."""
        s = self._new_buffer()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        # Except, it turns out, when it's a string instead, which happens when
        # and only when HeaderParser is used on a message of mime type
        # message/rfc822.  Such messages are generated by, for example,
        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
        # in that case we just emit the string body.
        payload = msg._payload
        if isinstance(payload, list):
            g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
            payload = s.getvalue()
        else:
            payload = self._encode(payload)
        self._fp.write(payload)

    # This used to be a module level function; we use a classmethod for this
    # and _compile_re so we can continue to provide the module level function
    # for backward compatibility by doing
    #   _make_boundary = Generator._make_boundary
    # at the end of the module.  It *is* internal, so we could drop that...
    @classmethod
    def _make_boundary(cls, text=None):
        """Return a random boundary string that does not occur in text.

        If text is None no uniqueness check is performed.  Otherwise a
        '.N' suffix (N counting up from 0) is appended until no line of text
        equals '--' + boundary, optionally followed by '--'.
        """
        # Craft a random boundary.  If text is given, ensure that the chosen
        # boundary doesn't appear in the text.
        token = random.randrange(sys.maxsize)
        boundary = ('=' * 15) + (_fmt % token) + '=='
        if text is None:
            return boundary
        b = boundary
        counter = 0
        while True:
            cre = cls._compile_re('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
            if not cre.search(text):
                break
            b = boundary + '.' + str(counter)
            counter += 1
        return b

    @classmethod
    def _compile_re(cls, s, flags):
        """Compile the pattern s in the form needed to search buffered text."""
        return re.compile(s, flags)


class BytesGenerator(Generator):
    """Generates a bytes version of a Message object tree.

    Functionally identical to the base Generator except that the output is
    bytes and not string.  When surrogates were used in the input to encode
    bytes, these are decoded back to bytes for output.  If the policy has
    cte_type set to 7bit, then the message is transformed such that the
    non-ASCII bytes are properly content transfer encoded, using the charset
    unknown-8bit.

    The outfp object must accept bytes in its write method.
    """

    def write(self, s):
        """Encode the string s (surrogates become raw bytes) and write it."""
        self._fp.write(s.encode('ascii', 'surrogateescape'))

    def _new_buffer(self):
        """Return a fresh bytes buffer for collecting output."""
        return BytesIO()

    def _encode(self, s):
        """Return s encoded to ASCII bytes."""
        return s.encode('ascii')

    def _write_headers(self, msg):
        """Write every header of msg as folded bytes, then a blank line."""
        # This is almost the same as the string version, except for handling
        # strings with 8bit bytes.
        for h, v in msg.raw_items():
            # fold_binary already returns bytes, so bypass self.write().
            self._fp.write(self.policy.fold_binary(h, v))
        # A blank line always separates headers from body
        self.write(self._NL)

    def _handle_text(self, msg):
        """Write a text payload, passing through bytes smuggled as surrogates.

        When the stored payload contains surrogates and the policy's cte_type
        is not '7bit', the payload is written back out directly (after From_
        escaping, if enabled).  Otherwise the base class handler is used.
        """
        # If the string has surrogates the original source was bytes, so
        # just write it back out.
        payload = msg._payload
        if payload is None:
            return
        if _has_surrogates(payload) and self.policy.cte_type != '7bit':
            if self._mangle_from_:
                # Escape into a local only: flattening a message must not
                # rewrite the payload stored on the message object.
                payload = fcre.sub(">From ", payload)
            self._write_lines(payload)
        else:
            super()._handle_text(msg)

    # Default body handler
    _writeBody = _handle_text

    @classmethod
    def _compile_re(cls, s, flags):
        """Compile s as a bytes pattern, since buffered text is bytes here."""
        return re.compile(s.encode('ascii'), flags)



_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'

class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, fmt=None, *,
                 policy=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        super().__init__(outfp, mangle_from_, maxheaderlen, policy=policy)
        if fmt is None:
            self._fmt = _FMT
        else:
            self._fmt = fmt

    def _dispatch(self, msg):
        """Write text parts as-is and a placeholder line for other parts."""
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'text':
                # print() goes through self.write() and appends a newline.
                print(part.get_payload(decode=False), file=self)
            elif maintype == 'multipart':
                # Just skip this
                pass
            else:
                print(self._fmt % {
                    'type'       : part.get_content_type(),
                    'maintype'   : part.get_content_maintype(),
                    'subtype'    : part.get_content_subtype(),
                    'filename'   : part.get_filename('[no filename]'),
                    'description': part.get('Content-Description',
                                            '[no description]'),
                    'encoding'   : part.get('Content-Transfer-Encoding',
                                            '[no encoding]'),
                    }, file=self)



# Helper used by Generator._make_boundary
_width = len(repr(sys.maxsize-1))
_fmt = '%%0%dd' % _width

# Backward compatibility
_make_boundary = Generator._make_boundary