# Copyright (C) 2002-2007 Python Software Foundation
# Author: Ben Gertzfield, Barry Warsaw
# Contact: email-sig@python.org

"""Header encoding and decoding functionality."""

__all__ = [
    'Header',
    'decode_header',
    'make_header',
    ]

import re
import binascii

import email.quoprimime
import email.base64mime

from email.errors import HeaderParseError
from email import charset as _charset
Charset = _charset.Charset

NL = '\n'
SPACE = ' '
BSPACE = b' '
SPACE8 = ' ' * 8
EMPTYSTRING = ''
MAXLINELEN = 78
FWS = ' \t'

USASCII = Charset('us-ascii')
UTF8 = Charset('utf-8')

# Match encoded-word strings in the form =?charset?q?Hello_World?=
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qQbB])  # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
  \?=                   # literal ?=
  ''', re.VERBOSE | re.MULTILINE)

# Field name regexp, including trailing colon, but not separating whitespace,
# according to RFC 2822.  Character range is from exclamation mark to tilde.
# For use with .match()
fcre = re.compile(r'[\041-\176]+:$')

# Find a header embedded in a putative header value.  Used to check for
# header injection attack.
_embedded_header = re.compile(r'\n[^ \t]+:')



# Helpers
_max_append = email.quoprimime._max_append



def decode_header(header):
    """Decode a message header value without converting charset.

    Returns a list of (string, charset) pairs containing each of the decoded
    parts of the header.  Charset is None for non-encoded parts of the header,
    otherwise a lower-case string containing the name of the character set
    specified in the encoded string.

    header may be a string that may or may not contain RFC2047 encoded words,
    or it may be a Header object.

    An email.errors.HeaderParseError may be raised when certain decoding error
    occurs (e.g. a base64 decoding exception).
    """
    # If it is a Header object, we can just return the encoded chunks.
    if hasattr(header, '_chunks'):
        return [(_charset._encode(string, str(charset)), str(charset))
                    for string, charset in header._chunks]
    # If no encoding, just return the header with no charset.
    if not ecre.search(header):
        return [(header, None)]
    # First step is to parse all the encoded parts into triplets of the form
    # (encoded_string, encoding, charset).  For unencoded strings, the last
    # two parts will be None.
    words = []
    for line in header.splitlines():
        # ecre has three groups, so split() yields
        # [unencoded, charset, encoding, encoded, unencoded, ...].
        parts = ecre.split(line)
        first = True
        while parts:
            unencoded = parts.pop(0)
            if first:
                unencoded = unencoded.lstrip()
                first = False
            if unencoded:
                words.append((unencoded, None, None))
            if parts:
                charset = parts.pop(0).lower()
                encoding = parts.pop(0).lower()
                encoded = parts.pop(0)
                words.append((encoded, encoding, charset))
    # Now loop over words and remove words that consist of whitespace
    # between two encoded strings.
    droplist = []
    for n, w in enumerate(words):
        if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace():
            droplist.append(n-1)
    # Delete from the end so that earlier indexes stay valid.
    for d in reversed(droplist):
        del words[d]

    # The next step is to decode each encoded word by applying the reverse
    # base64 or quopri transformation.  decoded_words is now a list of the
    # form (decoded_word, charset).
    decoded_words = []
    for encoded_string, encoding, charset in words:
        if encoding is None:
            # This is an unencoded word.
            decoded_words.append((encoded_string, charset))
        elif encoding == 'q':
            word = email.quoprimime.header_decode(encoded_string)
            decoded_words.append((word, charset))
        elif encoding == 'b':
            paderr = len(encoded_string) % 4   # Postel's law: add missing padding
            if paderr:
                encoded_string += '==='[:4 - paderr]
            try:
                word = email.base64mime.decode(encoded_string)
            except binascii.Error as err:
                # Keep the low-level error as the explicit cause.
                raise HeaderParseError('Base64 decoding error') from err
            else:
                decoded_words.append((word, charset))
        else:
            raise AssertionError('Unexpected encoding: ' + encoding)
    # Now convert all words to bytes and collapse consecutive runs of
    # similarly encoded words.
    collapsed = []
    last_word = last_charset = None
    for word, charset in decoded_words:
        if isinstance(word, str):
            word = bytes(word, 'raw-unicode-escape')
        if last_word is None:
            last_word = word
            last_charset = charset
        elif charset != last_charset:
            collapsed.append((last_word, last_charset))
            last_word = word
            last_charset = charset
        elif last_charset is None:
            last_word += BSPACE + word
        else:
            last_word += word
    collapsed.append((last_word, last_charset))
    return collapsed



def make_header(decoded_seq, maxlinelen=None, header_name=None,
                continuation_ws=' '):
    """Create a Header from a sequence of pairs as returned by decode_header()

    decode_header() takes a header value string and returns a sequence of
    pairs of the format (decoded_string, charset) where charset is the string
    name of the character set.

    This function takes one of those sequence of pairs and returns a Header
    instance.  Optional maxlinelen, header_name, and continuation_ws are as in
    the Header constructor.
    """
    h = Header(maxlinelen=maxlinelen, header_name=header_name,
               continuation_ws=continuation_ws)
    for s, charset in decoded_seq:
        # None means us-ascii but we can simply pass it on to h.append()
        if charset is not None and not isinstance(charset, Charset):
            charset = Charset(charset)
        h.append(s, charset)
    return h



class Header:
    """A header value stored as a list of (string, Charset) chunks."""

    def __init__(self, s=None, charset=None,
                 maxlinelen=None, header_name=None,
                 continuation_ws=' ', errors='strict'):
        """Create a MIME-compliant header that can contain many character sets.

        Optional s is the initial header value.  If None, the initial header
        value is not set.  You can later append to the header with .append()
        method calls.  s may be a byte string or a Unicode string, but see the
        .append() documentation for semantics.

        Optional charset serves two purposes: it has the same meaning as the
        charset argument to the .append() method.  It also sets the default
        character set for all subsequent .append() calls that omit the charset
        argument.  If charset is not provided in the constructor, the us-ascii
        charset is used both as s's initial charset and as the default for
        subsequent .append() calls.

        The maximum line length can be specified explicitly via maxlinelen. For
        splitting the first line to a shorter value (to account for the field
        header which isn't included in s, e.g. `Subject') pass in the name of
        the field in header_name.  The default maxlinelen is 78 as recommended
        by RFC 2822.

        continuation_ws must be RFC 2822 compliant folding whitespace (usually
        either a space or a hard tab) which will be prepended to continuation
        lines.

        errors is passed through to the .append() call.
        """
        if charset is None:
            charset = USASCII
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        self._continuation_ws = continuation_ws
        self._chunks = []
        if s is not None:
            self.append(s, charset, errors)
        if maxlinelen is None:
            maxlinelen = MAXLINELEN
        self._maxlinelen = maxlinelen
        if header_name is None:
            self._headerlen = 0
        else:
            # Take the separating colon and space into account.
            self._headerlen = len(header_name) + 2

    def __str__(self):
        """Return the string value of the header."""
        self._normalize()
        uchunks = []
        lastcs = None
        lastspace = None
        for string, charset in self._chunks:
            # We must preserve spaces between encoded and non-encoded word
            # boundaries, which means for us we need to add a space when we go
            # from a charset to None/us-ascii, or from None/us-ascii to a
            # charset.  Only do this for the second and subsequent chunks.
            # Don't add a space if the None/us-ascii string already has
            # a space (trailing or leading depending on transition)
            nextcs = charset
            if nextcs == _charset.UNKNOWN8BIT:
                original_bytes = string.encode('ascii', 'surrogateescape')
                string = original_bytes.decode('ascii', 'replace')
            if uchunks:
                hasspace = string and self._nonctext(string[0])
                if lastcs not in (None, 'us-ascii'):
                    if nextcs in (None, 'us-ascii') and not hasspace:
                        uchunks.append(SPACE)
                        nextcs = None
                elif nextcs not in (None, 'us-ascii') and not lastspace:
                    uchunks.append(SPACE)
            lastspace = string and self._nonctext(string[-1])
            lastcs = nextcs
            uchunks.append(string)
        return EMPTYSTRING.join(uchunks)

    # Rich comparison operators for equality only.  BAW: does it make sense to
    # have or explicitly disable <, <=, >, >= operators?
    def __eq__(self, other):
        """Compare other against the string value of this header."""
        # other may be a Header or a string.  Both are fine so coerce
        # ourselves to a unicode (of the unencoded header value), swap the
        # args and do another comparison.
        return other == str(self)

    def append(self, s, charset=None, errors='strict'):
        """Append a string to the MIME header.

        Optional charset, if given, should be a Charset instance or the name
        of a character set (which will be converted to a Charset instance).  A
        value of None (the default) means that the charset given in the
        constructor is used.

        s may be a byte string or a Unicode string.  If it is a byte string
        (i.e. isinstance(s, str) is false), then charset is the encoding of
        that byte string, and a UnicodeError will be raised if the string
        cannot be decoded with that charset.  If s is a Unicode string, then
        charset is a hint specifying the character set of the characters in
        the string.  In either case, when producing an RFC 2822 compliant
        header using RFC 2047 rules, the string will be encoded using the
        output codec of the charset.  If the string cannot be encoded to the
        output codec, a UnicodeError will be raised.

        Optional `errors' is passed as the errors argument to the decode
        call if s is a byte string.
        """
        if charset is None:
            charset = self._charset
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        if not isinstance(s, str):
            input_charset = charset.input_codec or 'us-ascii'
            if input_charset == _charset.UNKNOWN8BIT:
                s = s.decode('us-ascii', 'surrogateescape')
            else:
                s = s.decode(input_charset, errors)
        # Ensure that the bytes we're storing can be decoded to the output
        # character set, otherwise an early error is raised.
        output_charset = charset.output_codec or 'us-ascii'
        if output_charset != _charset.UNKNOWN8BIT:
            try:
                s.encode(output_charset, errors)
            except UnicodeEncodeError:
                if output_charset!='us-ascii':
                    raise
                # Text that does not fit us-ascii is stored as utf-8 instead.
                charset = UTF8
        self._chunks.append((s, charset))

    def _nonctext(self, s):
        """True if string s is not a ctext character of RFC822.
        """
        return s.isspace() or s in ('(', ')', '\\')

    def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
        r"""Encode a message header into an RFC-compliant format.

        There are many issues involved in converting a given string for use in
        an email header.  Only certain character sets are readable in most
        email clients, and as header strings can only contain a subset of
        7-bit ASCII, care must be taken to properly convert and encode (with
        Base64 or quoted-printable) header strings.  In addition, there is a
        75-character length limit on any given encoded header field, so
        line-wrapping must be performed, even with double-byte character sets.

        Optional maxlinelen specifies the maximum length of each generated
        line, exclusive of the linesep string.  Individual lines may be longer
        than maxlinelen if a folding point cannot be found.  The first line
        will be shorter by the length of the header name plus ": " if a header
        name was specified at Header construction time.  The default value for
        maxlinelen is determined at header construction time.

        Optional splitchars is a string containing characters which should be
        given extra weight by the splitting algorithm during normal header
        wrapping.  This is in very rough support of RFC 2822's `higher level
        syntactic breaks':  split points preceded by a splitchar are preferred
        during line splitting, with the characters preferred in the order in
        which they appear in the string.  Space and tab may be included in the
        string to indicate whether preference should be given to one over the
        other as a split point when other split chars do not appear in the line
        being split.  Splitchars does not affect RFC 2047 encoded lines.

        Optional linesep is a string to be used to separate the lines of
        the value.  The default value is the most useful for typical
        Python applications, but it can be set to \r\n to produce RFC-compliant
        line separators when needed.
        """
        self._normalize()
        if maxlinelen is None:
            maxlinelen = self._maxlinelen
        # A maxlinelen of 0 means don't wrap.  For all practical purposes,
        # choosing a huge number here accomplishes that and makes the
        # _ValueFormatter algorithm much simpler.
        if maxlinelen == 0:
            maxlinelen = 1000000
        formatter = _ValueFormatter(self._headerlen, maxlinelen,
                                    self._continuation_ws, splitchars)
        lastcs = None
        # hasspace stays None until the first chunk has been processed, so
        # no transition is added before the first chunk.
        hasspace = lastspace = None
        for string, charset in self._chunks:
            if hasspace is not None:
                hasspace = string and self._nonctext(string[0])
                if lastcs not in (None, 'us-ascii'):
                    if not hasspace or charset not in (None, 'us-ascii'):
                        formatter.add_transition()
                elif charset not in (None, 'us-ascii') and not lastspace:
                    formatter.add_transition()
            lastspace = string and self._nonctext(string[-1])
            lastcs = charset
            hasspace = False
            lines = string.splitlines()
            if lines:
                formatter.feed('', lines[0], charset)
            else:
                formatter.feed('', '', charset)
            for line in lines[1:]:
                formatter.newline()
                if charset.header_encoding is not None:
                    formatter.feed(self._continuation_ws, ' ' + line.lstrip(),
                                   charset)
                else:
                    sline = line.lstrip()
                    fws = line[:len(line)-len(sline)]
                    formatter.feed(fws, sline, charset)
            if len(lines) > 1:
                formatter.newline()
        if self._chunks:
            formatter.add_transition()
        value = formatter._str(linesep)
        if _embedded_header.search(value):
            raise HeaderParseError("header value appears to contain "
                "an embedded header: {!r}".format(value))
        return value

    def _normalize(self):
        """Merge adjacent chunks that share a charset, joined by a space."""
        # Step 1: Normalize the chunks so that all runs of identical charsets
        # get collapsed into a single unicode string.
        chunks = []
        last_charset = None
        last_chunk = []
        for string, charset in self._chunks:
            if charset == last_charset:
                last_chunk.append(string)
            else:
                if last_charset is not None:
                    chunks.append((SPACE.join(last_chunk), last_charset))
                last_chunk = [string]
                last_charset = charset
        if last_chunk:
            chunks.append((SPACE.join(last_chunk), last_charset))
        self._chunks = chunks



class _ValueFormatter:
    """Accumulate header text into lines no longer than maxlen where possible.

    Finished lines are collected in self._lines; the line currently being
    built is an _Accumulator in self._current_line.
    """

    def __init__(self, headerlen, maxlen, continuation_ws, splitchars):
        self._maxlen = maxlen
        self._continuation_ws = continuation_ws
        self._continuation_ws_len = len(continuation_ws)
        self._splitchars = splitchars
        self._lines = []
        # headerlen is counted against the length of the first line only.
        self._current_line = _Accumulator(headerlen)

    def _str(self, linesep):
        """Flush the current line and join all lines with linesep."""
        self.newline()
        return linesep.join(self._lines)

    def __str__(self):
        return self._str(NL)

    def newline(self):
        """Move the current line into self._lines and start a fresh one."""
        # Drop a trailing transition marker (' ', ''); keep anything else.
        end_of_line = self._current_line.pop()
        if end_of_line != (' ', ''):
            self._current_line.push(*end_of_line)
        if len(self._current_line) > 0:
            if self._current_line.is_onlyws() and self._lines:
                # A whitespace-only line is glued to the previous line.
                self._lines[-1] += str(self._current_line)
            else:
                self._lines.append(str(self._current_line))
        self._current_line.reset()

    def add_transition(self):
        """Push a (' ', '') marker onto the current line."""
        self._current_line.push(' ', '')

    def feed(self, fws, string, charset):
        """Add string, preceded by fws, encoded as charset requires."""
        # If the charset has no header encoding (i.e. it is an ASCII encoding)
        # then we must split the header at the "highest level syntactic break"
        # possible.  Note that we don't have a lot of smarts about field
        # syntax; we just try to break on semi-colons, then commas, then
        # whitespace.  Eventually, this should be pluggable.
        if charset.header_encoding is None:
            self._ascii_split(fws, string, self._splitchars)
            return
        # Otherwise, we're doing either a Base64 or a quoted-printable
        # encoding which means we don't need to split the line on syntactic
        # breaks.  We can basically just find enough characters to fit on the
        # current line, minus the RFC 2047 chrome.  What makes this trickier
        # though is that we have to split at octet boundaries, not character
        # boundaries but it's only safe to split at character boundaries so at
        # best we can only get close.
        encoded_lines = charset.header_encode_lines(string, self._maxlengths())
        # The first element extends the current line, but if it's None then
        # nothing more fit on the current line so start a new line.
        try:
            first_line = encoded_lines.pop(0)
        except IndexError:
            # There are no encoded lines, so we're done.
            return
        if first_line is not None:
            self._append_chunk(fws, first_line)
        try:
            last_line = encoded_lines.pop()
        except IndexError:
            # There was only one line.
            return
        self.newline()
        self._current_line.push(self._continuation_ws, last_line)
        # Everything else are full lines in themselves.
        for line in encoded_lines:
            self._lines.append(self._continuation_ws + line)

    def _maxlengths(self):
        """Yield the room left on the current line, then on each later line."""
        # The first line's length.
        yield self._maxlen - len(self._current_line)
        while True:
            yield self._maxlen - self._continuation_ws_len

    def _ascii_split(self, fws, string, splitchars):
        """Split fws+string on runs of FWS and append each (fws, part) pair."""
        # The RFC 2822 header folding algorithm is simple in principle but
        # complex in practice.  Lines may be folded any place where "folding
        # white space" appears by inserting a linesep character in front of the
        # FWS.  The complication is that not all spaces or tabs qualify as FWS,
        # and we are also supposed to prefer to break at "higher level
        # syntactic breaks".  We can't do either of these without intimate
        # knowledge of the structure of structured headers, which we don't have
        # here.  So the best we can do here is prefer to break at the specified
        # splitchars, and hope that we don't choose any spaces or tabs that
        # aren't legal FWS.  (This is at least better than the old algorithm,
        # where we would sometimes *introduce* FWS after a splitchar, or the
        # algorithm before that, where we would turn all white space runs into
        # single spaces or tabs.)
        parts = re.split("(["+FWS+"]+)", fws+string)
        # Make parts alternate whitespace, text, whitespace, text, ...
        if parts[0]:
            parts[:0] = ['']
        else:
            parts.pop(0)
        # Walk the list two items at a time: (fws, part).
        for fws, part in zip(*[iter(parts)]*2):
            self._append_chunk(fws, part)

    def _append_chunk(self, fws, string):
        """Push (fws, string) and fold the current line if it became too long."""
        self._current_line.push(fws, string)
        if len(self._current_line) > self._maxlen:
            # Find the best split point, working backward from the end.
            # There might be none, on a long first line.
            for ch in self._splitchars:
                for i in range(self._current_line.part_count()-1, 0, -1):
                    if ch.isspace():
                        fws = self._current_line[i][0]
                        if fws and fws[0]==ch:
                            break
                    prevpart = self._current_line[i-1][1]
                    if prevpart and prevpart[-1]==ch:
                        break
                else:
                    continue
                break
            else:
                fws, part = self._current_line.pop()
                if self._current_line._initial_size > 0:
                    # There will be a header, so leave it on a line by itself.
                    self.newline()
                    if not fws:
                        # We don't use continuation_ws here because the whitespace
                        # after a header should always be a space.
                        fws = ' '
                self._current_line.push(fws, part)
                return
            remainder = self._current_line.pop_from(i)
            self._lines.append(str(self._current_line))
            self._current_line.reset(remainder)


class _Accumulator(list):
    """A list of (fws, string) pairs forming one output line.

    len() is the total character count of all pairs plus _initial_size;
    use part_count() for the number of pairs.
    """

    def __init__(self, initial_size=0):
        self._initial_size = initial_size
        super().__init__()

    def push(self, fws, string):
        """Append the pair (fws, string)."""
        self.append((fws, string))

    def pop_from(self, i=0):
        """Remove and return all pairs from index i onward."""
        popped = self[i:]
        self[i:] = []
        return popped

    def pop(self):
        """Remove and return the last pair, or ('', '') if there is none."""
        if self.part_count()==0:
            return ('', '')
        return super().pop()

    def __len__(self):
        return sum((len(fws)+len(part) for fws, part in self),
                   self._initial_size)

    def __str__(self):
        return EMPTYSTRING.join((EMPTYSTRING.join((fws, part))
                                for fws, part in self))

    def reset(self, startval=None):
        """Replace the contents with startval and clear _initial_size."""
        if startval is None:
            startval = []
        self[:] = startval
        self._initial_size = 0

    def is_onlyws(self):
        """True if _initial_size is 0 and the text is empty or all whitespace."""
        return self._initial_size==0 and (not self or str(self).isspace())

    def part_count(self):
        """Return the number of (fws, string) pairs held."""
        return super().__len__()