# Copyright (C) 2002-2007 Python Software Foundation
# Author: Ben Gertzfield, Barry Warsaw
# Contact: email-sig@python.org

"""Header encoding and decoding functionality."""

__all__ = [
    'Header',
    'decode_header',
    'make_header',
    ]

import re
import binascii

import email.quoprimime
import email.base64mime

from email.errors import HeaderParseError
from email import charset as _charset
Charset = _charset.Charset

NL = '\n'
SPACE = ' '
BSPACE = b' '
SPACE8 = ' ' * 8
EMPTYSTRING = ''
MAXLINELEN = 78
FWS = ' \t'

USASCII = Charset('us-ascii')
UTF8 = Charset('utf-8')

# Match encoded-word strings in the form =?charset?q?Hello_World?=
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qQbB])  # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
  \?=                   # literal ?=
  ''', re.VERBOSE | re.MULTILINE)

# Field name regexp, including trailing colon, but not separating whitespace,
# according to RFC 2822.  Character range is from exclamation mark (octal 041)
# to tilde (octal 176).  For use with .match()
fcre = re.compile(r'[\041-\176]+:$')

# Find a header embedded in a putative header value.  Used to check for
# header injection attack.
_embedded_header = re.compile(r'\n[^ \t]+:')


# Helpers
_max_append = email.quoprimime._max_append


def decode_header(header):
    """Decode a message header value without converting charset.

    Returns a list of (string, charset) pairs containing each of the decoded
    parts of the header.  Charset is None for non-encoded parts of the header,
    otherwise a lower-case string containing the name of the character set
    specified in the encoded string.

    header may be a string that may or may not contain RFC2047 encoded words,
    or it may be a Header object.

    An email.errors.HeaderParseError may be raised when certain decoding error
    occurs (e.g. a base64 decoding exception).
    """
    # If it is a Header object, we can just return the encoded chunks.
    if hasattr(header, '_chunks'):
        return [(_charset._encode(string, str(charset)), str(charset))
                    for string, charset in header._chunks]
    # If no encoding, just return the header with no charset.
    if not ecre.search(header):
        return [(header, None)]
    # First step is to parse all the encoded parts into triplets of the form
    # (encoded_string, encoding, charset).  For unencoded strings, the last
    # two parts will be None.
    words = []
    for line in header.splitlines():
        # ecre has three groups, so split() yields a repeating pattern of
        # [unencoded, charset, encoding, encoded, unencoded, ...].
        parts = ecre.split(line)
        first = True
        while parts:
            unencoded = parts.pop(0)
            if first:
                # Only the leading whitespace of each physical line is dropped.
                unencoded = unencoded.lstrip()
                first = False
            if unencoded:
                words.append((unencoded, None, None))
            if parts:
                charset = parts.pop(0).lower()
                encoding = parts.pop(0).lower()
                encoded = parts.pop(0)
                words.append((encoded, encoding, charset))
    # Now loop over words and remove words that consist of whitespace
    # between two encoded strings.
    droplist = []
    for n, w in enumerate(words):
        if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace():
            droplist.append(n-1)
    # Delete from the end so the earlier indexes stay valid.
    for d in reversed(droplist):
        del words[d]

    # The next step is to decode each encoded word by applying the reverse
    # base64 or quopri transformation.  decoded_words is now a list of the
    # form (decoded_word, charset).
    decoded_words = []
    for encoded_string, encoding, charset in words:
        if encoding is None:
            # This is an unencoded word.
            decoded_words.append((encoded_string, charset))
        elif encoding == 'q':
            word = email.quoprimime.header_decode(encoded_string)
            decoded_words.append((word, charset))
        elif encoding == 'b':
            paderr = len(encoded_string) % 4   # Postel's law: add missing padding
            if paderr:
                encoded_string += '==='[:4 - paderr]
            try:
                word = email.base64mime.decode(encoded_string)
            except binascii.Error:
                raise HeaderParseError('Base64 decoding error')
            else:
                decoded_words.append((word, charset))
        else:
            raise AssertionError('Unexpected encoding: ' + encoding)
    # Now convert all words to bytes and collapse consecutive runs of
    # similarly encoded words.
    collapsed = []
    last_word = last_charset = None
    for word, charset in decoded_words:
        if isinstance(word, str):
            word = bytes(word, 'raw-unicode-escape')
        if last_word is None:
            last_word = word
            last_charset = charset
        elif charset != last_charset:
            collapsed.append((last_word, last_charset))
            last_word = word
            last_charset = charset
        elif last_charset is None:
            # Adjacent unencoded words are rejoined with a single space.
            last_word += BSPACE + word
        else:
            last_word += word
    collapsed.append((last_word, last_charset))
    return collapsed


def make_header(decoded_seq, maxlinelen=None, header_name=None,
                continuation_ws=' '):
    """Create a Header from a sequence of pairs as returned by decode_header()

    decode_header() takes a header value string and returns a sequence of
    pairs of the format (decoded_string, charset) where charset is the string
    name of the character set.

    This function takes one of those sequence of pairs and returns a Header
    instance.  Optional maxlinelen, header_name, and continuation_ws are as in
    the Header constructor.
    """
    h = Header(maxlinelen=maxlinelen, header_name=header_name,
               continuation_ws=continuation_ws)
    for s, charset in decoded_seq:
        # None means us-ascii but we can simply pass it on to h.append()
        if charset is not None and not isinstance(charset, Charset):
            charset = Charset(charset)
        h.append(s, charset)
    return h


class Header:
    """A header value stored as a list of (string, Charset) chunks."""

    def __init__(self, s=None, charset=None,
                 maxlinelen=None, header_name=None,
                 continuation_ws=' ', errors='strict'):
        """Create a MIME-compliant header that can contain many character sets.

        Optional s is the initial header value.  If None, the initial header
        value is not set.  You can later append to the header with .append()
        method calls.  s may be a byte string or a Unicode string, but see the
        .append() documentation for semantics.

        Optional charset serves two purposes: it has the same meaning as the
        charset argument to the .append() method.  It also sets the default
        character set for all subsequent .append() calls that omit the charset
        argument.  If charset is not provided in the constructor, the us-ascii
        charset is used both as s's initial charset and as the default for
        subsequent .append() calls.

        The maximum line length can be specified explicitly via maxlinelen.  For
        splitting the first line to a shorter value (to account for the field
        header which isn't included in s, e.g. `Subject') pass in the name of
        the field in header_name.  The default maxlinelen is 78 as recommended
        by RFC 2822.

        continuation_ws must be RFC 2822 compliant folding whitespace (usually
        either a space or a hard tab) which will be prepended to continuation
        lines.

        errors is passed through to the .append() call.
        """
        if charset is None:
            charset = USASCII
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        self._continuation_ws = continuation_ws
        self._chunks = []
        if s is not None:
            self.append(s, charset, errors)
        if maxlinelen is None:
            maxlinelen = MAXLINELEN
        self._maxlinelen = maxlinelen
        if header_name is None:
            self._headerlen = 0
        else:
            # Take the separating colon and space into account.
            self._headerlen = len(header_name) + 2

    def __str__(self):
        """Return the string value of the header."""
        self._normalize()
        uchunks = []
        lastcs = None
        lastspace = None
        for string, charset in self._chunks:
            # We must preserve spaces between encoded and non-encoded word
            # boundaries, which means for us we need to add a space when we go
            # from a charset to None/us-ascii, or from None/us-ascii to a
            # charset.  Only do this for the second and subsequent chunks.
            # Don't add a space if the None/us-ascii string already has
            # a space (trailing or leading depending on transition)
            nextcs = charset
            if nextcs == _charset.UNKNOWN8BIT:
                # Surrogate-escaped bytes are round-tripped through ASCII with
                # 'replace' so undecodable bytes become replacement characters.
                original_bytes = string.encode('ascii', 'surrogateescape')
                string = original_bytes.decode('ascii', 'replace')
            if uchunks:
                hasspace = string and self._nonctext(string[0])
                if lastcs not in (None, 'us-ascii'):
                    if nextcs in (None, 'us-ascii') and not hasspace:
                        uchunks.append(SPACE)
                        nextcs = None
                elif nextcs not in (None, 'us-ascii') and not lastspace:
                    uchunks.append(SPACE)
            lastspace = string and self._nonctext(string[-1])
            lastcs = nextcs
            uchunks.append(string)
        return EMPTYSTRING.join(uchunks)

    # Rich comparison operators for equality only.  BAW: does it make sense to
    # have or explicitly disable <, <=, >, >= operators?
    def __eq__(self, other):
        """Compare other against the str() value of this header."""
        # other may be a Header or a string.  Both are fine so coerce
        # ourselves to a unicode (of the unencoded header value), swap the
        # args and do another comparison.
        return other == str(self)

    def append(self, s, charset=None, errors='strict'):
        """Append a string to the MIME header.

        Optional charset, if given, should be a Charset instance or the name
        of a character set (which will be converted to a Charset instance).  A
        value of None (the default) means that the charset given in the
        constructor is used.

        s may be a byte string or a Unicode string.  If it is a byte string
        (i.e. isinstance(s, str) is false), then charset is the encoding of
        that byte string, and a UnicodeError will be raised if the string
        cannot be decoded with that charset.  If s is a Unicode string, then
        charset is a hint specifying the character set of the characters in
        the string.  In either case, when producing an RFC 2822 compliant
        header using RFC 2047 rules, the string will be encoded using the
        output codec of the charset.  If the string cannot be encoded to the
        output codec, a UnicodeError will be raised.

        Optional `errors' is passed as the errors argument to the decode
        call if s is a byte string.
        """
        if charset is None:
            charset = self._charset
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        if not isinstance(s, str):
            input_charset = charset.input_codec or 'us-ascii'
            if input_charset == _charset.UNKNOWN8BIT:
                s = s.decode('us-ascii', 'surrogateescape')
            else:
                s = s.decode(input_charset, errors)
        # Ensure that the bytes we're storing can be decoded to the output
        # character set, otherwise an early error is raised.
        output_charset = charset.output_codec or 'us-ascii'
        if output_charset != _charset.UNKNOWN8BIT:
            try:
                s.encode(output_charset, errors)
            except UnicodeEncodeError:
                if output_charset!='us-ascii':
                    raise
                # Text that does not fit in us-ascii is stored as utf-8
                # instead of raising.
                charset = UTF8
        self._chunks.append((s, charset))

    def _nonctext(self, s):
        """True if string s is not a ctext character of RFC822.
        """
        return s.isspace() or s in ('(', ')', '\\')

    def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
        r"""Encode a message header into an RFC-compliant format.

        There are many issues involved in converting a given string for use in
        an email header.  Only certain character sets are readable in most
        email clients, and as header strings can only contain a subset of
        7-bit ASCII, care must be taken to properly convert and encode (with
        Base64 or quoted-printable) header strings.  In addition, there is a
        75-character length limit on any given encoded header field, so
        line-wrapping must be performed, even with double-byte character sets.

        Optional maxlinelen specifies the maximum length of each generated
        line, exclusive of the linesep string.  Individual lines may be longer
        than maxlinelen if a folding point cannot be found.  The first line
        will be shorter by the length of the header name plus ": " if a header
        name was specified at Header construction time.  The default value for
        maxlinelen is determined at header construction time.

        Optional splitchars is a string containing characters which should be
        given extra weight by the splitting algorithm during normal header
        wrapping.  This is in very rough support of RFC 2822's `higher level
        syntactic breaks':  split points preceded by a splitchar are preferred
        during line splitting, with the characters preferred in the order in
        which they appear in the string.  Space and tab may be included in the
        string to indicate whether preference should be given to one over the
        other as a split point when other split chars do not appear in the line
        being split.  Splitchars does not affect RFC 2047 encoded lines.

        Optional linesep is a string to be used to separate the lines of
        the value.  The default value is the most useful for typical
        Python applications, but it can be set to \r\n to produce RFC-compliant
        line separators when needed.

        Raises HeaderParseError if the encoded value appears to contain an
        embedded header (a line break followed by a non-whitespace run and a
        colon).
        """
        self._normalize()
        if maxlinelen is None:
            maxlinelen = self._maxlinelen
        # A maxlinelen of 0 means don't wrap.  For all practical purposes,
        # choosing a huge number here accomplishes that and makes the
        # _ValueFormatter algorithm much simpler.
        if maxlinelen == 0:
            maxlinelen = 1000000
        formatter = _ValueFormatter(self._headerlen, maxlinelen,
                                    self._continuation_ws, splitchars)
        lastcs = None
        # hasspace stays None only until the first chunk has been processed,
        # so the transition logic below is skipped for the first chunk.
        hasspace = lastspace = None
        for string, charset in self._chunks:
            if hasspace is not None:
                hasspace = string and self._nonctext(string[0])
                if lastcs not in (None, 'us-ascii'):
                    if not hasspace or charset not in (None, 'us-ascii'):
                        formatter.add_transition()
                elif charset not in (None, 'us-ascii') and not lastspace:
                    formatter.add_transition()
            lastspace = string and self._nonctext(string[-1])
            lastcs = charset
            hasspace = False
            lines = string.splitlines()
            if lines:
                formatter.feed('', lines[0], charset)
            else:
                formatter.feed('', '', charset)
            for line in lines[1:]:
                formatter.newline()
                if charset.header_encoding is not None:
                    formatter.feed(self._continuation_ws, ' ' + line.lstrip(),
                                   charset)
                else:
                    # Keep the line's own leading whitespace as its folding
                    # whitespace.
                    sline = line.lstrip()
                    fws = line[:len(line)-len(sline)]
                    formatter.feed(fws, sline, charset)
            if len(lines) > 1:
                formatter.newline()
        if self._chunks:
            formatter.add_transition()
        value = formatter._str(linesep)
        if _embedded_header.search(value):
            raise HeaderParseError("header value appears to contain "
                "an embedded header: {!r}".format(value))
        return value

    def _normalize(self):
        """Merge adjacent chunks that share a charset, joined by a space."""
        # Step 1: Normalize the chunks so that all runs of identical charsets
        # get collapsed into a single unicode string.
        chunks = []
        last_charset = None
        last_chunk = []
        for string, charset in self._chunks:
            if charset == last_charset:
                last_chunk.append(string)
            else:
                if last_charset is not None:
                    chunks.append((SPACE.join(last_chunk), last_charset))
                last_chunk = [string]
                last_charset = charset
        if last_chunk:
            chunks.append((SPACE.join(last_chunk), last_charset))
        self._chunks = chunks


class _ValueFormatter:
    """Accumulate (fws, text) parts into lines no longer than maxlen.

    Finished lines are collected in self._lines; the line under construction
    is an _Accumulator whose initial size is the header-name length.
    """

    def __init__(self, headerlen, maxlen, continuation_ws, splitchars):
        self._maxlen = maxlen
        self._continuation_ws = continuation_ws
        self._continuation_ws_len = len(continuation_ws)
        self._splitchars = splitchars
        self._lines = []
        self._current_line = _Accumulator(headerlen)

    def _str(self, linesep):
        """Flush the current line and join all lines with linesep."""
        self.newline()
        return linesep.join(self._lines)

    def __str__(self):
        return self._str(NL)

    def newline(self):
        """Finish the current line and start an empty one."""
        # A trailing (' ', '') part is a transition marker pushed by
        # add_transition(); drop it rather than emit a trailing space.
        end_of_line = self._current_line.pop()
        if end_of_line != (' ', ''):
            self._current_line.push(*end_of_line)
        if len(self._current_line) > 0:
            if self._current_line.is_onlyws() and self._lines:
                # A whitespace-only line is glued onto the previous line.
                self._lines[-1] += str(self._current_line)
            else:
                self._lines.append(str(self._current_line))
        self._current_line.reset()

    def add_transition(self):
        """Push a single-space part with no text onto the current line."""
        self._current_line.push(' ', '')

    def feed(self, fws, string, charset):
        """Add string, preceded by whitespace fws, using charset's rules."""
        # If the charset has no header encoding (i.e. it is an ASCII encoding)
        # then we must split the header at the "highest level syntactic break"
        # possible.  Note that we don't have a lot of smarts about field
        # syntax; we just try to break on semi-colons, then commas, then
        # whitespace.  Eventually, this should be pluggable.
        if charset.header_encoding is None:
            self._ascii_split(fws, string, self._splitchars)
            return
        # Otherwise, we're doing either a Base64 or a quoted-printable
        # encoding which means we don't need to split the line on syntactic
        # breaks.  We can basically just find enough characters to fit on the
        # current line, minus the RFC 2047 chrome.  What makes this trickier
        # though is that we have to split at octet boundaries, not character
        # boundaries but it's only safe to split at character boundaries so at
        # best we can only get close.
        encoded_lines = charset.header_encode_lines(string, self._maxlengths())
        # The first element extends the current line, but if it's None then
        # nothing more fit on the current line so start a new line.
        try:
            first_line = encoded_lines.pop(0)
        except IndexError:
            # There are no encoded lines, so we're done.
            return
        if first_line is not None:
            self._append_chunk(fws, first_line)
        try:
            last_line = encoded_lines.pop()
        except IndexError:
            # There was only one line.
            return
        self.newline()
        self._current_line.push(self._continuation_ws, last_line)
        # Everything else are full lines in themselves.
        for line in encoded_lines:
            self._lines.append(self._continuation_ws + line)

    def _maxlengths(self):
        """Yield the room left on the current line, then on each later line."""
        # The first line's length.
        yield self._maxlen - len(self._current_line)
        while True:
            yield self._maxlen - self._continuation_ws_len

    def _ascii_split(self, fws, string, splitchars):
        """Split fws+string at whitespace runs and append each piece."""
        # The RFC 2822 header folding algorithm is simple in principle but
        # complex in practice.  Lines may be folded any place where "folding
        # white space" appears by inserting a linesep character in front of the
        # FWS.  The complication is that not all spaces or tabs qualify as FWS,
        # and we are also supposed to prefer to break at "higher level
        # syntactic breaks".  We can't do either of these without intimate
        # knowledge of the structure of structured headers, which we don't have
        # here.  So the best we can do here is prefer to break at the specified
        # splitchars, and hope that we don't choose any spaces or tabs that
        # aren't legal FWS.  (This is at least better than the old algorithm,
        # where we would sometimes *introduce* FWS after a splitchar, or the
        # algorithm before that, where we would turn all white space runs into
        # single spaces or tabs.)
        parts = re.split("(["+FWS+"]+)", fws+string)
        # Make the list start with a whitespace element so it can be consumed
        # as (fws, part) pairs.
        if parts[0]:
            parts[:0] = ['']
        else:
            parts.pop(0)
        for fws, part in zip(*[iter(parts)]*2):
            self._append_chunk(fws, part)

    def _append_chunk(self, fws, string):
        """Push (fws, string) and fold the current line if it is too long."""
        self._current_line.push(fws, string)
        if len(self._current_line) > self._maxlen:
            # Find the best split point, working backward from the end.
            # There might be none, on a long first line.
            for ch in self._splitchars:
                for i in range(self._current_line.part_count()-1, 0, -1):
                    if ch.isspace():
                        fws = self._current_line[i][0]
                        if fws and fws[0]==ch:
                            break
                    prevpart = self._current_line[i-1][1]
                    if prevpart and prevpart[-1]==ch:
                        break
                else:
                    # No split point for this splitchar; try the next one.
                    continue
                break
            else:
                # No split point at all: keep the part whole.
                fws, part = self._current_line.pop()
                if self._current_line._initial_size > 0:
                    # There will be a header, so leave it on a line by itself.
                    self.newline()
                    if not fws:
                        # We don't use continuation_ws here because the whitespace
                        # after a header should always be a space.
                        fws = ' '
                self._current_line.push(fws, part)
                return
            # Everything from the split point onward moves to the next line.
            remainder = self._current_line.pop_from(i)
            self._lines.append(str(self._current_line))
            self._current_line.reset(remainder)


class _Accumulator(list):
    """A list of (fws, string) parts making up one output line.

    len() reports the total character count plus _initial_size, not the
    number of parts; use part_count() for the latter.
    """

    def __init__(self, initial_size=0):
        self._initial_size = initial_size
        super().__init__()

    def push(self, fws, string):
        """Append one (fws, string) part."""
        self.append((fws, string))

    def pop_from(self, i=0):
        """Remove and return all parts from index i onward."""
        popped = self[i:]
        self[i:] = []
        return popped

    def pop(self):
        """Remove and return the last part, or ('', '') if there is none."""
        if self.part_count()==0:
            return ('', '')
        return super().pop()

    def __len__(self):
        return sum((len(fws)+len(part) for fws, part in self),
                   self._initial_size)

    def __str__(self):
        return EMPTYSTRING.join((EMPTYSTRING.join((fws, part))
                                for fws, part in self))

    def reset(self, startval=None):
        """Replace the parts with startval (default empty); zero initial size."""
        if startval is None:
            startval = []
        self[:] = startval
        self._initial_size = 0

    def is_onlyws(self):
        """True if there is no initial size and the content is empty or blank."""
        return self._initial_size==0 and (not self or str(self).isspace())

    def part_count(self):
        """Return the number of (fws, string) parts."""
        return super().__len__()