"""This will be the home for the policy that hooks in the new
code that adds all the email6 features.
"""

import re
import sys
from email._policybase import Policy, Compat32, compat32, _extend_docstrings
from email.utils import _has_surrogates
from email.headerregistry import HeaderRegistry as HeaderRegistry
from email.contentmanager import raw_data_manager
from email.message import EmailMessage

__all__ = [
    'Compat32',
    'compat32',
    'Policy',
    'EmailPolicy',
    'default',
    'strict',
    'SMTP',
    'HTTP',
    ]

# RFC 5322 recognises only CR, LF and CRLF as line separators in headers.
# CRLF is matched as a single separator so that splitting a CRLF-folded value
# does not produce a spurious empty line between the CR and the LF.
linesep_splitter = re.compile(r'\n|\r\n?')


def _split_source_lines(value):
    """Split a source header value into its physical lines.

    Only CR, LF and CRLF are treated as line breaks.  str.splitlines() is
    deliberately not used because it also splits on characters such as
    '\\x0b', '\\x0c', '\\x85' and '\\u2028', which are ordinary header
    content and must survive serialization untouched.

    For input containing only CR/LF/CRLF breaks the result is the same as
    str.splitlines(): an empty value yields an empty list and a single
    trailing line break does not yield a trailing empty line.
    """
    lines = linesep_splitter.split(value)
    if not lines[-1]:
        # re.split always returns at least one element, so lines[-1] is safe.
        lines.pop()
    return lines


@_extend_docstrings
class EmailPolicy(Policy):

    """+
    PROVISIONAL

    The API extensions enabled by this policy are currently provisional.
    Refer to the documentation for details.

    This policy adds new header parsing and folding algorithms.  Instead of
    simple strings, headers are custom objects with custom attributes
    depending on the type of the field.  The folding algorithm fully
    implements RFCs 2047 and 5322.

    In addition to the settable attributes listed above that apply to
    all Policies, this policy adds the following additional attributes:

    utf8                -- if False (the default) message headers will be
                           serialized as ASCII, using encoded words to encode
                           any non-ASCII characters in the source strings.  If
                           True, the message headers will be serialized using
                           utf8 and will not contain encoded words (see RFC
                           6532 for more on this serialization format).

    refold_source       -- if the value for a header in the Message object
                           came from the parsing of some source, this attribute
                           indicates whether or not a generator should refold
                           that value when transforming the message back into
                           stream form.  The possible values are:

                           none  -- all source values use original folding
                           long  -- source values that have any line that is
                                    longer than max_line_length will be
                                    refolded
                           all  -- all values are refolded.

                           The default is 'long'.

    header_factory      -- a callable that takes two arguments, 'name' and
                           'value', where 'name' is a header field name and
                           'value' is an unfolded header field value, and
                           returns a string-like object that represents that
                           header.  A default header_factory is provided that
                           understands some of the RFC5322 header field types.
                           (Currently address fields and date fields have
                           special treatment, while all other fields are
                           treated as unstructured.  This list will be
                           completed before the extension is marked stable.)

    content_manager     -- an object with at least two methods: get_content
                           and set_content.  When the get_content or
                           set_content method of a Message object is called,
                           it calls the corresponding method of this object,
                           passing it the message object as its first argument,
                           and any arguments or keywords that were passed to
                           it as additional arguments.  The default
                           content_manager is
                           :data:`~email.contentmanager.raw_data_manager`.

    """

    message_factory = EmailMessage
    utf8 = False
    refold_source = 'long'
    header_factory = HeaderRegistry()
    content_manager = raw_data_manager

    def __init__(self, **kw):
        # Ensure that each new instance gets a unique header factory
        # (as opposed to clones, which share the factory).
        # object.__setattr__ is used to set the attribute directly on the
        # instance before the base class initializer processes the keywords.
        if 'header_factory' not in kw:
            object.__setattr__(self, 'header_factory', HeaderRegistry())
        super().__init__(**kw)

    def header_max_count(self, name):
        """+
        The implementation for this class returns the max_count attribute from
        the specialized header class that would be used to construct a header
        of type 'name'.
        """
        return self.header_factory[name].max_count

    # The logic of the next three methods is chosen such that it is possible to
    # switch a Message object between a Compat32 policy and a policy derived
    # from this class and have the results stay consistent.  This allows a
    # Message object constructed with this policy to be passed to a library
    # that only handles Compat32 objects, or to receive such an object and
    # convert it to use the newer style by just changing its policy.  It is
    # also chosen because it postpones the relatively expensive full rfc5322
    # parse until as late as possible when parsing from source, since in many
    # applications only a few headers will actually be inspected.

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.  (This
        is the same as Compat32).

        """
        name, value = sourcelines[0].split(':', 1)
        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name is returned unchanged.  If the input value has a 'name'
        attribute and it matches the name ignoring case, the value is returned
        unchanged.  Otherwise the name and value are passed to header_factory
        method, and the resulting custom header object is returned as the
        value.  In this case a ValueError is raised if the input value contains
        CR or LF characters.

        """
        if hasattr(value, 'name') and value.name.lower() == name.lower():
            return (name, value)
        if isinstance(value, str) and len(value.splitlines())>1:
            # XXX this error message isn't quite right when we use splitlines
            # (see issue 22233), but I'm not sure what should happen here.
            raise ValueError("Header values may not contain linefeed "
                             "or carriage return characters")
        return (name, self.header_factory(name, value))

    def header_fetch_parse(self, name, value):
        """+
        If the value has a 'name' attribute, it is returned unmodified.
        Otherwise the name and the value with any linesep characters removed
        are passed to the header_factory method, and the resulting custom
        header object is returned.  Any surrogateescaped bytes get turned
        into the unicode unknown-character glyph.

        """
        if hasattr(value, 'name'):
            return value
        # We can't use splitlines here because it splits on more than \r and \n.
        value = ''.join(linesep_splitter.split(value))
        return self.header_factory(name, value)

    def fold(self, name, value):
        """+
        Header folding is controlled by the refold_source policy setting.  A
        value is considered to be a 'source value' if and only if it does not
        have a 'name' attribute (having a 'name' attribute means it is a header
        object of some sort).  If a source value needs to be refolded according
        to the policy, it is converted into a custom header object by passing
        the name and the value with any linesep characters removed to the
        header_factory method.  Folding of a custom header object is done by
        calling its fold method with the current policy.

        Source values are split into lines on CR, LF and CRLF only.  If the
        value is not to be refolded, the lines are rejoined using the linesep
        from the policy and returned.  The exception is lines containing
        non-ascii binary data.  In that case the value is refolded regardless
        of the refold_source setting, which causes the binary data to be CTE
        encoded using the unknown-8bit charset.

        """
        return self._fold(name, value, refold_binary=True)

    def fold_binary(self, name, value):
        """+
        The same as fold if cte_type is 7bit, except that the returned value is
        bytes.

        If cte_type is 8bit, non-ASCII binary data is converted back into
        bytes.  Headers with binary data are not refolded, regardless of the
        refold_source setting, since there is no way to know whether the binary
        data consists of single byte characters or multibyte characters.

        If utf8 is true, headers are encoded to utf8, otherwise to ascii with
        non-ASCII unicode rendered as encoded words.

        """
        folded = self._fold(name, value, refold_binary=self.cte_type=='7bit')
        charset = 'utf8' if self.utf8 else 'ascii'
        return folded.encode(charset, 'surrogateescape')

    def _fold(self, name, value, refold_binary=False):
        """Shared implementation of fold and fold_binary.

        Header objects (anything with a 'name' attribute) fold themselves.
        Source strings are either re-emitted line by line using the policy
        linesep, or refolded through header_factory when refold_source asks
        for it, or when refold_binary is true and the value contains
        surrogateescaped bytes.  Returns the folded header as a str that ends
        with the policy linesep.
        """
        if hasattr(value, 'name'):
            return value.fold(policy=self)
        # A false max_line_length (0 or None) means "no limit".
        maxlen = self.max_line_length if self.max_line_length else sys.maxsize
        # Split on CR/LF/CRLF only; str.splitlines() would also break the
        # value at characters such as '\x0b', '\x0c' or '\u2028' and thereby
        # corrupt it when the lines are rejoined below.
        lines = _split_source_lines(value)
        # The first line shares its physical line with "name: ", hence the
        # extra len(name) + 2 when comparing against the limit.
        refold = (self.refold_source == 'all' or
                  self.refold_source == 'long' and
                    (lines and len(lines[0])+len(name)+2 > maxlen or
                     any(len(x) > maxlen for x in lines[1:])))
        if refold or (refold_binary and _has_surrogates(value)):
            return self.header_factory(name, ''.join(lines)).fold(policy=self)
        return name + ': ' + self.linesep.join(lines) + self.linesep


default = EmailPolicy()
# Make the default policy use the class default header_factory
del default.header_factory
strict = default.clone(raise_on_defect=True)
SMTP = default.clone(linesep='\r\n')
HTTP = default.clone(linesep='\r\n', max_line_length=None)
SMTPUTF8 = SMTP.clone(utf8=True)