"""This will be the home for the policy that hooks in the new
code that adds all the email6 features.
"""

import re
import sys
from email._policybase import Policy, Compat32, compat32, _extend_docstrings
from email.utils import _has_surrogates
from email.headerregistry import HeaderRegistry as HeaderRegistry
from email.contentmanager import raw_data_manager
from email.message import EmailMessage

__all__ = [
    'Compat32',
    'compat32',
    'Policy',
    'EmailPolicy',
    'default',
    'strict',
    'SMTP',
    'HTTP',
    'SMTPUTF8',
    ]

# Matches a single CR or LF.  Used instead of str.splitlines, which also
# splits on other characters (form feed, NEL, U+2028, ...).
linesep_splitter = re.compile(r'\n|\r')


@_extend_docstrings
class EmailPolicy(Policy):

    """+
    PROVISIONAL

    The API extensions enabled by this policy are currently provisional.
    Refer to the documentation for details.

    This policy adds new header parsing and folding algorithms.  Instead of
    simple strings, headers are custom objects with custom attributes
    depending on the type of the field.  The folding algorithm fully
    implements RFCs 2047 and 5322.

    In addition to the settable attributes listed above that apply to
    all Policies, this policy adds the following additional attributes:

    utf8                -- if False (the default) message headers will be
                           serialized as ASCII, using encoded words to encode
                           any non-ASCII characters in the source strings.  If
                           True, the message headers will be serialized using
                           utf8 and will not contain encoded words (see RFC
                           6532 for more on this serialization format).

    refold_source       -- if the value for a header in the Message object
                           came from the parsing of some source, this attribute
                           indicates whether or not a generator should refold
                           that value when transforming the message back into
                           stream form.  The possible values are:

                           none  -- all source values use original folding
                           long  -- source values that have any line that is
                                    longer than max_line_length will be
                                    refolded
                           all  -- all values are refolded.

                           The default is 'long'.

    header_factory      -- a callable that takes two arguments, 'name' and
                           'value', where 'name' is a header field name and
                           'value' is an unfolded header field value, and
                           returns a string-like object that represents that
                           header.  A default header_factory is provided that
                           understands some of the RFC5322 header field types.
                           (Currently address fields and date fields have
                           special treatment, while all other fields are
                           treated as unstructured.  This list will be
                           completed before the extension is marked stable.)

    content_manager     -- an object with at least two methods: get_content
                           and set_content.  When the get_content or
                           set_content method of a Message object is called,
                           it calls the corresponding method of this object,
                           passing it the message object as its first argument,
                           and any arguments or keywords that were passed to
                           it as additional arguments.  The default
                           content_manager is
                           :data:`~email.contentmanager.raw_data_manager`.

    """

    message_factory = EmailMessage
    utf8 = False
    refold_source = 'long'
    header_factory = HeaderRegistry()
    content_manager = raw_data_manager

    def __init__(self, **kw):
        # Ensure that each new instance gets a unique header factory
        # (as opposed to clones, which share the factory).
        # object.__setattr__ is used to set the attribute directly on the
        # instance before the base class initializer processes **kw.
        if 'header_factory' not in kw:
            object.__setattr__(self, 'header_factory', HeaderRegistry())
        super().__init__(**kw)

    def header_max_count(self, name):
        """+
        The implementation for this class returns the max_count attribute from
        the specialized header class that would be used to construct a header
        of type 'name'.
        """
        return self.header_factory[name].max_count

    # The logic of the next three methods is chosen such that it is possible to
    # switch a Message object between a Compat32 policy and a policy derived
    # from this class and have the results stay consistent.  This allows a
    # Message object constructed with this policy to be passed to a library
    # that only handles Compat32 objects, or to receive such an object and
    # convert it to use the newer style by just changing its policy.  It is
    # also chosen because it postpones the relatively expensive full rfc5322
    # parse until as late as possible when parsing from source, since in many
    # applications only a few headers will actually be inspected.

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.  (This
        is the same as Compat32).

        """
        name, value = sourcelines[0].split(':', 1)
        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name is returned unchanged.  If the input value has a 'name'
        attribute and it matches the name ignoring case, the value is returned
        unchanged.  Otherwise the name and value are passed to header_factory
        method, and the resulting custom header object is returned as the
        value.  In this case a ValueError is raised if the input value contains
        CR or LF characters.

        """
        if hasattr(value, 'name') and value.name.lower() == name.lower():
            return (name, value)
        if isinstance(value, str) and len(value.splitlines())>1:
            # XXX this error message isn't quite right when we use splitlines
            # (see issue 22233), but I'm not sure what should happen here.
            raise ValueError("Header values may not contain linefeed "
                             "or carriage return characters")
        return (name, self.header_factory(name, value))

    def header_fetch_parse(self, name, value):
        """+
        If the value has a 'name' attribute, it is returned unmodified.
        Otherwise the name and the value with any linesep characters removed
        are passed to the header_factory method, and the resulting custom
        header object is returned.  Any surrogateescaped bytes get turned
        into the unicode unknown-character glyph.

        """
        if hasattr(value, 'name'):
            return value
        # We can't use splitlines here because it splits on more than \r and \n.
        value = ''.join(linesep_splitter.split(value))
        return self.header_factory(name, value)

    def fold(self, name, value):
        """+
        Header folding is controlled by the refold_source policy setting.  A
        value is considered to be a 'source value' if and only if it does not
        have a 'name' attribute (having a 'name' attribute means it is a header
        object of some sort).  If a source value needs to be refolded according
        to the policy, it is converted into a custom header object by passing
        the name and the value with any linesep characters removed to the
        header_factory method.  Folding of a custom header object is done by
        calling its fold method with the current policy.

        Source values are split into lines using splitlines.  If the value is
        not to be refolded, the lines are rejoined using the linesep from the
        policy and returned.  The exception is lines containing non-ascii
        binary data.  In that case the value is refolded regardless of the
        refold_source setting, which causes the binary data to be CTE encoded
        using the unknown-8bit charset.  Likewise, if utf8 is false, a source
        value containing non-ASCII text is always refolded so that the text
        is rendered as encoded words.

        """
        return self._fold(name, value, refold_binary=True)

    def fold_binary(self, name, value):
        """+
        The same as fold if cte_type is 7bit, except that the returned value is
        bytes.

        If cte_type is 8bit, non-ASCII binary data is converted back into
        bytes.  Headers with binary data are not refolded, regardless of the
        refold_source setting, since there is no way to know whether the binary
        data consists of single byte characters or multibyte characters.

        If utf8 is true, headers are encoded to utf8, otherwise to ascii with
        non-ASCII unicode rendered as encoded words.

        """
        folded = self._fold(name, value, refold_binary=self.cte_type=='7bit')
        charset = 'utf8' if self.utf8 else 'ascii'
        return folded.encode(charset, 'surrogateescape')

    def _fold(self, name, value, refold_binary=False):
        """Shared implementation of fold and fold_binary.

        Header objects (anything with a 'name' attribute) fold themselves.
        Source strings are either passed through with their original line
        structure, or re-parsed via header_factory and refolded.

        refold_binary -- when true, a source value containing surrogate
                         escaped bytes is always refolded.
        """
        if hasattr(value, 'name'):
            return value.fold(policy=self)
        # A false max_line_length (0 or None) means "no limit".
        maxlen = self.max_line_length if self.max_line_length else sys.maxsize
        lines = value.splitlines()
        # The first line shares its row with "name: ", hence the extra
        # len(name) + 2 when comparing against the limit.
        refold = (self.refold_source == 'all' or
                  self.refold_source == 'long' and
                    (lines and len(lines[0])+len(name)+2 > maxlen or
                     any(len(x) > maxlen for x in lines[1:])))
        if not refold:
            if refold_binary and _has_surrogates(value):
                refold = True
            elif not self.utf8:
                # The output must be ASCII apart from surrogate escaped
                # bytes.  Anything else cannot be emitted verbatim (encoding
                # it in fold_binary would raise), so refold it to get
                # encoded words instead.
                try:
                    value.encode('ascii', 'surrogateescape')
                except UnicodeEncodeError:
                    refold = True
        if refold:
            return self.header_factory(name, ''.join(lines)).fold(policy=self)
        return name + ': ' + self.linesep.join(lines) + self.linesep


default = EmailPolicy()
# Make the default policy use the class default header_factory
del default.header_factory
strict = default.clone(raise_on_defect=True)
SMTP = default.clone(linesep='\r\n')
HTTP = default.clone(linesep='\r\n', max_line_length=None)
SMTPUTF8 = SMTP.clone(utf8=True)