jpayne@68
|
1 """This will be the home for the policy that hooks in the new
|
jpayne@68
|
2 code that adds all the email6 features.
|
jpayne@68
|
3 """
|
jpayne@68
|
4
|
jpayne@68
|
5 import re
|
jpayne@68
|
6 import sys
|
jpayne@68
|
7 from email._policybase import Policy, Compat32, compat32, _extend_docstrings
|
jpayne@68
|
8 from email.utils import _has_surrogates
|
jpayne@68
|
9 from email.headerregistry import HeaderRegistry as HeaderRegistry
|
jpayne@68
|
10 from email.contentmanager import raw_data_manager
|
jpayne@68
|
11 from email.message import EmailMessage
|
jpayne@68
|
12
|
jpayne@68
|
# Public API of this module.  SMTPUTF8 is a policy instance defined at the
# bottom of the file alongside SMTP and HTTP, so it is exported with them.
__all__ = [
    'Compat32',
    'compat32',
    'Policy',
    'EmailPolicy',
    'default',
    'strict',
    'SMTP',
    'HTTP',
    'SMTPUTF8',
    ]

# Splits on a bare LF or a bare CR only.  str.splitlines() is not used for
# this purpose because it also splits on other characters (form feed,
# U+2028, ...), which must be preserved inside header values.
linesep_splitter = re.compile(r'\n|\r')
|
jpayne@68
|
25
|
jpayne@68
|
@_extend_docstrings
class EmailPolicy(Policy):

    """+
    PROVISIONAL

    The API extensions enabled by this policy are currently provisional.
    Refer to the documentation for details.

    This policy adds new header parsing and folding algorithms.  Instead of
    simple strings, headers are custom objects with custom attributes
    depending on the type of the field.  The folding algorithm fully
    implements RFCs 2047 and 5322.

    In addition to the settable attributes listed above that apply to
    all Policies, this policy adds the following additional attributes:

    utf8                -- if False (the default) message headers will be
                           serialized as ASCII, using encoded words to encode
                           any non-ASCII characters in the source strings.  If
                           True, the message headers will be serialized using
                           utf8 and will not contain encoded words (see RFC
                           6532 for more on this serialization format).

    refold_source       -- if the value for a header in the Message object
                           came from the parsing of some source, this attribute
                           indicates whether or not a generator should refold
                           that value when transforming the message back into
                           stream form.  The possible values are:

                           none  -- all source values use original folding
                           long  -- source values that have any line that is
                                    longer than max_line_length will be
                                    refolded
                           all  -- all values are refolded.

                           The default is 'long'.

    header_factory      -- a callable that takes two arguments, 'name' and
                           'value', where 'name' is a header field name and
                           'value' is an unfolded header field value, and
                           returns a string-like object that represents that
                           header.  A default header_factory is provided that
                           understands some of the RFC5322 header field types.
                           (Currently address fields and date fields have
                           special treatment, while all other fields are
                           treated as unstructured.  This list will be
                           completed before the extension is marked stable.)

    content_manager     -- an object with at least two methods: get_content
                           and set_content.  When the get_content or
                           set_content method of a Message object is called,
                           it calls the corresponding method of this object,
                           passing it the message object as its first argument,
                           and any arguments or keywords that were passed to
                           it as additional arguments.  The default
                           content_manager is
                           :data:`~email.contentmanager.raw_data_manager`.

    """

    message_factory = EmailMessage
    utf8 = False
    refold_source = 'long'
    header_factory = HeaderRegistry()
    content_manager = raw_data_manager

    def __init__(self, **kw):
        """Create a policy, giving it a private HeaderRegistry by default.

        Any keyword arguments are forwarded to Policy.__init__.  If the
        caller does not supply 'header_factory', a fresh HeaderRegistry is
        installed on the instance before the base initializer runs.
        """
        # Ensure that each new instance gets a unique header factory
        # (as opposed to clones, which share the factory).
        if 'header_factory' not in kw:
            # object.__setattr__ is used directly, as the original code
            # does, rather than going through normal attribute assignment.
            object.__setattr__(self, 'header_factory', HeaderRegistry())
        super().__init__(**kw)

    def header_max_count(self, name):
        """+
        The implementation for this class returns the max_count attribute from
        the specialized header class that would be used to construct a header
        of type 'name'.
        """
        return self.header_factory[name].max_count

    # The logic of the next three methods is chosen such that it is possible to
    # switch a Message object between a Compat32 policy and a policy derived
    # from this class and have the results stay consistent.  This allows a
    # Message object constructed with this policy to be passed to a library
    # that only handles Compat32 objects, or to receive such an object and
    # convert it to use the newer style by just changing its policy.  It is
    # also chosen because it postpones the relatively expensive full rfc5322
    # parse until as late as possible when parsing from source, since in many
    # applications only a few headers will actually be inspected.

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.  (This
        is the same as Compat32).

        """
        name, value = sourcelines[0].split(':', 1)
        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name is returned unchanged.  If the input value has a 'name'
        attribute and it matches the name ignoring case, the value is returned
        unchanged.  Otherwise the name and value are passed to header_factory
        method, and the resulting custom header object is returned as the
        value.  In this case a ValueError is raised if the input value contains
        CR or LF characters.

        """
        if hasattr(value, 'name') and value.name.lower() == name.lower():
            return (name, value)
        if isinstance(value, str) and len(value.splitlines()) > 1:
            # XXX this error message isn't quite right when we use splitlines
            # (see issue 22233), but I'm not sure what should happen here.
            raise ValueError("Header values may not contain linefeed "
                             "or carriage return characters")
        return (name, self.header_factory(name, value))

    def header_fetch_parse(self, name, value):
        """+
        If the value has a 'name' attribute, it is returned unmodified.
        Otherwise the name and the value with any linesep characters removed
        are passed to the header_factory method, and the resulting custom
        header object is returned.  Any surrogateescaped bytes get turned
        into the unicode unknown-character glyph.

        """
        if hasattr(value, 'name'):
            return value
        # We can't use splitlines here because it splits on more than \r and \n.
        value = ''.join(linesep_splitter.split(value))
        return self.header_factory(name, value)

    def fold(self, name, value):
        """+
        Header folding is controlled by the refold_source policy setting.  A
        value is considered to be a 'source value' if and only if it does not
        have a 'name' attribute (having a 'name' attribute means it is a header
        object of some sort).  If a source value needs to be refolded according
        to the policy, it is converted into a custom header object by passing
        the name and the value with any linesep characters removed to the
        header_factory method.  Folding of a custom header object is done by
        calling its fold method with the current policy.

        Source values are split into lines using splitlines.  If the value is
        not to be refolded, the lines are rejoined using the linesep from the
        policy and returned.  The exception is lines containing non-ascii
        binary data.  In that case the value is refolded regardless of the
        refold_source setting, which causes the binary data to be CTE encoded
        using the unknown-8bit charset.  Likewise, when utf8 is false, a
        source value containing non-ASCII text is always refolded so that the
        text is rendered as encoded words.

        """
        return self._fold(name, value, refold_binary=True)

    def fold_binary(self, name, value):
        """+
        The same as fold if cte_type is 7bit, except that the returned value is
        bytes.

        If cte_type is 8bit, non-ASCII binary data is converted back into
        bytes.  Headers with binary data are not refolded, regardless of the
        refold_source setting, since there is no way to know whether the binary
        data consists of single byte characters or multibyte characters.

        If utf8 is true, headers are encoded to utf8, otherwise to ascii with
        non-ASCII unicode rendered as encoded words.

        """
        folded = self._fold(name, value, refold_binary=self.cte_type == '7bit')
        charset = 'utf8' if self.utf8 else 'ascii'
        return folded.encode(charset, 'surrogateescape')

    def _fold(self, name, value, refold_binary=False):
        """Shared implementation of fold() and fold_binary().

        Header objects (values with a 'name' attribute) fold themselves.
        Source strings are either passed through with their original line
        breaks replaced by the policy linesep, or rebuilt through
        header_factory and refolded.  A refold happens when:

          * refold_source requests it ('all', or 'long' with an over-long
            line);
          * refold_binary is true and the value holds surrogate-escaped
            bytes; or
          * utf8 is false and the value holds text that cannot be written
            as ASCII (it has to become encoded words).

        Returns the folded header as a str, terminated by the linesep.
        """
        if hasattr(value, 'name'):
            return value.fold(policy=self)
        # A max_line_length of 0 or None means "no limit".
        maxlen = self.max_line_length if self.max_line_length else sys.maxsize
        lines = value.splitlines()
        if self.refold_source == 'all':
            refold = True
        elif self.refold_source == 'long':
            # The first line shares its physical line with "name: ", hence
            # the extra len(name) + 2.
            refold = bool(
                lines and len(lines[0]) + len(name) + 2 > maxlen
                or any(len(x) > maxlen for x in lines[1:]))
        else:
            refold = False
        if not refold and refold_binary:
            refold = _has_surrogates(value)
        if not refold and not self.utf8:
            # Without utf8 the header must serialize as ASCII.  Surrogate
            # escapes are deliberately tolerated here (they round-trip to the
            # original bytes in fold_binary); any other non-ASCII character
            # would otherwise leak through unencoded and make fold_binary
            # raise UnicodeEncodeError, so force encoded-word generation.
            try:
                value.encode('ascii', 'surrogateescape')
            except UnicodeEncodeError:
                refold = True
        if refold:
            return self.header_factory(name, ''.join(lines)).fold(policy=self)
        return name + ': ' + self.linesep.join(lines) + self.linesep
|
jpayne@68
|
216
|
jpayne@68
|
217
|
jpayne@68
|
# Ready-made policy instances shared by the whole package.

default = EmailPolicy()
# Make the default policy use the class default header_factory
# (EmailPolicy.__init__ installed a per-instance registry; dropping it lets
# attribute lookup fall back to the one stored on the class).
del default.header_factory

# Variants derived from the default policy.
SMTP = default.clone(linesep='\r\n')
SMTPUTF8 = SMTP.clone(utf8=True)
HTTP = default.clone(linesep='\r\n', max_line_length=None)
strict = default.clone(raise_on_defect=True)
|