jpayne@69
|
1 # Copyright (C) 2002-2007 Python Software Foundation
|
jpayne@69
|
2 # Contact: email-sig@python.org
|
jpayne@69
|
3
|
jpayne@69
|
4 """Email address parsing code.
|
jpayne@69
|
5
|
jpayne@69
|
6 Lifted directly from rfc822.py. This should eventually be rewritten.
|
jpayne@69
|
7 """
|
jpayne@69
|
8
|
jpayne@69
|
9 __all__ = [
|
jpayne@69
|
10 'mktime_tz',
|
jpayne@69
|
11 'parsedate',
|
jpayne@69
|
12 'parsedate_tz',
|
jpayne@69
|
13 'quote',
|
jpayne@69
|
14 ]
|
jpayne@69
|
15
|
jpayne@69
|
16 import time, calendar
|
jpayne@69
|
17
|
jpayne@69
|
# Small string constants used as separators when joining parsed pieces.
SPACE = ' '
EMPTYSTRING = ''
COMMASPACE = ', '

# Date-field parsing tables.
# Month names in lower case: the twelve abbreviations first, then the twelve
# full names in the same order, so entries i and i + 12 name the same month.
_monthnames = [
    'jan', 'feb', 'mar', 'apr', 'may', 'jun',
    'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
    'january', 'february', 'march', 'april', 'may', 'june',
    'july', 'august', 'september', 'october', 'november', 'december',
]

# Day-of-week abbreviations in lower case.
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.
#
# Values use the same +/-HHMM integer form as a numeric zone (-500 is
# five hours west of UTC).
_timezones = {
    'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
    'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
    'EST': -500, 'EDT': -400,  # Eastern
    'CST': -600, 'CDT': -500,  # Central
    'MST': -700, 'MDT': -600,  # Mountain
    'PST': -800, 'PDT': -700,  # Pacific
}
|
jpayne@69
|
43
|
jpayne@69
|
44
|
jpayne@69
|
def parsedate_tz(data):
    """Parse a date string into a 10-item time tuple.

    The work is delegated to _parsedate_tz(); its list result is converted
    to a tuple.  The last item is the timezone offset; when the parser
    reports it as None it is returned as 0 instead.

    Returns None when the parser produces no result.
    """
    fields = _parsedate_tz(data)
    if fields:
        if fields[9] is None:
            # Unknown offset: report it as zero.
            fields[9] = 0
        return tuple(fields)
    return None
|
jpayne@69
|
56
|
jpayne@69
|
def _parsedate_tz(data):
    """Convert date to extended time tuple.

    Returns a 10-item list
    ``[year, month, day, hour, minute, second, 0, 1, -1, tzoffset]`` or None
    when `data' is empty, contains only whitespace, or cannot be parsed.

    The last (additional) element is the time zone offset in seconds, except if
    the timezone was specified as -0000.  In that case the last element is
    None.  This indicates a UTC timestamp that explicitly declaims knowledge of
    the source timezone, as opposed to a +0000 timestamp that indicates the
    source timezone really was UTC.

    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input splits into nothing; there is no date here.
        return None
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time ("10:00:00+0100"); split it off.
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Perhaps the month comes before the day; try them swapped.
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        # Full month names occupy the second half of the table.
        mm -= 12
    # The fields can be empty strings for malformed RFC 850 dates such as
    # "-jan-99", so test with endswith()/slicing rather than indexing.
    if dd.endswith(','):
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # The "year" slot holds the time; year and time are swapped.
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy[:1].isdigit():
        # The "year" slot does not look numeric; assume it is the zone.
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    elif len(tm) == 1 and '.' in tm[0]:
        # Some non-compliant MUAs use '.' to separate time elements.
        tm = tm[0].split('.')
        if len(tm) == 2:
            [thh, tmm] = tm
            tss = '0'
        elif len(tm) == 3:
            [thh, tmm, tss] = tm
        else:
            # Wrong number of '.'-separated components (e.g. "1.2.3.4").
            return None
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    # Check for a yy specified in two-digit format, then convert it to the
    # appropriate four-digit format, according to the POSIX standard. RFC 822
    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
    # mandates a 4-digit yy. For more information, see the documentation for
    # the time module.
    if yy < 100:
        # The year is between 1969 and 1999 (inclusive).
        if yy > 68:
            yy += 1900
        # The year is between 2000 and 2068 (inclusive).
        else:
            yy += 2000
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            # Unrecognised zone: leave the offset as None.
            pass
        if tzoffset == 0 and tz.startswith('-'):
            # "-0000" means the source timezone is unknown.
            tzoffset = None
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    # Daylight Saving Time flag is set to -1, since DST is unknown.
    return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]
|
jpayne@69
|
172
|
jpayne@69
|
173
|
jpayne@69
|
def parsedate(data):
    """Convert a time string to a 9-item time tuple.

    Calls parsedate_tz() and keeps the first nine items of its tuple result.
    Anything that is not a tuple is returned unchanged.
    """
    parsed = parsedate_tz(data)
    return parsed[:9] if isinstance(parsed, tuple) else parsed
|
jpayne@69
|
181
|
jpayne@69
|
182
|
jpayne@69
|
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp.

    `data' may be any sequence of ten items (tuple or list).  Item 9 is the
    timezone offset in seconds, or None when the zone is unknown.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT.
        # tuple() lets a list be passed here, as the branch below already
        # allows; the DST flag is forced to -1 (unknown).
        return time.mktime(tuple(data[:8]) + (-1,))
    # Interpret the fields as UTC, then remove the zone offset.
    return calendar.timegm(data) - data[9]
|
jpayne@69
|
191
|
jpayne@69
|
192
|
jpayne@69
|
def quote(str):
    """Escape a string for use inside a quoted string.

    Each backslash and each double quote character is preceded by a
    backslash, turning it into a quoted pair.  The surrounding double quotes
    are not added.
    """
    # Backslashes must be doubled first so the ones added for the double
    # quotes below are not escaped again.
    backslashes_escaped = str.replace('\\', '\\\\')
    return backslashes_escaped.replace('"', '\\"')
|
jpayne@69
|
201
|
jpayne@69
|
202
|
jpayne@69
|
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    The parser keeps a cursor (`pos') into the header text (`field') and each
    get*() method consumes input starting at that cursor.

    Note: this class interface is deprecated and may be removed in the future.
    Use email.utils.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.FWS = self.LWS + self.CR
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Skip white space and extract comments.

        Comments are appended to self.commentlist.  Returns the skipped
        spaces and tabs as a string; CR and LF are skipped but not returned.
        """
        wslist = []
        whitespace = self.LWS + '\n\r'
        while self.pos < len(self.field):
            if self.field[self.pos] in whitespace:
                if self.field[self.pos] not in '\n\r':
                    wslist.append(self.field[self.pos])
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break
        return EMPTYSTRING.join(wslist)

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses.  An address that
        yields nothing is recorded as the pair ('', '').
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (name, address) pairs; a group yields one pair per
        member, and unparsable input yields an empty list.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # NOTE: this is the same list object as self.commentlist, not a copy.
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(SPACE.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(SPACE.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(SPACE.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Skip a stray special so the caller's loop makes progress.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None when the cursor is not on a `<' (including when it is at
        the end of the field).
        """
        if self.pos >= len(self.field) or self.field[self.pos] != '<':
            return None

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                # Domain of a route element: parsed and discarded.
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns the local part alone when no `@' follows it, and the empty
        string when the domain after `@' is invalid.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            preserve_ws = True
            if self.field[self.pos] == '.':
                # Drop whitespace collected just before the dot.
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                aslist.append('.')
                self.pos += 1
                preserve_ws = False
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % quote(self.getquote()))
            elif self.field[self.pos] in self.atomends:
                # Drop trailing whitespace before the terminator.
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                break
            else:
                aslist.append(self.getatom())
            ws = self.gotonext()
            if preserve_ws and ws:
                aslist.append(ws)

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return EMPTYSTRING.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        domain = self.getdomain()
        if not domain:
            # Invalid domain, return an empty address instead of returning a
            # local part to denote failed parsing.
            return EMPTYSTRING
        return EMPTYSTRING.join(aslist) + domain

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] == '@':
                # bpo-34155: Don't parse domains with two `@` like
                # `a@malicious.org@important.com`.
                return EMPTYSTRING
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return EMPTYSTRING.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' (which includes
        being at the end of the field) then getdelimited returns the empty
        string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.
        """
        if self.pos >= len(self.field) or self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        # True right after a backslash: the next character is taken literally.
        escaped = False
        self.pos += 1
        while self.pos < len(self.field):
            if escaped:
                slist.append(self.field[self.pos])
                escaped = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                continue        # have already advanced pos from getcomment
            elif self.field[self.pos] == '\\':
                escaped = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.

        Returns the list of words; comments met on the way are appended to
        self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.FWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
|
jpayne@69
|
503
|
jpayne@69
|
class AddressList(AddrlistClass):
    """A parsed list of RFC 2822 addresses with set-like operators.

    The parsed entries are kept in `addresslist'; `+' and `-' combine two
    instances as union and difference.
    """

    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        # A false `field' (None, '') produces an empty list without parsing.
        self.addresslist = self.getaddrlist() if field else []

    def __len__(self):
        return len(self.addresslist)

    def __add__(self, other):
        # Set union: everything in self, then entries of other missing
        # from self.
        union = AddressList(None)
        union.addresslist = self.addresslist + [
            entry for entry in other.addresslist
            if entry not in self.addresslist
        ]
        return union

    def __iadd__(self, other):
        # Set union, in-place.  The membership test sees entries appended
        # earlier in this loop, so it must stay an explicit loop.
        for entry in other.addresslist:
            if entry in self.addresslist:
                continue
            self.addresslist.append(entry)
        return self

    def __sub__(self, other):
        # Set difference: entries of self that are absent from other.
        difference = AddressList(None)
        difference.addresslist = [
            entry for entry in self.addresslist
            if entry not in other.addresslist
        ]
        return difference

    def __isub__(self, other):
        # Set difference, in-place: remove one occurrence per entry of other.
        for entry in other.addresslist:
            if entry in self.addresslist:
                self.addresslist.remove(entry)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
|