Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/email/_parseaddr.py @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 # Copyright (C) 2002-2007 Python Software Foundation | |
2 # Contact: email-sig@python.org | |
3 | |
4 """Email address parsing code. | |
5 | |
6 Lifted directly from rfc822.py. This should eventually be rewritten. | |
7 """ | |
8 | |
# Names exported by a star-import of this module.
__all__ = [
    'mktime_tz',
    'parsedate',
    'parsedate_tz',
    'quote',
    ]

import time, calendar

# Named string constants (typically used as ``str.join`` separators).
SPACE = ' '
EMPTYSTRING = ''
COMMASPACE = ', '
21 | |
# Parse a date field
#
# Month names are looked up by list position: the first twelve entries are
# the abbreviations and the next twelve the full names, so an index above
# eleven is folded back onto the 1..12 range by the parser.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']

_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.
#
# Values are written in +/-HHMM form (e.g. -500 means five hours west).

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-element time tuple.

    The first nine elements can be passed to time.mktime(); the tenth is the
    timezone offset from UTC in seconds.  Named zones are limited to the
    entries of the _timezones table (military zones other than Z are not
    recognised); numeric +HHMM/-HHMM offsets are always accepted.  A zone
    that is unknown, missing, or given as -0000 is reported as offset 0.

    Returns None if the string cannot be parsed.
    """
    res = _parsedate_tz(data)
    if not res:
        return None
    if res[9] is None:
        res[9] = 0
    return tuple(res)

def _parsedate_tz(data):
    """Convert date to extended time tuple.

    Returns a 10-element list [year, month, day, hour, minute, second,
    0, 1, -1, tzoffset], or None if `data' cannot be parsed.

    The last (additional) element is the time zone offset in seconds, except if
    the timezone was specified as -0000.  In that case the last element is
    None.  This indicates a UTC timestamp that explicitly declaims knowledge of
    the source timezone, as opposed to a +0000 timestamp that indicates the
    source timezone really was UTC.  The element is also None when the zone
    is neither in _timezones nor numeric.

    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input: there is no first token to inspect.
        return None
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued onto the time ("12:00:00+0100"); split it off.
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    if not (dd and mm and yy):
        # An RFC 850 split such as "-Jan-2020" leaves empty fields that
        # would otherwise be indexed below.
        return None
    mm = mm.lower()
    if mm not in _monthnames:
        # Accept "Month Day" ordering as well as "Day Month".
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        mm -= 12
    if dd[-1] == ',':
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time are transposed.
        yy, tm = tm, yy
    if yy[-1] == ',':
        yy = yy[:-1]
        if not yy:
            # The year token was a lone comma.
            return None
    if not yy[0].isdigit():
        # Year and zone are transposed.
        yy, tz = tz, yy
    if tm[-1] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    elif len(tm) == 1 and '.' in tm[0]:
        # Some non-compliant MUAs use '.' to separate time elements.
        tm = tm[0].split('.')
        if len(tm) == 2:
            [thh, tmm] = tm
            tss = '0'
        elif len(tm) == 3:
            [thh, tmm, tss] = tm
        else:
            # Too many dotted components: not a time.
            return None
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    # Check for a yy specified in two-digit format, then convert it to the
    # appropriate four-digit format, according to the POSIX standard. RFC 822
    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
    # mandates a 4-digit yy. For more information, see the documentation for
    # the time module.
    if yy < 100:
        # The year is between 1969 and 1999 (inclusive).
        if yy > 68:
            yy += 1900
        # The year is between 2000 and 2068 (inclusive).
        else:
            yy += 2000
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
        if tzoffset==0 and tz.startswith('-'):
            tzoffset = None
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    # Daylight Saving Time flag is set to -1, since DST is unknown.
    return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]
172 | |
173 | |
def parsedate(data):
    """Convert a time string to a 9-element time tuple.

    This is parsedate_tz() with the trailing timezone offset dropped.  Any
    non-tuple result from parsedate_tz() (None for unparsable input) is
    passed through unchanged.
    """
    parsed = parsedate_tz(data)
    return parsed[:9] if isinstance(parsed, tuple) else parsed
181 | |
182 | |
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp.

    `data' may be any indexable sequence of ten elements (a tuple or a list)
    whose last element is the UTC offset in seconds, or None when the zone
    is unknown.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT.
        # The slice is coerced to a tuple so that list input works too:
        # time.mktime() insists on a tuple, and list + tuple would raise.
        return time.mktime(tuple(data[:8]) + (-1,))
    # The fields are wall-clock time in the given zone; subtracting the
    # offset converts the naive UTC reading into the real timestamp.
    return calendar.timegm(data) - data[9]
191 | |
192 | |
def quote(str):
    """Escape a string for use inside an RFC 2822 quoted-string.

    Each backslash and each double quote is preceded by a backslash; no
    other character needs escaping in that context.  The enclosing double
    quotes are not added.
    """
    # One translate() pass maps both characters at once; the backslashes it
    # inserts are never re-examined, exactly as with escaping backslashes
    # first and double quotes second.
    return str.translate({ord('\\'): '\\\\', ord('"'): '\\"'})
201 | |
202 | |
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    The parser is a cursor over ``self.field``: ``self.pos`` is the index of
    the next unread character, and each ``get*`` method advances it past
    whatever it consumed.  Comments encountered along the way are appended
    to ``self.commentlist``.

    Note: this class interface is deprecated and may be removed in the future.
    Use email.utils.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.FWS = self.LWS + self.CR
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Skip white space and extract comments.

        Returns the skipped spaces and tabs as a string (CR and LF are
        skipped but not included).  Comments are appended to
        self.commentlist.
        """
        wslist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                if self.field[self.pos] not in '\n\r':
                    wslist.append(self.field[self.pos])
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break
        return ''.join(wslist)

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each a
        (realname, addrspec) tuple.  A position where nothing could be
        parsed contributes ('', '').
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, addrspec) tuples: one entry for a
        single mailbox, several for a group, and an empty list when nothing
        usable was found.  A trailing comma separator is consumed.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Save a *copy*: getphraselist() appends to self.commentlist in
        # place, so an alias could not be used to rewind the comments and
        # they would be collected a second time by getaddrspec() below.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < fieldlen:
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Step over a stray special so the caller's loop progresses.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the parser is not looking at a '<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                # Domain of a source route; parsed and discarded.
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character following the addr-spec
                # (the closing '>' in well-formed input).
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns 'local@domain', just the local part when no '@' follows it,
        or the empty string when the domain is invalid.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            preserve_ws = True
            if self.field[self.pos] == '.':
                # White space around a dot is dropped.
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                aslist.append('.')
                self.pos += 1
                preserve_ws = False
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % quote(self.getquote()))
            elif self.field[self.pos] in self.atomends:
                # End of the local part; discard trailing white space.
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                break
            else:
                aslist.append(self.getatom())
            ws = self.gotonext()
            if preserve_ws and ws:
                aslist.append(ws)

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        domain = self.getdomain()
        if not domain:
            # Invalid domain, return an empty address instead of returning a
            # local part to denote failed parsing.
            return ''
        return ''.join(aslist) + domain

    def getdomain(self):
        """Get the complete domain name from an address.

        Returns the empty string if a second '@' is met inside the domain.
        """
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] == '@':
                # bpo-34155: Don't parse domains with two `@` like
                # `a@malicious.org@important.com`.
                return ''
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' (including
        when the end of the field has been reached) then getdelimited
        returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.

        Returns the fragment without its delimiters and with backslash
        escapes resolved.
        """
        if self.pos >= len(self.field) or self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        # True when the previous character was an unescaped backslash.
        escaped = False
        self.pos += 1
        while self.pos < len(self.field):
            if escaped:
                slist.append(self.field[self.pos])
                escaped = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                continue        # have already advanced pos from getcomment
            elif self.field[self.pos] == '\\':
                escaped = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.

        Returns the list of words; comments met on the way are appended to
        self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.FWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
503 | |
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed (realname, addrspec) pairs are kept in ``self.addresslist``;
    the operators below treat that list like a set of addresses.
    """

    def __init__(self, field):
        """Parse `field'; a false value (None or '') gives an empty list."""
        AddrlistClass.__init__(self, field)
        self.addresslist = self.getaddrlist() if field else []

    def __len__(self):
        """Return the number of parsed addresses."""
        return len(self.addresslist)

    def __add__(self, other):
        """Return a new AddressList: the union of self and other.

        Membership is tested against self only, so an address that occurs
        several times in other (and not in self) is added each time.
        """
        union = AddressList(None)
        union.addresslist = self.addresslist[:]
        extra = [addr for addr in other.addresslist
                 if addr not in self.addresslist]
        union.addresslist.extend(extra)
        return union

    def __iadd__(self, other):
        """Add to self, in place, the addresses of other it lacks."""
        for addr in other.addresslist:
            if addr in self.addresslist:
                continue
            self.addresslist.append(addr)
        return self

    def __sub__(self, other):
        """Return a new AddressList: the addresses of self not in other."""
        difference = AddressList(None)
        difference.addresslist.extend(
            [addr for addr in self.addresslist
             if addr not in other.addresslist])
        return difference

    def __isub__(self, other):
        """Remove from self, in place, each address found in other."""
        for addr in other.addresslist:
            if addr not in self.addresslist:
                continue
            self.addresslist.remove(addr)
        return self

    def __getitem__(self, index):
        """Delegate indexing to the list (also enables slices and 'in')."""
        return self.addresslist[index]