csp2: CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/email/

comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/email/_parseaddr.py @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d

author	jpayne
date	Tue, 18 Mar 2025 17:55:14 -0400
parents
children

comparison

equal deleted inserted replaced

-:0e9998148a16
+:33d812a61356
+# Copyright (C) 2002-2007 Python Software Foundation
+# Contact: email-sig@python.org
+"""Email address parsing code.
+Lifted directly from rfc822.py.  This should eventually be rewritten.
+"""
+__all__ = [
+'mktime_tz',
+'parsedate',
+'parsedate_tz',
+'quote',
+]
+import time, calendar
+SPACE = ' '
+EMPTYSTRING = ''
+COMMASPACE = ', '
# Parse a date field

# Month names, abbreviated first and then in full.  The position of a name
# modulo 12 (plus one) is the month number.
_monthnames = [
    'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
    'aug', 'sep', 'oct', 'nov', 'dec',
    'january', 'february', 'march', 'april', 'may', 'june', 'july',
    'august', 'september', 'october', 'november', 'december',
]

# Abbreviated day-of-week names; only used to recognise (and skip) a leading
# day name that is not followed by a comma.
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.
# Values are in the same +/-HHMM form as a numeric zone (e.g. -500 == -05:00).
_timezones = {
    'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
    'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
    'EST': -500, 'EDT': -400,  # Eastern
    'CST': -600, 'CDT': -500,  # Central
    'MST': -700, 'MDT': -600,  # Mountain
    'PST': -800, 'PDT': -700,  # Pacific
}


def parsedate_tz(data):
    """Convert a date string to a 10-item time tuple.

    The first nine items have the layout accepted by time.mktime(); the tenth
    is the timezone offset from UTC in seconds.  An offset that could not be
    determined (including the explicit "-0000" form) is reported as 0.

    Only the zone names listed in the module's timezone table are recognised
    by name; of the military zones only 'Z' is supported.

    Returns None when the string cannot be parsed.
    """
    res = _parsedate_tz(data)
    if not res:
        return None
    if res[9] is None:
        res[9] = 0
    return tuple(res)


def _parsedate_tz(data):
    """Convert date to extended time tuple.

    The last (additional) element is the time zone offset in seconds, except if
    the timezone was specified as -0000.  In that case the last element is
    None.  This indicates a UTC timestamp that explicitly declaims knowledge of
    the source timezone, as opposed to a +0000 timestamp that indicates the
    source timezone really was UTC.

    The result is a 10-element list
    [year, month, day, hour, minute, second, 0, 1, -1, tzoffset], or None when
    the input is empty, blank or otherwise unparsable.  An unrecognised zone
    name also yields a tzoffset of None.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input: there is no token to inspect.
        return None
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3:  # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued onto the time ("10:00:00+0100"); split it off.
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('')  # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    if not (dd and mm and yy):
        # Splitting an RFC 850 token such as "-jan-2020" can leave empty
        # fields; the [-1] / [0] indexing below requires non-empty strings.
        return None
    mm = mm.lower()
    if mm not in _monthnames:
        # Tolerate "Month Day" ordering by swapping the two fields.
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        # Full month names occupy the second half of the table.
        mm -= 12
    if dd[-1] == ',':
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time arrived in the opposite order.
        yy, tm = tm, yy
    if yy[-1] == ',':
        yy = yy[:-1]
    if not yy[0].isdigit():
        # The "year" slot holds something non-numeric; assume it is the zone.
        yy, tz = tz, yy
    if tm[-1] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    elif len(tm) == 1 and '.' in tm[0]:
        # Some non-compliant MUAs use '.' to separate time elements.
        tm = tm[0].split('.')
        if len(tm) == 2:
            [thh, tmm] = tm
            tss = '0'
        elif len(tm) == 3:
            [thh, tmm, tss] = tm
        else:
            # Wrong number of dotted components; without this the hour,
            # minute and second names would be unbound below.
            return None
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    # Check for a yy specified in two-digit format, then convert it to the
    # appropriate four-digit format, according to the POSIX standard. RFC 822
    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
    # mandates a 4-digit yy. For more information, see the documentation for
    # the time module.
    if yy < 100:
        # The year is between 1969 and 1999 (inclusive).
        if yy > 68:
            yy += 1900
        # The year is between 2000 and 2068 (inclusive).
        else:
            yy += 2000
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            # Unknown zone name or the dummy '' zone: leave the offset unset.
            pass
        if tzoffset == 0 and tz.startswith('-'):
            # "-0000" explicitly means "UTC time, local zone unknown".
            tzoffset = None
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ((tzoffset // 100) * 3600 + (tzoffset % 100) * 60)
    # Daylight Saving Time flag is set to -1, since DST is unknown.
    return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]
def parsedate(data):
    """Convert a time string to a 9-item time tuple.

    Delegates to parsedate_tz() and drops the trailing timezone offset.
    Anything that is not a tuple (such as None for an unparsable string) is
    handed back unchanged.
    """
    parsed = parsedate_tz(data)
    if not isinstance(parsed, tuple):
        return parsed
    return parsed[:9]
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp.

    `data' may be any 10-item sequence (tuple or list).  Item 9 is the
    timezone offset in seconds; when it is None the first eight items are
    interpreted as local time via time.mktime() with the DST flag unknown.
    Otherwise the fields are treated as UTC by calendar.timegm() and the
    offset is subtracted.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        # tuple() lets a list be passed here just as in the branch below.
        return time.mktime(tuple(data[:8]) + (-1,))
    t = calendar.timegm(data)
    return t - data[9]
def quote(str):
    """Escape a string for use inside a quoted string.

    Each backslash and each double quote is replaced by a quoted pair
    (the character preceded by a backslash).  The enclosing double quotes
    are not added.
    """
    # Backslashes must be doubled first so the ones introduced for the
    # double quotes are not escaped a second time.
    backslashes_escaped = str.replace('\\', '\\\\')
    fully_escaped = backslashes_escaped.replace('"', '\\"')
    return fully_escaped
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    The parser is a cursor (`pos') over the header text (`field'); each get*
    method consumes input starting at the cursor and advances it.

    Note: this class interface is deprecated and may be removed in the future.
    Use email.utils.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.FWS = self.LWS + self.CR
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        # Comments collected while parsing the current address.
        self.commentlist = []

    def gotonext(self):
        """Skip white space and extract comments.

        Returns the skipped blanks and tabs as a string (line-break characters
        are skipped but not returned).  Comments are appended to
        self.commentlist.
        """
        wslist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                if self.field[self.pos] not in '\n\r':
                    wslist.append(self.field[self.pos])
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break
        return ''.join(wslist)

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each a
        (name, address) pair.  A position that yields no address is recorded
        as ('', '').
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (name, address) pairs: one entry for a plain
        address, several for a group, possibly none.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Snapshot (not alias) the comments seen so far: getphraselist() below
        # appends to self.commentlist, and the addr-spec branch re-parses from
        # oldpos, so restoring an alias would record those comments twice.
        oldcl = list(self.commentlist)
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Step over a stray special so the caller's loop progresses.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None when the cursor is not on a `<' (including when it is
        already at the end of the field).
        """
        if self.pos >= len(self.field) or self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character following the addr-spec
                # (the closing `>' in well-formed input).
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns the address as a string.  When no `@' follows the local part
        only the local part is returned; when the domain is invalid the empty
        string is returned.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            preserve_ws = True
            if self.field[self.pos] == '.':
                # Drop whitespace recorded just before the dot.
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                aslist.append('.')
                self.pos += 1
                preserve_ws = False
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % quote(self.getquote()))
            elif self.field[self.pos] in self.atomends:
                # Drop trailing whitespace before the terminator.
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                break
            else:
                aslist.append(self.getatom())
            ws = self.gotonext()
            if preserve_ws and ws:
                aslist.append(ws)

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        domain = self.getdomain()
        if not domain:
            # Invalid domain, return an empty address instead of returning a
            # local part to denote failed parsing.
            return ''
        return ''.join(aslist) + domain

    def getdomain(self):
        """Get the complete domain name from an address.

        Returns the empty string when a second `@' is encountered.
        """
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] == '@':
                # bpo-34155: Don't parse domains with two `@` like
                # `a@malicious.org@important.com`.
                return ''
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.

        Backslash-escaped characters are taken literally, with the backslash
        removed.
        """
        # The bounds check makes a call at end-of-field return '' as well.
        if self.pos >= len(self.field) or self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        escaped = False
        self.pos += 1
        while self.pos < len(self.field):
            if escaped:
                slist.append(self.field[self.pos])
                escaped = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                continue        # have already advanced pos from getcomment
            elif self.field[self.pos] == '\\':
                escaped = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            atomlist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Whitespace between words is skipped, and
        the words are returned as a list.  Comments met on the way are
        appended to self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.FWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses."""

    def __init__(self, field):
        """Parse `field' into self.addresslist; a false field gives []."""
        AddrlistClass.__init__(self, field)
        self.addresslist = self.getaddrlist() if field else []

    def __len__(self):
        """Return the number of parsed addresses."""
        return len(self.addresslist)

    def __add__(self, other):
        """Return a new list: our addresses, then other's that we lack."""
        # Set union
        union = AddressList(None)
        union.addresslist = self.addresslist[:]
        union.addresslist.extend(
            addr for addr in other.addresslist
            if addr not in self.addresslist)
        return union

    def __iadd__(self, other):
        """Append, in place, the addresses of other not already present."""
        # Set union, in-place
        # Membership is re-checked against the growing list on every step,
        # so this has to remain an explicit loop.
        for addr in other.addresslist:
            if addr not in self.addresslist:
                self.addresslist.append(addr)
        return self

    def __sub__(self, other):
        """Return a new list holding our addresses that other lacks."""
        # Set difference
        difference = AddressList(None)
        difference.addresslist.extend(
            addr for addr in self.addresslist
            if addr not in other.addresslist)
        return difference

    def __isub__(self, other):
        """Remove, in place, one occurrence per address found in other."""
        # Set difference, in-place
        for addr in other.addresslist:
            if addr in self.addresslist:
                self.addresslist.remove(addr)
        return self

    def __getitem__(self, index):
        """Index or slice the underlying address list."""
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]

Mercurial > repos > rliterman > csp2

comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/email/_parseaddr.py @ 69:33d812a61356