csp2: CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/encodings/punycode.py annotate

annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/encodings/punycode.py @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d

author	jpayne
date	Tue, 18 Mar 2025 17:55:14 -0400
parents
children

rev	line source
jpayne@69	1 """ Codec for the Punycode encoding, as specified in RFC 3492
jpayne@69	2
jpayne@69	3 Written by Martin v. Löwis.
jpayne@69	4 """
jpayne@69	5
jpayne@69	6 import codecs
jpayne@69	7
jpayne@69	8 ##################### Encoding #####################################
jpayne@69	9
def segregate(str):
    """3.1 Basic code point segregation

    Walk the text once and split it into two parts.

    Returns a pair ``(base, extended)``: ``base`` is a bytes object holding
    every code point below 128 in its original order, and ``extended`` is a
    sorted list of the distinct characters whose code point is 128 or above.
    """
    ascii_part = bytearray()
    non_ascii = set()
    for ch in str:
        code = ord(ch)
        if code >= 128:
            non_ascii.add(ch)
            continue
        ascii_part.append(code)
    return bytes(ascii_part), sorted(non_ascii)
jpayne@69	21
def selective_len(str, max):
    """Count the characters of str whose ordinal is strictly below max."""
    return sum(1 for ch in str if ord(ch) < max)
jpayne@69	29
def selective_find(str, char, index, pos):
    """Locate the next occurrence of char in str after position pos.

    The scan starts at ``pos + 1``.  While scanning, ``index`` is bumped
    for every character that compares lower than ``char``, so that it
    tracks the position of the match when only characters up to and
    including ``char`` are counted.

    Returns ``(index + 1, pos)`` for the match, where ``pos`` is its
    position in the full string, or ``(-1, -1)`` once the end of the
    string is reached without a match.
    """
    end = len(str)
    cursor = pos + 1
    while cursor != end:
        current = str[cursor]
        if current == char:
            return index + 1, cursor
        if current < char:
            index += 1
        cursor += 1
    return (-1, -1)
jpayne@69	47
def insertion_unsort(str, extended):
    """3.2 Insertion unsort coding

    For each character of ``extended`` (taken in the given order), find
    every occurrence in ``str`` and emit one delta per occurrence.

    Returns the list of deltas in the order they were produced.
    """
    deltas = []
    prev_code = 0x80
    prev_index = -1
    for ch in extended:
        code = ord(ch)
        # Starting delta for this character: the jump in code point since
        # the previous character, scaled by the count of lower-ordinal
        # characters in the text plus one.
        pending = (selective_len(str, code) + 1) * (code - prev_code)
        index, pos = selective_find(str, ch, -1, -1)
        while index != -1:
            pending += index - prev_index
            deltas.append(pending - 1)
            prev_index = index
            # Later occurrences of the same character only pay the
            # index difference.
            pending = 0
            index, pos = selective_find(str, ch, index, pos)
        prev_code = code
    return deltas
jpayne@69	69
def T(j, bias):
    """Return the threshold for digit position j, clamped to 1..26."""
    # Punycode parameters: tmin = 1, tmax = 26, base = 36
    return max(1, min(26, 36 * (j + 1) - bias))
jpayne@69	76
# Output alphabet: digit value v is written as byte digits[v].
digits = b"abcdefghijklmnopqrstuvwxyz0123456789"
def generate_generalized_integer(N, bias):
    """3.3 Generalized variable-length integers

    Encode the integer N as a sequence of digits from ``digits``, using
    the per-position threshold ``T(j, bias)``.  Returns the digits as a
    bytes object.
    """
    encoded = bytearray()
    position = 0
    threshold = T(position, bias)
    while N >= threshold:
        # Emit one non-final digit and carry the quotient forward.
        N, remainder = divmod(N - threshold, 36 - threshold)
        encoded.append(digits[threshold + remainder])
        position += 1
        threshold = T(position, bias)
    # A value below the threshold is the final digit.
    encoded.append(digits[N])
    return bytes(encoded)
jpayne@69	90
def adapt(delta, first, numchars):
    """Compute the next bias from the delta that was just processed.

    ``first`` selects the initial scaling divisor (700 when true, 2
    otherwise); ``numchars`` is the divisor for the proportional
    increase applied afterwards.
    """
    delta //= 700 if first else 2
    delta += delta // numchars
    # ((base - tmin) * tmax) // 2 == 455
    offset = 0
    while delta > 455:
        delta //= 35  # base - tmin
        offset += 36
    return offset + (36 * delta // (delta + 38))
jpayne@69	104
jpayne@69	105
def generate_integers(baselen, deltas):
    """3.4 Bias adaptation

    Encode each delta as a generalized variable-length integer and
    concatenate the results, re-adapting the bias after every delta.
    Returns the concatenation as a bytes object.
    """
    # Punycode parameters: initial bias = 72, damp = 700, skew = 38
    chunks = []
    bias = 72
    handled = baselen
    first = True
    for delta in deltas:
        chunks.append(generate_generalized_integer(delta, bias))
        handled += 1
        bias = adapt(delta, first, handled)
        first = False
    return b"".join(chunks)
jpayne@69	116
def punycode_encode(text):
    """Encode text to punycode and return the result as bytes.

    The output is the ASCII part of the text, a ``-`` delimiter, and the
    encoded deltas.  The delimiter is left out when the text has no ASCII
    characters.
    """
    ascii_part, non_ascii = segregate(text)
    deltas = insertion_unsort(text, non_ascii)
    encoded_tail = generate_integers(len(ascii_part), deltas)
    if not ascii_part:
        return encoded_tail
    return ascii_part + b"-" + encoded_tail
jpayne@69	124
jpayne@69	125 ##################### Decoding #####################################
jpayne@69	126
def decode_generalized_number(extended, extpos, bias, errors):
    """3.3 Generalized variable-length integers

    Decode one variable-length integer from ``extended`` starting at
    index ``extpos``.

    Only ``A``-``Z`` (digit values 0-25) and ``0``-``9`` (digit values
    26-35) are accepted as digits; lower-case letters are not recognized
    here.

    Returns a pair ``(newpos, value)`` where ``newpos`` is the index just
    past the last digit consumed.  When ``errors`` is not ``"strict"`` and
    the input ends early or contains a non-digit, ``value`` is ``None``.

    Raises UnicodeError when ``errors == "strict"`` and the input ends in
    the middle of a number or contains a character that is not a digit.
    """
    result = 0
    w = 1
    j = 0
    while 1:
        try:
            char = ord(extended[extpos])
        except IndexError:
            if errors == "strict":
                raise UnicodeError("incomplete punicode string")
            return extpos + 1, None
        extpos += 1
        if 0x41 <= char <= 0x5A: # A-Z
            digit = char - 0x41
        elif 0x30 <= char <= 0x39:
            digit = char - 22 # 0x30-26
        elif errors == "strict":
            # extpos has already been advanced past the offending
            # character, so it lives at extpos - 1.  Indexing extpos here
            # would report the wrong character, or raise IndexError when
            # the bad character is the last one in the string.
            raise UnicodeError("Invalid extended code point '%s'"
                               % extended[extpos - 1])
        else:
            return extpos, None
        t = T(j, bias)
        result += digit * w
        if digit < t:
            # A digit below the threshold terminates the number.
            return extpos, result
        w = w * (36 - t)
        j += 1
jpayne@69	155
jpayne@69	156
def insertion_sort(base, extended, errors):
    """3.2 Insertion unsort coding

    Decode the deltas in ``extended`` one at a time and insert the
    resulting characters into ``base``.  Returns the final string.

    If a delta cannot be decoded in a non-strict mode, the string built
    so far is returned.  In strict mode a code point above U+10FFFF
    raises UnicodeError; otherwise it is replaced by ``?``.
    """
    code_point = 0x80
    insert_at = -1
    bias = 72
    cursor = 0
    total = len(extended)
    while cursor < total:
        next_cursor, delta = decode_generalized_number(extended, cursor,
                                                       bias, errors)
        if delta is None:
            # There was an error in decoding. We can't continue because
            # synchronization is lost.
            return base
        # Advance the position; each full wrap past the end of the
        # string moves on to the next code point.
        wraps, insert_at = divmod(insert_at + delta + 1, len(base) + 1)
        code_point += wraps
        if code_point > 0x10FFFF:
            if errors == "strict":
                raise UnicodeError("Invalid character U+%x" % code_point)
            code_point = ord('?')
        base = base[:insert_at] + chr(code_point) + base[insert_at:]
        bias = adapt(delta, cursor == 0, len(base))
        cursor = next_cursor
    return base
jpayne@69	181
def punycode_decode(text, errors):
    """Decode punycode input (str, bytes-like or memoryview) to a str.

    Everything before the last ``-`` is the literal ASCII part, decoded
    with the given ``errors`` mode.  Everything after it is the encoded
    part, which is upper-cased before being handed to ``insertion_sort``.
    Without a ``-`` the whole input is the encoded part.
    """
    if isinstance(text, str):
        text = text.encode("ascii")
    if isinstance(text, memoryview):
        text = bytes(text)
    delimiter = text.rfind(b"-")
    if delimiter != -1:
        literal = str(text[:delimiter], "ascii", errors)
    else:
        literal = ""
    # With no delimiter rfind() gave -1, so the slice below starts at 0
    # and covers the whole input.
    encoded = str(text[delimiter + 1:], "ascii").upper()
    return insertion_sort(literal, encoded, errors)
jpayne@69	195
jpayne@69	196 ### Codec APIs
jpayne@69	197
class Codec(codecs.Codec):
    """Stateless punycode encoder/decoder."""

    def encode(self, input, errors='strict'):
        """Return ``(encoded_bytes, len(input))``; ``errors`` is not used."""
        return punycode_encode(input), len(input)

    def decode(self, input, errors='strict'):
        """Return ``(decoded_str, len(input))``.

        Raises UnicodeError for any ``errors`` value other than
        'strict', 'replace' or 'ignore'.
        """
        supported = errors in ('strict', 'replace', 'ignore')
        if not supported:
            raise UnicodeError("Unsupported error handling "+errors)
        return punycode_decode(input, errors), len(input)
jpayne@69	209
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental encoder that punycode-encodes each input on its own."""

    def encode(self, input, final=False):
        """Return the punycode encoding of ``input``; ``final`` is not used."""
        encoded = punycode_encode(input)
        return encoded
jpayne@69	213
class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental decoder that punycode-decodes each input on its own."""

    def decode(self, input, final=False):
        """Return the decoded string for ``input``; ``final`` is not used.

        Raises UnicodeError when ``self.errors`` is anything other than
        'strict', 'replace' or 'ignore'.
        """
        mode = self.errors
        if mode not in ('strict', 'replace', 'ignore'):
            raise UnicodeError("Unsupported error handling "+mode)
        return punycode_decode(input, mode)
jpayne@69	219
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer combining Codec with codecs.StreamWriter; adds nothing."""
jpayne@69	222
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader combining Codec with codecs.StreamReader; adds nothing."""
jpayne@69	225
jpayne@69	226 ### encodings module API
jpayne@69	227
def getregentry():
    """Build and return the codecs.CodecInfo record for 'punycode'."""
    # Separate Codec instances back the encode and decode callables.
    encoder = Codec().encode
    decoder = Codec().decode
    info = codecs.CodecInfo(
        name='punycode',
        encode=encoder,
        decode=decoder,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )
    return info

Mercurial > repos > rliterman > csp2

annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/encodings/punycode.py @ 69:33d812a61356