comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/encodings/punycode.py @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 69:33d812a61356
1 """ Codec for the Punycode encoding, as specified in RFC 3492
2
3 Written by Martin v. Löwis.
4 """
5
6 import codecs
7
8 ##################### Encoding #####################################
9
def segregate(str):
    """3.1 Basic code point segregation

    Split *str* into its ASCII part and its non-ASCII part.

    Returns a pair ``(base, extended)``: ``base`` is a ``bytes`` object
    holding the code points below 128 in their original order, and
    ``extended`` is a sorted list of the distinct characters with code
    point 128 or above.
    """
    ascii_part = bytearray()
    non_ascii = set()
    for ch in str:
        code = ord(ch)
        if code >= 128:
            non_ascii.add(ch)
        else:
            ascii_part.append(code)
    return bytes(ascii_part), sorted(non_ascii)
21
def selective_len(str, max):
    """Count the characters of str whose ordinal is strictly below max."""
    return sum(1 for ch in str if ord(ch) < max)
29
def selective_find(str, char, index, pos):
    """Locate the next occurrence of char in str after position pos.

    Scanning starts at pos + 1.  Every character smaller than char that
    is passed over increments index.  When char is found, the pair
    (index + 1, position of the match) is returned; when the end of the
    string is reached first, (-1, -1) is returned.
    """
    end = len(str)
    cursor = pos + 1
    while cursor != end:
        current = str[cursor]
        if current == char:
            return index + 1, cursor
        if current < char:
            # Only characters below char count towards the index.
            index += 1
        cursor += 1
    return (-1, -1)
47
def insertion_unsort(str, extended):
    """3.2 Insertion unsort coding

    For each character of extended (in the given order) and each of its
    occurrences in str, emit one delta.  Returns the list of deltas.
    """
    deltas = []
    prev_code = 0x80
    # Deliberately not reset per character: it carries the index of the
    # most recently emitted occurrence across characters.
    prev_index = -1
    for ch in extended:
        code = ord(ch)
        visible = selective_len(str, code)
        delta = (visible + 1) * (code - prev_code)
        index, pos = selective_find(str, ch, -1, -1)
        while index != -1:
            delta += index - prev_index
            deltas.append(delta - 1)
            prev_index = index
            delta = 0
            index, pos = selective_find(str, ch, index, pos)
        prev_code = code
    return deltas
69
def T(j, bias):
    """Return the threshold for digit position j, clamped to [1, 26]."""
    # Punycode parameters: tmin = 1, tmax = 26, base = 36
    threshold = 36 * (j + 1) - bias
    return min(max(threshold, 1), 26)
76
# Digit symbols for values 0..35: a-z followed by 0-9.
digits = b"abcdefghijklmnopqrstuvwxyz0123456789"
def generate_generalized_integer(N, bias):
    """3.3 Generalized variable-length integers

    Encode the integer N as a sequence of base-36 digits using the
    thresholds produced by T() for the given bias.  Returns bytes.
    """
    encoded = bytearray()
    j = 0
    t = T(j, bias)
    while N >= t:
        # Emit one non-final digit and carry the quotient forward.
        N, remainder = divmod(N - t, 36 - t)
        encoded.append(digits[t + remainder])
        j += 1
        t = T(j, bias)
    # A value below the threshold is the final digit.
    encoded.append(digits[N])
    return bytes(encoded)
90
def adapt(delta, first, numchars):
    """Compute the bias to use for the next delta.

    delta is the delta just processed, first tells whether it was the
    very first one (it is then divided by 700 instead of 2), and
    numchars is the divisor used to scale the delta up.
    """
    delta //= 700 if first else 2
    delta += delta // numchars
    # ((base - tmin) * tmax) // 2 == 455
    bias = 0
    while delta > 455:
        delta //= 35  # base - tmin
        bias += 36
    return bias + (36 * delta // (delta + 38))
104
105
def generate_integers(baselen, deltas):
    """3.4 Bias adaptation

    Encode every delta as a generalized variable-length integer,
    re-adapting the bias after each one, and return the concatenated
    bytes.  baselen is the number of basic code points.
    """
    # Punycode parameters: initial bias = 72, damp = 700, skew = 38
    bias = 72
    encoded = bytearray()
    for count, delta in enumerate(deltas, 1):
        encoded += generate_generalized_integer(delta, bias)
        bias = adapt(delta, count == 1, baselen + count)
    return bytes(encoded)
116
def punycode_encode(text):
    """Encode the string text as punycode and return the result as bytes.

    When text contains basic (ASCII) code points, they come first and are
    separated from the encoded part by b"-".
    """
    base, extended = segregate(text)
    encoded = generate_integers(len(base), insertion_unsort(text, extended))
    if not base:
        return encoded
    return base + b"-" + encoded
124
125 ##################### Decoding #####################################
126
def decode_generalized_number(extended, extpos, bias, errors):
    """3.3 Generalized variable-length integers

    Decode one variable-length integer from the string extended,
    starting at index extpos, using thresholds derived from bias.

    Only the symbols A-Z (values 0-25) and 0-9 (values 26-35) are
    accepted; any other character, including lower-case letters, is
    treated as invalid.

    Returns a pair (newpos, value), where newpos is the index just past
    the consumed digits.  If the input ends early or holds an invalid
    symbol and errors is not "strict", value is None.

    Raises UnicodeError in "strict" mode for truncated input or an
    invalid symbol.
    """
    result = 0
    w = 1
    j = 0
    while 1:
        try:
            symbol = extended[extpos]
        except IndexError:
            if errors == "strict":
                raise UnicodeError("incomplete punicode string")
            return extpos + 1, None
        char = ord(symbol)
        extpos += 1
        if 0x41 <= char <= 0x5A: # A-Z
            digit = char - 0x41
        elif 0x30 <= char <= 0x39:
            digit = char - 22 # 0x30-26
        elif errors == "strict":
            # Report the symbol that was actually read.  extpos already
            # points past it, so indexing with extpos here would name
            # the wrong character or raise IndexError at end of input.
            raise UnicodeError("Invalid extended code point '%s'"
                               % symbol)
        else:
            return extpos, None
        t = T(j, bias)
        result += digit * w
        if digit < t:
            return extpos, result
        w = w * (36 - t)
        j += 1
155
156
def insertion_sort(base, extended, errors):
    """3.2 Insertion unsort coding

    Decode the deltas in extended one at a time and insert the resulting
    characters into the string base.  Returns the final string.

    In "strict" mode a code point above U+10FFFF raises UnicodeError;
    otherwise '?' is substituted.  If a delta cannot be decoded, the
    string built so far is returned.
    """
    code = 0x80
    position = -1
    bias = 72
    cursor = 0
    end = len(extended)
    while cursor < end:
        next_cursor, delta = decode_generalized_number(extended, cursor,
                                                       bias, errors)
        if delta is None:
            # There was an error in decoding. We can't continue because
            # synchronization is lost.
            return base
        position += delta + 1
        # Each full pass over the len(base) + 1 insertion slots advances
        # the code point by one; the remainder is the insertion slot.
        wraps, position = divmod(position, len(base) + 1)
        code += wraps
        if code > 0x10FFFF:
            if errors == "strict":
                raise UnicodeError("Invalid character U+%x" % code)
            code = ord('?')
        base = base[:position] + chr(code) + base[position:]
        bias = adapt(delta, cursor == 0, len(base))
        cursor = next_cursor
    return base
181
def punycode_decode(text, errors):
    """Decode punycode text (str, bytes or memoryview) to a string.

    Everything before the last b"-" is taken as the basic code points;
    the remainder is upper-cased and decoded as the extended part.
    Without a b"-", the whole input is the extended part.
    """
    if isinstance(text, str):
        text = text.encode("ascii")
    if isinstance(text, memoryview):
        text = bytes(text)
    head, delimiter, tail = text.rpartition(b"-")
    # With no delimiter, rpartition leaves the whole input in tail.
    base = str(head, "ascii", errors) if delimiter else ""
    extended = str(tail, "ascii").upper()
    return insertion_sort(base, extended, errors)
195
196 ### Codec APIs
197
class Codec(codecs.Codec):
    """Stateless punycode encoder/decoder."""

    def encode(self, input, errors='strict'):
        """Return (encoded bytes, number of characters consumed)."""
        return punycode_encode(input), len(input)

    def decode(self, input, errors='strict'):
        """Return (decoded string, length of input consumed).

        Only the 'strict', 'replace' and 'ignore' error handlers are
        supported; anything else raises UnicodeError.
        """
        if errors in ('strict', 'replace', 'ignore'):
            return punycode_decode(input, errors), len(input)
        raise UnicodeError("Unsupported error handling "+errors)
209
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental encoder that encodes each input chunk on its own."""

    def encode(self, input, final=False):
        """Return the punycode encoding of input; final is not used."""
        encoded = punycode_encode(input)
        return encoded
213
class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental decoder that decodes each input chunk on its own."""

    def decode(self, input, final=False):
        """Return the decoded string for input; final is not used.

        Raises UnicodeError if self.errors is not one of 'strict',
        'replace' or 'ignore'.
        """
        if self.errors in ('strict', 'replace', 'ignore'):
            return punycode_decode(input, self.errors)
        raise UnicodeError("Unsupported error handling "+self.errors)
219
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer combining Codec with codecs.StreamWriter."""
222
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader combining Codec with codecs.StreamReader."""
225
226 ### encodings module API
227
def getregentry():
    """Return the codecs.CodecInfo describing the 'punycode' codec."""
    encoder = Codec().encode
    decoder = Codec().decode
    return codecs.CodecInfo(
        encode=encoder,
        decode=decoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        name='punycode',
    )