Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/encodings/punycode.py @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 """ Codec for the Punycode encoding, as specified in RFC 3492 | |
2 | |
3 Written by Martin v. Löwis. | |
4 """ | |
5 | |
6 import codecs | |
7 | |
8 ##################### Encoding ##################################### | |
9 | |
def segregate(str):
    """3.1 Basic code point segregation

    Split str into its basic code points (ordinal < 128), returned as a
    bytes object in their original order, and a sorted list of the distinct
    characters whose ordinal is 128 or above.
    """
    ascii_part = bytearray(ord(ch) for ch in str if ord(ch) < 128)
    non_ascii = sorted({ch for ch in str if ord(ch) >= 128})
    return bytes(ascii_part), non_ascii
21 | |
def selective_len(str, max):
    """Return how many characters of str have an ordinal strictly below max."""
    return sum(1 for ch in str if ord(ch) < max)
29 | |
def selective_find(str, char, index, pos):
    """Return a pair (index, pos) locating the next occurrence of char
    in str after position pos.

    pos is the offset in the full string. index is the matching offset
    when only characters with ordinals up to and including char are
    counted. The index/pos arguments describe where the previous search
    stopped; pass -1 for both to search from the start. (-1, -1) is
    returned when the end of the string is reached without a match."""

    length = len(str)
    while True:
        pos += 1
        if pos == length:
            return (-1, -1)
        current = str[pos]
        if current == char:
            return index + 1, pos
        if current < char:
            # Smaller characters occupy a slot in the restricted string.
            index += 1
47 | |
def insertion_unsort(str, extended):
    """3.2 Insertion unsort coding

    Walk the characters of extended in the given order and, for every
    occurrence of each one in str, append one delta to the returned list.
    """
    deltas = []
    prev_char = 0x80
    prev_index = -1
    for ch in extended:
        code = ord(ch)
        # Skipping from prev_char to code costs one full pass over the
        # (curlen + 1) insertion slots per skipped code point.
        pending = (selective_len(str, code) + 1) * (code - prev_char)
        index = pos = -1
        while True:
            index, pos = selective_find(str, ch, index, pos)
            if index == -1:
                break
            pending += index - prev_index
            deltas.append(pending - 1)
            prev_index = index
            pending = 0
        prev_char = code

    return deltas
69 | |
def T(j, bias):
    """Return the threshold 36 * (j + 1) - bias, clamped to the range 1..26."""
    # Punycode parameters: tmin = 1, tmax = 26, base = 36
    return min(26, max(1, 36 * (j + 1) - bias))
76 | |
digits = b"abcdefghijklmnopqrstuvwxyz0123456789"
def generate_generalized_integer(N, bias):
    """3.3 Generalized variable-length integers

    Encode N as a bytes object of base-36 digits, using the threshold
    T(j, bias) for the digit at position j.
    """
    out = bytearray()
    j = 0
    t = T(j, bias)
    while N >= t:
        # Emit one non-final digit and carry the quotient onwards.
        quotient, remainder = divmod(N - t, 36 - t)
        out.append(digits[t + remainder])
        N = quotient
        j += 1
        t = T(j, bias)
    # N is now below the threshold: it becomes the final digit.
    out.append(digits[N])
    return bytes(out)
90 | |
def adapt(delta, first, numchars):
    """Return the bias to use for the next delta.

    delta is the delta just processed, first is true for the very first
    delta (it is then divided by 700 instead of 2), and numchars is the
    divisor used for the proportional increment of delta.
    """
    delta //= 700 if first else 2
    delta += delta // numchars
    # ((base - tmin) * tmax) // 2 == 455
    divisions = 0
    while delta > 455:
        delta //= 35  # base - tmin
        divisions += 36
    return divisions + (36 * delta // (delta + 38))
104 | |
105 | |
def generate_integers(baselen, deltas):
    """3.4 Bias adaptation

    Encode every delta as a generalized variable-length integer, adapting
    the bias after each one, and return the concatenated bytes.
    """
    # Punycode parameters: initial bias = 72, damp = 700, skew = 38
    encoded = bytearray()
    bias = 72
    handled = 0
    for delta in deltas:
        encoded += generate_generalized_integer(delta, bias)
        handled += 1
        bias = adapt(delta, handled == 1, baselen + handled)
    return bytes(encoded)
116 | |
def punycode_encode(text):
    """Return the punycode encoding of text as a bytes object.

    The basic code points come first, followed by a "-" delimiter (only
    when there is at least one basic code point) and the encoded deltas.
    """
    base, extended = segregate(text)
    deltas = insertion_unsort(text, extended)
    encoded = generate_integers(len(base), deltas)
    return base + b"-" + encoded if base else encoded
124 | |
125 ##################### Decoding ##################################### | |
126 | |
def decode_generalized_number(extended, extpos, bias, errors):
    """3.3 Generalized variable-length integers

    Decode one generalized variable-length integer from extended,
    starting at index extpos.

    Only upper-case A-Z (values 0..25) and 0-9 (values 26..35) are
    accepted as digits. bias is passed on to T() to obtain the threshold
    for each digit position.

    Returns a pair (newpos, value) where newpos is the index following the
    last character consumed. value is None when decoding failed and errors
    is not "strict".

    Raises UnicodeError when errors is "strict" and the input ends in the
    middle of a number or contains a character that is not a digit.
    """
    result = 0
    w = 1
    j = 0
    while 1:
        try:
            char = ord(extended[extpos])
        except IndexError:
            if errors == "strict":
                raise UnicodeError("incomplete punycode string")
            return extpos + 1, None
        extpos += 1
        if 0x41 <= char <= 0x5A: # A-Z
            digit = char - 0x41
        elif 0x30 <= char <= 0x39:
            digit = char - 22 # 0x30-26
        elif errors == "strict":
            # extpos was already advanced past the offending character, so
            # step back by one to report it (indexing extpos itself would
            # name the wrong character or run off the end of the string).
            raise UnicodeError("Invalid extended code point '%s'"
                               % extended[extpos-1])
        else:
            return extpos, None
        t = T(j, bias)
        result += digit * w
        if digit < t:
            # A digit below the threshold terminates the number.
            return extpos, result
        w = w * (36 - t)
        j += 1
155 | |
156 | |
def insertion_sort(base, extended, errors):
    """3.2 Insertion unsort coding

    Decode the deltas in extended one by one and insert the character each
    one designates into base. Returns the resulting string; if a delta
    cannot be decoded, the string built so far is returned.
    """
    char = 0x80
    pos = -1
    bias = 72
    extpos = 0
    total = len(extended)
    while extpos < total:
        newpos, delta = decode_generalized_number(extended, extpos,
                                                  bias, errors)
        if delta is None:
            # There was an error in decoding. We can't continue because
            # synchronization is lost.
            return base
        pos += delta + 1
        # Each full pass over the insertion slots advances the code point.
        wraps, pos = divmod(pos, len(base) + 1)
        char += wraps
        if char > 0x10FFFF:
            if errors == "strict":
                raise UnicodeError("Invalid character U+%x" % char)
            char = ord('?')
        base = base[:pos] + chr(char) + base[pos:]
        bias = adapt(delta, (extpos == 0), len(base))
        extpos = newpos
    return base
181 | |
def punycode_decode(text, errors):
    """Decode punycode text (str, bytes-like or memoryview) to a string.

    Everything before the last "-" is taken as the basic code points;
    the remainder, upper-cased, holds the encoded deltas. Without a "-"
    the whole input is treated as deltas.
    """
    if isinstance(text, str):
        text = text.encode("ascii")
    if isinstance(text, memoryview):
        text = bytes(text)
    head, delimiter, tail = text.rpartition(b"-")
    if delimiter:
        base = str(head, "ascii", errors)
    else:
        base = ""
    extended = str(tail, "ascii").upper()
    return insertion_sort(base, extended, errors)
195 | |
196 ### Codec APIs | |
197 | |
class Codec(codecs.Codec):
    """Stateless punycode encoder and decoder."""

    def encode(self, input, errors='strict'):
        """Return (encoded bytes, number of characters consumed)."""
        return punycode_encode(input), len(input)

    def decode(self, input, errors='strict'):
        """Return (decoded string, length of input consumed).

        Raises UnicodeError for any errors value other than 'strict',
        'replace' or 'ignore'.
        """
        if errors != 'strict' and errors != 'replace' and errors != 'ignore':
            raise UnicodeError("Unsupported error handling "+errors)
        return punycode_decode(input, errors), len(input)
209 | |
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental encoder; every call encodes its input independently."""

    def encode(self, input, final=False):
        encoded = punycode_encode(input)
        return encoded
213 | |
class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental decoder; every call decodes its input independently."""

    def decode(self, input, final=False):
        scheme = self.errors
        if scheme != 'strict' and scheme != 'replace' and scheme != 'ignore':
            raise UnicodeError("Unsupported error handling "+scheme)
        return punycode_decode(input, scheme)
219 | |
class StreamWriter(Codec,codecs.StreamWriter):
    """Stream writer combining Codec with codecs.StreamWriter."""
222 | |
class StreamReader(Codec,codecs.StreamReader):
    """Stream reader combining Codec with codecs.StreamReader."""
225 | |
226 ### encodings module API | |
227 | |
def getregentry():
    """Return the codecs.CodecInfo entry describing the punycode codec."""
    encoder = Codec().encode
    decoder = Codec().decode
    return codecs.CodecInfo(
        name='punycode',
        encode=encoder,
        decode=decoder,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )