csp2: CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/encodings/idna.py annotate

annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/encodings/idna.py @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d

author	jpayne
date	Tue, 18 Mar 2025 16:23:26 -0400
parents
children

rev	line source
jpayne@68	1 # This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)
jpayne@68	2
jpayne@68	3 import stringprep, re, codecs
jpayne@68	4 from unicodedata import ucd_3_2_0 as unicodedata
jpayne@68	5
# IDNA section 3.1: the characters that are recognized as label
# separators -- U+002E (full stop), U+3002 (ideographic full stop),
# U+FF0E (fullwidth full stop) and U+FF61 (halfwidth ideographic
# full stop).
dots = re.compile("[\u002E\u3002\uFF0E\uFF61]")

# IDNA section 5: the ACE prefix.  It is kept twice because this module
# compares it against bytes labels (ace_prefix) as well as against str
# labels (sace_prefix).
ace_prefix = b"xn--"
sace_prefix = "xn--"
jpayne@68	12
jpayne@68	13 # This assumes query strings, so AllowUnassigned is true
def nameprep(label):
    """Apply the Nameprep profile (RFC 3491) of stringprep to *label*.

    The steps are: map (drop table B.1 characters, apply the table B.2
    mapping), normalize with NFKC, reject prohibited characters, and
    finally enforce the bidirectional-text requirements.

    Unassigned code points are not rejected: query strings are assumed,
    so AllowUnassigned is true.

    Returns the prepared label as a str.  Raises UnicodeError when the
    label contains a prohibited character or violates one of the bidi
    requirements.
    """
    # Map: drop "mapped to nothing" characters, case-fold the rest.
    label = "".join(
        stringprep.map_table_b2(c)
        for c in label
        if not stringprep.in_table_b1(c)
    )

    # Normalize
    label = unicodedata.normalize("NFKC", label)

    # Prohibit
    for c in label:
        if (stringprep.in_table_c12(c) or
                stringprep.in_table_c22(c) or
                stringprep.in_table_c3(c) or
                stringprep.in_table_c4(c) or
                stringprep.in_table_c5(c) or
                stringprep.in_table_c6(c) or
                stringprep.in_table_c7(c) or
                stringprep.in_table_c8(c) or
                stringprep.in_table_c9(c)):
            raise UnicodeError("Invalid character %r" % c)

    # Check bidi
    RandAL = [stringprep.in_table_d1(x) for x in label]
    # The requirements below depend only on the label as a whole, so they
    # are evaluated once.  Re-running them for every RandALCat character
    # would rescan the entire label each time (quadratic in its length).
    if any(RandAL):
        # There is a RandAL char in the string. Must perform further
        # tests:
        # 1) The characters in section 5.8 MUST be prohibited.
        #    This is table C.8, which was already checked
        # 2) If a string contains any RandALCat character, the string
        #    MUST NOT contain any LCat character.
        if any(stringprep.in_table_d2(x) for x in label):
            raise UnicodeError("Violation of BIDI requirement 2")

        # 3) If a string contains any RandALCat character, a
        #    RandALCat character MUST be the first character of the
        #    string, and a RandALCat character MUST be the last
        #    character of the string.
        if not RandAL[0] or not RandAL[-1]:
            raise UnicodeError("Violation of BIDI requirement 3")

    return label
jpayne@68	61
def ToASCII(label):
    """Convert a single str label to its ASCII bytes form (RFC 3490 ToASCII).

    A label that is already ASCII is returned unchanged (as bytes).
    Otherwise it is run through nameprep(); if the result is still not
    ASCII it is Punycode-encoded and given the ACE prefix.

    Raises UnicodeError when the resulting label is empty or longer than
    63 bytes, or when a non-ASCII label already starts with the ACE
    prefix after nameprep.
    """
    def _size_checked(candidate):
        # Step 8: a label must be between 1 and 63 bytes long.
        if not 0 < len(candidate) < 64:
            raise UnicodeError("label empty or too long")
        return candidate

    # Step 1: try ASCII
    try:
        as_ascii = label.encode("ascii")
    except UnicodeError:
        as_ascii = None
    if as_ascii is not None:
        # Skip to step 3: UseSTD3ASCIIRules is false, so
        # Skip to step 8.
        return _size_checked(as_ascii)

    # Step 2: nameprep
    prepared = nameprep(label)

    # Step 3: UseSTD3ASCIIRules is false
    # Step 4: try ASCII
    try:
        as_ascii = prepared.encode("ascii")
    except UnicodeError:
        as_ascii = None
    if as_ascii is not None:
        # Skip to step 8.
        return _size_checked(as_ascii)

    # Step 5: Check ACE prefix
    if prepared.startswith(sace_prefix):
        raise UnicodeError("Label starts with ACE prefix")

    # Step 6: Encode with PUNYCODE
    # Step 7: Prepend ACE prefix
    # Step 8: Check size
    return _size_checked(ace_prefix + prepared.encode("punycode"))
jpayne@68	104
def ToUnicode(label):
    """Convert a single label to its Unicode form (RFC 3490 ToUnicode).

    *label* may be bytes (treated as already being ASCII) or str.  A str
    that is not pure ASCII is run through nameprep() first and must be
    ASCII afterwards.  Labels without the ACE prefix are returned as
    their ASCII text; ACE labels are Punycode-decoded and the result is
    verified by encoding it again with ToASCII().

    Raises UnicodeError when the label is unreasonably long, contains
    characters that are invalid in an IDN label, cannot be decoded, or
    does not round-trip through ToASCII().
    """
    if len(label) > 1024:
        # Refuse absurdly long labels before doing any per-character
        # work.  A valid ACE label can never exceed 63 bytes (ToASCII
        # enforces that in the round-trip below), so running nameprep
        # and the Punycode decoder over a huge input only burns CPU.
        # The limit leaves ample room for characters that nameprep maps
        # to nothing.
        raise UnicodeError("label way too long")

    # Step 1: Check for ASCII
    if isinstance(label, bytes):
        pure_ascii = True
    else:
        try:
            label = label.encode("ascii")
            pure_ascii = True
        except UnicodeError:
            pure_ascii = False
    if not pure_ascii:
        # Step 2: Perform nameprep
        label = nameprep(label)
        # It doesn't say this, but apparently, it should be ASCII now
        try:
            label = label.encode("ascii")
        except UnicodeError:
            raise UnicodeError("Invalid character in IDN label")
    # Step 3: Check for ACE prefix
    if not label.startswith(ace_prefix):
        return str(label, "ascii")

    # Step 4: Remove ACE prefix
    label1 = label[len(ace_prefix):]

    # Step 5: Decode using PUNYCODE
    result = label1.decode("punycode")

    # Step 6: Apply ToASCII
    label2 = ToASCII(result)

    # Step 7: Compare the result of step 6 with the one of step 3
    # label2 will already be in lower case.
    if str(label, "ascii").lower() != str(label2, "ascii"):
        raise UnicodeError("IDNA does not round-trip", label, label2)

    # Step 8: return the result of step 5
    return result
jpayne@68	143
jpayne@68	144 ### Codec APIs
jpayne@68	145
class Codec(codecs.Codec):
    """Stateless IDNA codec operating on complete domain names."""

    def encode(self, input, errors='strict'):
        """Encode the domain name *input* (str) into ASCII bytes.

        Returns a tuple (encoded bytes, len(input)).  Only the 'strict'
        error handler is accepted.  Raises UnicodeError for an
        unsupported error handler or for a label that is empty or too
        long.
        """
        if errors != 'strict':
            # IDNA is quite clear that implementations must be strict
            raise UnicodeError("unsupported error handling "+errors)

        if not input:
            return b'', 0

        try:
            ascii_name = input.encode('ascii')
        except UnicodeEncodeError:
            ascii_name = None

        if ascii_name is not None:
            # ASCII name: fast path.  Only the label sizes are checked;
            # the final label may be empty (trailing dot).
            *leading, last = ascii_name.split(b'.')
            for part in leading:
                if not (0 < len(part) < 64):
                    raise UnicodeError("label empty or too long")
            if len(last) >= 64:
                raise UnicodeError("label too long")
            return ascii_name, len(input)

        pieces = dots.split(input)
        suffix = b''
        if pieces and not pieces[-1]:
            # The name ends with a separator: keep one trailing U+002E.
            suffix = b'.'
            pieces.pop()
        # Join with U+002E
        encoded = b'.'.join([ToASCII(piece) for piece in pieces])
        return encoded + suffix, len(input)

    def decode(self, input, errors='strict'):
        """Decode the ASCII domain name *input* into a str.

        Returns a tuple (decoded str, len(input)).  Only the 'strict'
        error handler is accepted.
        """
        if errors != 'strict':
            raise UnicodeError("Unsupported error handling "+errors)

        if not input:
            return "", 0

        # IDNA allows decoding to operate on Unicode strings, too.
        if not isinstance(input, bytes):
            # XXX obviously wrong, see #3232
            input = bytes(input)

        if ace_prefix not in input:
            # Fast path: no ACE label anywhere, plain ASCII decoding
            # is enough (fall through if the data is not ASCII).
            try:
                return input.decode('ascii'), len(input)
            except UnicodeDecodeError:
                pass

        pieces = input.split(b".")
        suffix = ''
        if pieces and not pieces[-1]:
            suffix = '.'
            pieces.pop()

        decoded = ".".join([ToUnicode(piece) for piece in pieces])
        return decoded + suffix, len(input)
jpayne@68	217
class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
    """Incremental IDNA encoder that emits only complete labels."""

    def _buffer_encode(self, input, errors, final):
        """Encode as many complete labels of *input* as possible.

        Returns a tuple (encoded bytes, number of input characters
        consumed).  Unless *final* is true, the last label is left
        unconsumed because more of it may still arrive.
        """
        if errors != 'strict':
            # IDNA is quite clear that implementations must be strict
            raise UnicodeError("unsupported error handling "+errors)

        if not input:
            return (b'', 0)

        pieces = dots.split(input)
        suffix = b''
        if pieces:
            tail = pieces.pop()
            if not tail:
                # Input ends with a separator: every label is complete.
                suffix = b'.'
            elif final:
                # No more data will follow, the last label is complete.
                pieces.append(tail)
            elif pieces:
                # Keep potentially unfinished label until the next call
                suffix = b'.'

        encoded = [ToASCII(piece) for piece in pieces]
        # Consumed input: the labels, one separator between each pair of
        # labels, and the trailing separator (if any).
        consumed = sum(map(len, pieces)) + max(len(pieces) - 1, 0)
        consumed += len(suffix)
        # Join with U+002E
        return (b'.'.join(encoded) + suffix, consumed)
jpayne@68	252
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental IDNA decoder that emits only complete labels."""

    def _buffer_decode(self, input, errors, final):
        """Decode as many complete labels of *input* as possible.

        *input* may be a str or an ASCII bytes-like object.  Returns a
        tuple (decoded str, number of input items consumed).  Unless
        *final* is true, the last label is left unconsumed because more
        of it may still arrive.  Raises UnicodeError for an unsupported
        error handler.
        """
        if errors != 'strict':
            raise UnicodeError("Unsupported error handling "+errors)

        if not input:
            return ("", 0)

        # IDNA allows decoding to operate on Unicode strings, too.
        if isinstance(input, str):
            labels = dots.split(input)
        else:
            # Must be ASCII string
            input = str(input, "ascii")
            labels = input.split(".")

        trailing_dot = ''
        if labels:
            if not labels[-1]:
                trailing_dot = '.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = '.'

        result = []
        size = 0
        for index, label in enumerate(labels):
            result.append(ToUnicode(label))
            # Count the separator in front of every label but the first.
            # This must be keyed on the label index, not on "size so
            # far": an empty leading label (input such as ".a") leaves
            # size at 0, and skipping its separator would under-report
            # the consumed length and leave already-decoded input
            # behind to be decoded a second time.
            if index:
                size += 1
            size += len(label)

        result = ".".join(result) + trailing_dot
        size += len(trailing_dot)
        return (result, size)
jpayne@68	291
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer that encodes with Codec.encode."""
jpayne@68	294
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader that decodes with Codec.decode."""
jpayne@68	297
jpayne@68	298 ### encodings module API
jpayne@68	299
def getregentry():
    """Return the codecs.CodecInfo record describing the 'idna' codec."""
    entry = {
        'name': 'idna',
        'encode': Codec().encode,
        'decode': Codec().decode,
        'incrementalencoder': IncrementalEncoder,
        'incrementaldecoder': IncrementalDecoder,
        'streamwriter': StreamWriter,
        'streamreader': StreamReader,
    }
    return codecs.CodecInfo(**entry)

Mercurial > repos > rliterman > csp2

annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/encodings/idna.py @ 68:5028fdace37b