csp2: CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/encodings/idna.py comparison

comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/encodings/idna.py @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d

author	jpayne
date	Tue, 18 Mar 2025 17:55:14 -0400
parents
children

comparison

equal deleted inserted replaced

-:0e9998148a16
+:33d812a61356
+# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)
+import stringprep, re, codecs
+from unicodedata import ucd_3_2_0 as unicodedata
# Label separators from IDNA section 3.1: U+002E (full stop), U+3002
# (ideographic full stop), U+FF0E (fullwidth full stop) and U+FF61
# (halfwidth ideographic full stop).
dots = re.compile("[\u002E\u3002\uFF0E\uFF61]")

# ACE prefix from IDNA section 5, kept both as bytes (for encoded labels)
# and as text (for labels that are still str).
ace_prefix = b"xn--"
sace_prefix = ace_prefix.decode("ascii")
# This assumes query strings, so AllowUnassigned is true
def nameprep(label):
    """Apply the Nameprep (RFC 3491) profile of stringprep to *label*.

    The label is processed in four stages: characters in table B.1 are
    dropped and the rest go through the B.2 mapping, the result is
    NFKC-normalized, every character is checked against the prohibited
    tables, and finally the bidirectional requirements are verified.

    Returns the prepared label as a str.

    Raises UnicodeError if the label contains a prohibited character or
    violates one of the bidi requirements.
    """
    # Map: drop "map to nothing" characters, apply table B.2 to the rest.
    label = "".join(
        stringprep.map_table_b2(c)
        for c in label
        if not stringprep.in_table_b1(c)
    )

    # Normalize
    label = unicodedata.normalize("NFKC", label)

    # Prohibit
    for c in label:
        if (stringprep.in_table_c12(c) or
                stringprep.in_table_c22(c) or
                stringprep.in_table_c3(c) or
                stringprep.in_table_c4(c) or
                stringprep.in_table_c5(c) or
                stringprep.in_table_c6(c) or
                stringprep.in_table_c7(c) or
                stringprep.in_table_c8(c) or
                stringprep.in_table_c9(c)):
            raise UnicodeError("Invalid character %r" % c)

    # Check bidi
    RandAL = [stringprep.in_table_d1(x) for x in label]
    # Run the checks once if any RandAL character is present.  Repeating
    # them per RandAL character gives the same answer but costs quadratic
    # time on long right-to-left labels.
    if any(RandAL):
        # There is a RandAL char in the string. Must perform further
        # tests:
        # 1) The characters in section 5.8 MUST be prohibited.
        #    This is table C.8, which was already checked
        # 2) If a string contains any RandALCat character, the string
        #    MUST NOT contain any LCat character.
        if any(stringprep.in_table_d2(x) for x in label):
            raise UnicodeError("Violation of BIDI requirement 2")
        # 3) If a string contains any RandALCat character, a
        #    RandALCat character MUST be the first character of the
        #    string, and a RandALCat character MUST be the last
        #    character of the string.
        if not RandAL[0] or not RandAL[-1]:
            raise UnicodeError("Violation of BIDI requirement 3")

    return label
def ToASCII(label):
    """Convert a single str label to its ASCII (ACE) form as bytes.

    Labels that are already ASCII are returned encoded as-is.  Other
    labels are passed through nameprep and, if still not ASCII, encoded
    with punycode and given the ACE prefix.

    Raises UnicodeError if the resulting label is empty or 64 or more
    bytes long, or if a non-ASCII label already starts with the ACE
    prefix after nameprep.
    """
    # Step 1: try ASCII
    try:
        encoded = label.encode("ascii")
    except UnicodeError:
        pass
    else:
        # Skip to step 3: UseSTD3ASCIIRules is false, so
        # Skip to step 8.
        if not 0 < len(encoded) < 64:
            raise UnicodeError("label empty or too long")
        return encoded

    # Step 2: nameprep
    prepared = nameprep(label)

    # Step 3: UseSTD3ASCIIRules is false
    # Step 4: try ASCII
    try:
        encoded = prepared.encode("ascii")
    except UnicodeError:
        pass
    else:
        # Skip to step 8.
        if not 0 < len(encoded) < 64:
            raise UnicodeError("label empty or too long")
        return encoded

    # Step 5: Check ACE prefix
    if prepared.startswith(sace_prefix):
        raise UnicodeError("Label starts with ACE prefix")

    # Steps 6 and 7: encode with PUNYCODE, then prepend the ACE prefix
    encoded = ace_prefix + prepared.encode("punycode")

    # Step 8: Check size
    if not 0 < len(encoded) < 64:
        raise UnicodeError("label empty or too long")
    return encoded
def ToUnicode(label):
    """Convert a single label (bytes or str) to its Unicode form.

    A label without the ACE prefix is returned as an ASCII-decoded str.
    A label with the ACE prefix is punycode-decoded, and the result is
    only accepted if ToASCII() turns it back into the original label
    (compared case-insensitively).

    Raises UnicodeError if a str label is still not ASCII after nameprep,
    if a bytes label is not ASCII, or if the decoded label does not
    round-trip through ToASCII().
    """
    # Step 1: Check for ASCII
    if isinstance(label, bytes):
        pure_ascii = True
    else:
        try:
            label = label.encode("ascii")
            pure_ascii = True
        except UnicodeError:
            pure_ascii = False
    if not pure_ascii:
        # Step 2: Perform nameprep
        label = nameprep(label)
        # It doesn't say this, but apparently, it should be ASCII now
        try:
            label = label.encode("ascii")
        except UnicodeError:
            raise UnicodeError("Invalid character in IDN label")

    # Step 3: Check for ACE prefix.  RFC 3490 section 5 requires the
    # prefix to be recognized case-insensitively, so "XN--" and "Xn--"
    # must be treated like "xn--".
    if label[:len(ace_prefix)].lower() != ace_prefix:
        return str(label, "ascii")

    # Step 4: Remove ACE prefix
    label1 = label[len(ace_prefix):]

    # Step 5: Decode using PUNYCODE
    result = label1.decode("punycode")

    # Step 6: Apply ToASCII
    label2 = ToASCII(result)

    # Step 7: Compare the result of step 6 with the one of step 3
    # label2 will already be in lower case.
    if str(label, "ascii").lower() != str(label2, "ascii"):
        raise UnicodeError("IDNA does not round-trip", label, label2)

    # Step 8: return the result of step 5
    return result
### Codec APIs

class Codec(codecs.Codec):
    """Stateless IDNA codec working on whole dot-separated names."""

    def encode(self, input, errors='strict'):
        """Encode the str *input* to its ASCII form.

        Returns a ``(bytes, length_consumed)`` tuple.  A single trailing
        dot is preserved.

        Raises UnicodeError if *errors* is not 'strict' or if a label is
        empty or too long.
        """
        if errors != 'strict':
            # IDNA is quite clear that implementations must be strict
            raise UnicodeError("unsupported error handling "+errors)

        if not input:
            return b'', 0

        try:
            result = input.encode('ascii')
        except UnicodeEncodeError:
            pass
        else:
            # ASCII name: fast path
            labels = result.split(b'.')
            for label in labels[:-1]:
                if not (0 < len(label) < 64):
                    raise UnicodeError("label empty or too long")
            # The last label may be empty (trailing dot) but not too long.
            if len(labels[-1]) >= 64:
                raise UnicodeError("label too long")
            return result, len(input)

        result = bytearray()
        labels = dots.split(input)
        if labels and not labels[-1]:
            trailing_dot = b'.'
            del labels[-1]
        else:
            trailing_dot = b''
        for label in labels:
            if result:
                # Join with U+002E
                result.extend(b'.')
            result.extend(ToASCII(label))
        return bytes(result+trailing_dot), len(input)

    def decode(self, input, errors='strict'):
        """Decode *input* (bytes-like or str) to its Unicode form.

        Returns a ``(str, length_consumed)`` tuple.  A single trailing
        dot is preserved; labels are joined with U+002E.

        Raises UnicodeError if *errors* is not 'strict' or if a label
        cannot be converted by ToUnicode().
        """
        if errors != 'strict':
            raise UnicodeError("Unsupported error handling "+errors)

        if not input:
            return "", 0

        # IDNA allows decoding to operate on Unicode strings, too.
        if isinstance(input, str):
            # bytes(str) would raise TypeError, so split text input on
            # the IDNA label separators and let ToUnicode() take the str
            # labels directly.
            labels = dots.split(input)
        else:
            if not isinstance(input, bytes):
                # Other bytes-like objects (bytearray, memoryview, ...)
                input = bytes(input)

            # The ACE prefix is matched case-insensitively, so look for it
            # in a lowercased copy before taking the fast path.
            if ace_prefix not in input.lower():
                # Fast path
                try:
                    return input.decode('ascii'), len(input)
                except UnicodeDecodeError:
                    pass

            labels = input.split(b".")

        if labels and len(labels[-1]) == 0:
            trailing_dot = '.'
            del labels[-1]
        else:
            trailing_dot = ''

        result = [ToUnicode(label) for label in labels]

        return ".".join(result)+trailing_dot, len(input)
class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
    """Incremental IDNA encoder that emits complete labels only."""

    def _buffer_encode(self, input, errors, final):
        """Encode as many complete labels of *input* as possible.

        Returns ``(encoded_bytes, characters_consumed)``.  Unless *final*
        is true, the last label is held back because more of it may
        arrive in the next call.

        Raises UnicodeError if *errors* is not 'strict'.
        """
        if errors != 'strict':
            # IDNA is quite clear that implementations must be strict
            raise UnicodeError("unsupported error handling "+errors)

        if not input:
            return (b'', 0)

        parts = dots.split(input)
        suffix = b''
        if parts:
            if not parts[-1]:
                # Input ended with a separator: emit it after the labels.
                parts.pop()
                suffix = b'.'
            elif not final:
                # Keep potentially unfinished label until the next call
                parts.pop()
                if parts:
                    suffix = b'.'

        encoded = bytearray()
        consumed = 0
        for part in parts:
            if consumed:
                # Join with U+002E
                encoded += b'.'
                consumed += 1
            encoded += ToASCII(part)
            consumed += len(part)

        encoded += suffix
        return (bytes(encoded), consumed + len(suffix))
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental IDNA decoder that emits complete labels only."""

    def _buffer_decode(self, input, errors, final):
        """Decode as many complete labels of *input* as possible.

        *input* may be str or an ASCII bytes-like object.  Returns
        ``(decoded_text, units_consumed)``.  Unless *final* is true, the
        last label is held back because more of it may arrive in the
        next call.

        Raises UnicodeError if *errors* is not 'strict', if bytes input
        is not ASCII, or if ToUnicode() rejects a label.
        """
        if errors != 'strict':
            raise UnicodeError("Unsupported error handling "+errors)

        if not input:
            return ("", 0)

        # IDNA allows decoding to operate on Unicode strings, too.
        if isinstance(input, str):
            labels = dots.split(input)
        else:
            # Must be ASCII string
            input = str(input, "ascii")
            labels = input.split(".")

        trailing_dot = ''
        if labels:
            if not labels[-1]:
                trailing_dot = '.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = '.'

        result = []
        size = 0
        for index, label in enumerate(labels):
            result.append(ToUnicode(label))
            # Count the separator before every label except the first.
            # Keying this on the label index rather than on ``size`` keeps
            # the consumed count right when a leading label is empty
            # (e.g. ".a."), which ToUnicode() accepts.
            if index:
                size += 1
            size += len(label)

        result = ".".join(result) + trailing_dot
        size += len(trailing_dot)
        return (result, size)
class StreamWriter(Codec,codecs.StreamWriter):
    """Stream writer that uses Codec.encode; adds no behavior of its own."""
class StreamReader(Codec,codecs.StreamReader):
    """Stream reader that uses Codec.decode; adds no behavior of its own."""
### encodings module API

def getregentry():
    """Return the codecs.CodecInfo record describing the 'idna' codec."""
    # Each stateless entry point is bound to its own Codec instance.
    stateless_encode = Codec().encode
    stateless_decode = Codec().decode
    return codecs.CodecInfo(
        name='idna',
        encode=stateless_encode,
        decode=stateless_decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )

Mercurial > repos > rliterman > csp2

comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/encodings/idna.py @ 69:33d812a61356