Mercurial > repos > rliterman > csp2

diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/encodings/idna.py @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author: jpayne
date: Tue, 18 Mar 2025 17:55:14 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/encodings/idna.py	Tue Mar 18 17:55:14 2025 -0400
@@ -0,0 +1,309 @@
+# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)
+
+import stringprep, re, codecs
+from unicodedata import ucd_3_2_0 as unicodedata
+
+# IDNA section 3.1: the characters accepted as label separators
+# (U+002E full stop, U+3002 ideographic full stop, U+FF0E fullwidth
+# full stop, U+FF61 halfwidth ideographic full stop).
+dots = re.compile("[\u002E\u3002\uFF0E\uFF61]")
+
+# IDNA section 5: the ACE prefix, kept as bytes (prepended to and tested
+# against encoded labels) and as str (tested against text labels).
+ace_prefix = b"xn--"
+sace_prefix = "xn--"
+
+# This assumes query strings, so AllowUnassigned is true
+def nameprep(label):
+    """Apply the Nameprep steps (map, normalize, prohibit, bidi) to *label*.
+
+    label -- a str holding a single domain-name label.
+
+    Returns the prepared str.  Raises UnicodeError if the label contains
+    a prohibited character or violates one of the bidi requirements.
+    """
+    # Map: drop table B.1 characters ("map to nothing") and apply the
+    # table B.2 mapping to everything else.
+    label = "".join(
+        stringprep.map_table_b2(c)
+        for c in label
+        if not stringprep.in_table_b1(c)
+    )
+
+    # Normalize
+    label = unicodedata.normalize("NFKC", label)
+
+    # Prohibit
+    for c in label:
+        if stringprep.in_table_c12(c) or \
+           stringprep.in_table_c22(c) or \
+           stringprep.in_table_c3(c) or \
+           stringprep.in_table_c4(c) or \
+           stringprep.in_table_c5(c) or \
+           stringprep.in_table_c6(c) or \
+           stringprep.in_table_c7(c) or \
+           stringprep.in_table_c8(c) or \
+           stringprep.in_table_c9(c):
+            raise UnicodeError("Invalid character %r" % c)
+
+    # Check bidi
+    RandAL = [stringprep.in_table_d1(x) for x in label]
+    # The checks below do not depend on which RandAL character was found,
+    # so run them once instead of once per RandAL character; repeating
+    # them made this step quadratic in the label length.
+    if any(RandAL):
+        # There is a RandAL char in the string. Must perform further
+        # tests:
+        # 1) The characters in section 5.8 MUST be prohibited.
+        # This is table C.8, which was already checked
+        # 2) If a string contains any RandALCat character, the string
+        # MUST NOT contain any LCat character.
+        if any(stringprep.in_table_d2(x) for x in label):
+            raise UnicodeError("Violation of BIDI requirement 2")
+
+        # 3) If a string contains any RandALCat character, a
+        # RandALCat character MUST be the first character of the
+        # string, and a RandALCat character MUST be the last
+        # character of the string.
+        if not RandAL[0] or not RandAL[-1]:
+            raise UnicodeError("Violation of BIDI requirement 3")
+
+    return label
+
+def ToASCII(label):
+    """Convert the text label *label* to its ASCII (ACE) form as bytes.
+
+    A label that is already ASCII is returned encoded as-is.  Otherwise
+    it is run through nameprep and, if still not ASCII, punycode-encoded
+    and given the ACE prefix.
+
+    Raises UnicodeError if the result is empty or 64 bytes or longer, or
+    if a non-ASCII label already starts with the ACE prefix after
+    nameprep.
+    """
+    ascii_form = None
+
+    # Step 1: try ASCII
+    try:
+        ascii_form = label.encode("ascii")
+    except UnicodeError:
+        pass
+
+    if ascii_form is None:
+        # Step 2: nameprep
+        label = nameprep(label)
+
+        # Step 3: UseSTD3ASCIIRules is false
+        # Step 4: try ASCII again on the prepared label
+        try:
+            ascii_form = label.encode("ascii")
+        except UnicodeError:
+            pass
+
+    if ascii_form is None:
+        # Step 5: Check ACE prefix
+        if label.startswith(sace_prefix):
+            raise UnicodeError("Label starts with ACE prefix")
+
+        # Steps 6 and 7: punycode-encode, then prepend the ACE prefix
+        ascii_form = ace_prefix + label.encode("punycode")
+
+    # Step 8: Check size (every path ends up here)
+    if 0 < len(ascii_form) < 64:
+        return ascii_form
+    raise UnicodeError("label empty or too long")
+
+def ToUnicode(label):
+    """Convert one label (bytes or str) to its Unicode form.
+
+    A label without the ACE prefix is returned as its ASCII text.  A
+    label with the prefix is punycode-decoded, and the decoded text must
+    convert back (via ToASCII) to the same label, compared
+    case-insensitively.
+
+    Raises UnicodeError if the label is longer than 1024 characters, is
+    not ASCII after nameprep, or does not round-trip.
+    """
+    if len(label) > 1024:
+        # Reject oversized input before doing any per-character work.
+        # No successfully decoded ACE label can be this long, because
+        # the round-trip through ToASCII in step 6 enforces the
+        # 63-byte limit.
+        raise UnicodeError("label way too long")
+
+    # Step 1: Check for ASCII
+    if isinstance(label, bytes):
+        pure_ascii = True
+    else:
+        try:
+            label = label.encode("ascii")
+            pure_ascii = True
+        except UnicodeError:
+            pure_ascii = False
+    if not pure_ascii:
+        # Step 2: Perform nameprep
+        label = nameprep(label)
+        # It doesn't say this, but apparently, it should be ASCII now
+        try:
+            label = label.encode("ascii")
+        except UnicodeError:
+            raise UnicodeError("Invalid character in IDN label")
+    # Step 3: Check for ACE prefix
+    if not label.startswith(ace_prefix):
+        return str(label, "ascii")
+
+    # Step 4: Remove ACE prefix
+    label1 = label[len(ace_prefix):]
+
+    # Step 5: Decode using PUNYCODE
+    result = label1.decode("punycode")
+
+    # Step 6: Apply ToASCII
+    label2 = ToASCII(result)
+
+    # Step 7: Compare the result of step 6 with the one of step 3
+    # label2 will already be in lower case.
+    if str(label, "ascii").lower() != str(label2, "ascii"):
+        raise UnicodeError("IDNA does not round-trip", label, label2)
+
+    # Step 8: return the result of step 5
+    return result
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+    """Stateless IDNA codec working on whole domain names."""
+
+    def encode(self, input, errors='strict'):
+        """Encode the domain name *input* (str) to ASCII bytes.
+
+        Returns a ``(bytes, len(input))`` tuple.  Only ``errors='strict'``
+        is accepted.  Raises UnicodeError for any other error handler and
+        for labels that are empty (other than a trailing one) or 64
+        bytes or longer.
+        """
+
+        if errors != 'strict':
+            # IDNA is quite clear that implementations must be strict
+            raise UnicodeError("unsupported error handling "+errors)
+
+        if not input:
+            return b'', 0
+
+        try:
+            result = input.encode('ascii')
+        except UnicodeEncodeError:
+            pass
+        else:
+            # ASCII name: fast path
+            labels = result.split(b'.')
+            for label in labels[:-1]:
+                if not (0 < len(label) < 64):
+                    raise UnicodeError("label empty or too long")
+            # The last label may be empty (name ends with a dot).
+            if len(labels[-1]) >= 64:
+                raise UnicodeError("label too long")
+            return result, len(input)
+
+        result = bytearray()
+        labels = dots.split(input)
+        if labels and not labels[-1]:
+            trailing_dot = b'.'
+            del labels[-1]
+        else:
+            trailing_dot = b''
+        for label in labels:
+            if result:
+                # Join with U+002E
+                result.extend(b'.')
+            result.extend(ToASCII(label))
+        return bytes(result+trailing_dot), len(input)
+
+    def decode(self, input, errors='strict'):
+        """Decode the domain name *input* to its Unicode form.
+
+        *input* may be bytes, another object accepted by ``bytes()``, or
+        a str.  Returns a ``(str, len(input))`` tuple.  Only
+        ``errors='strict'`` is accepted; anything else raises UnicodeError.
+        """
+
+        if errors != 'strict':
+            raise UnicodeError("Unsupported error handling "+errors)
+
+        if not input:
+            return "", 0
+
+        if isinstance(input, str):
+            # IDNA allows decoding to operate on Unicode strings, too.
+            # bytes(str) without an encoding raises TypeError, so split
+            # the text on the IDNA separators and let ToUnicode, which
+            # accepts str labels, handle each one.
+            labels = dots.split(input)
+        else:
+            if not isinstance(input, bytes):
+                # Other inputs (e.g. bytearray) are coerced to bytes.
+                input = bytes(input)
+
+            if ace_prefix not in input:
+                # Fast path
+                try:
+                    return input.decode('ascii'), len(input)
+                except UnicodeDecodeError:
+                    pass
+
+            labels = input.split(b".")
+
+        if labels and len(labels[-1]) == 0:
+            trailing_dot = '.'
+            del labels[-1]
+        else:
+            trailing_dot = ''
+
+        result = []
+        for label in labels:
+            result.append(ToUnicode(label))
+
+        return ".".join(result)+trailing_dot, len(input)
+
+class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
+    """Incremental IDNA encoder that emits only completed labels."""
+
+    def _buffer_encode(self, input, errors, final):
+        """Encode the complete labels in *input*.
+
+        Returns ``(encoded_bytes, consumed)``.  Unless *final* is true,
+        a last label with no separator after it is left out of both the
+        output and the consumed count.
+        """
+        if errors != 'strict':
+            # IDNA is quite clear that implementations must be strict
+            raise UnicodeError("unsupported error handling "+errors)
+
+        if not input:
+            return (b'', 0)
+
+        pieces = dots.split(input)
+        suffix = b''
+        if pieces:
+            if not pieces[-1]:
+                # Input ended with a separator: emit it after the labels.
+                suffix = b'.'
+                pieces.pop()
+            elif not final:
+                # Keep potentially unfinished label until the next call
+                pieces.pop()
+                if pieces:
+                    suffix = b'.'
+
+        encoded = bytearray()
+        consumed = 0
+        for piece in pieces:
+            if consumed:
+                # Join with U+002E
+                encoded.extend(b'.')
+                consumed += 1
+            encoded.extend(ToASCII(piece))
+            consumed += len(piece)
+
+        encoded += suffix
+        consumed += len(suffix)
+        return (bytes(encoded), consumed)
+
+class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
+    """Incremental IDNA decoder that emits only completed labels."""
+
+    def _buffer_decode(self, input, errors, final):
+        """Decode the complete labels in *input*.
+
+        Returns ``(decoded_text, consumed)``.  Unless *final* is true,
+        a last label with no separator after it is left out of both the
+        output and the consumed count.
+        """
+        if errors != 'strict':
+            raise UnicodeError("Unsupported error handling "+errors)
+
+        if not input:
+            return ("", 0)
+
+        # IDNA allows decoding to operate on Unicode strings, too.
+        if isinstance(input, str):
+            pieces = dots.split(input)
+        else:
+            # Must be ASCII string
+            input = str(input, "ascii")
+            pieces = input.split(".")
+
+        suffix = ''
+        if pieces:
+            if not pieces[-1]:
+                # Input ended with a separator: emit it after the labels.
+                suffix = '.'
+                pieces.pop()
+            elif not final:
+                # Keep potentially unfinished label until the next call
+                pieces.pop()
+                if pieces:
+                    suffix = '.'
+
+        decoded = []
+        consumed = 0
+        for piece in pieces:
+            decoded.append(ToUnicode(piece))
+            if consumed:
+                # Account for the separator before this label.
+                consumed += 1
+            consumed += len(piece)
+
+        text = ".".join(decoded) + suffix
+        consumed += len(suffix)
+        return (text, consumed)
+
+class StreamWriter(Codec,codecs.StreamWriter):
+    # Adds nothing of its own: Codec is listed first, so its encode()
+    # and decode() take precedence over those of codecs.StreamWriter.
+    pass
+
+class StreamReader(Codec,codecs.StreamReader):
+    # Adds nothing of its own: Codec is listed first, so its encode()
+    # and decode() take precedence over those of codecs.StreamReader.
+    pass
+
+### encodings module API
+
+def getregentry():
+    """Return the codecs.CodecInfo record describing the 'idna' codec."""
+    # The stateless entry points are bound methods of two separate
+    # Codec instances.
+    stateless_encode = Codec().encode
+    stateless_decode = Codec().decode
+    info = codecs.CodecInfo(
+        name='idna',
+        encode=stateless_encode,
+        decode=stateless_decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamwriter=StreamWriter,
+        streamreader=StreamReader,
+    )
+    return info
author	jpayne
date	Tue, 18 Mar 2025 17:55:14 -0400
parents
children