annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/unicode/uidna.h @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 // © 2016 and later: Unicode, Inc. and others.
jpayne@69 2 // License & terms of use: http://www.unicode.org/copyright.html
jpayne@69 3 /*
jpayne@69 4 *******************************************************************************
jpayne@69 5 *
jpayne@69 6 * Copyright (C) 2003-2014, International Business Machines
jpayne@69 7 * Corporation and others. All Rights Reserved.
jpayne@69 8 *
jpayne@69 9 *******************************************************************************
jpayne@69 10 * file name: uidna.h
jpayne@69 11 * encoding: UTF-8
jpayne@69 12 * tab size: 8 (not used)
jpayne@69 13 * indentation:4
jpayne@69 14 *
jpayne@69 15 * created on: 2003feb1
jpayne@69 16 * created by: Ram Viswanadha
jpayne@69 17 */
jpayne@69 18
jpayne@69 19 #ifndef __UIDNA_H__
jpayne@69 20 #define __UIDNA_H__
jpayne@69 21
jpayne@69 22 #include "unicode/utypes.h"
jpayne@69 23
jpayne@69 24 #if !UCONFIG_NO_IDNA
jpayne@69 25
jpayne@69 26 #include "unicode/localpointer.h"
jpayne@69 27 #include "unicode/parseerr.h"
jpayne@69 28
jpayne@69 29 /**
jpayne@69 30 * \file
jpayne@69 31 * \brief C API: Internationalizing Domain Names in Applications (IDNA)
jpayne@69 32 *
jpayne@69 33 * IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h.
jpayne@69 34 *
jpayne@69 35 * The C API functions which do take a UIDNA * service object pointer
jpayne@69 36 * implement UTS #46 and IDNA2008.
jpayne@69 37 *
jpayne@69 38 * IDNA2003 is obsolete.
jpayne@69 39 * The C API functions which do not take a service object pointer
jpayne@69 40 * implement IDNA2003. They are all deprecated.
jpayne@69 41 */
jpayne@69 42
jpayne@69 43 /*
jpayne@69 44 * IDNA option bit set values.
jpayne@69 45 */
jpayne@69 46 enum {
jpayne@69 47 /**
jpayne@69 48 * Default options value: None of the other options are set.
jpayne@69 49 * For use in static worker and factory methods.
jpayne@69 50 * @stable ICU 2.6
jpayne@69 51 */
jpayne@69 52 UIDNA_DEFAULT=0,
jpayne@69 53 #ifndef U_HIDE_DEPRECATED_API
jpayne@69 54 /**
jpayne@69 55 * Option to allow unassigned code points in domain names and labels.
jpayne@69 56 * For use in static worker and factory methods.
jpayne@69 57 * <p>This option is ignored by the UTS46 implementation.
jpayne@69 58 * (UTS #46 disallows unassigned code points.)
jpayne@69 59 * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
jpayne@69 60 */
jpayne@69 61 UIDNA_ALLOW_UNASSIGNED=1,
jpayne@69 62 #endif /* U_HIDE_DEPRECATED_API */
jpayne@69 63 /**
jpayne@69 64 * Option to check whether the input conforms to the STD3 ASCII rules,
jpayne@69 65 * for example the restriction of labels to LDH characters
jpayne@69 66 * (ASCII Letters, Digits and Hyphen-Minus).
jpayne@69 67 * For use in static worker and factory methods.
jpayne@69 68 * @stable ICU 2.6
jpayne@69 69 */
jpayne@69 70 UIDNA_USE_STD3_RULES=2,
jpayne@69 71 /**
jpayne@69 72 * IDNA option to check for whether the input conforms to the BiDi rules.
jpayne@69 73 * For use in static worker and factory methods.
jpayne@69 74 * <p>This option is ignored by the IDNA2003 implementation.
jpayne@69 75 * (IDNA2003 always performs a BiDi check.)
jpayne@69 76 * @stable ICU 4.6
jpayne@69 77 */
jpayne@69 78 UIDNA_CHECK_BIDI=4,
jpayne@69 79 /**
jpayne@69 80 * IDNA option to check for whether the input conforms to the CONTEXTJ rules.
jpayne@69 81 * For use in static worker and factory methods.
jpayne@69 82 * <p>This option is ignored by the IDNA2003 implementation.
jpayne@69 83 * (The CONTEXTJ check is new in IDNA2008.)
jpayne@69 84 * @stable ICU 4.6
jpayne@69 85 */
jpayne@69 86 UIDNA_CHECK_CONTEXTJ=8,
jpayne@69 87 /**
jpayne@69 88 * IDNA option for nontransitional processing in ToASCII().
jpayne@69 89 * For use in static worker and factory methods.
jpayne@69 90 * <p>By default, ToASCII() uses transitional processing.
jpayne@69 91 * <p>This option is ignored by the IDNA2003 implementation.
jpayne@69 92 * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
jpayne@69 93 * @stable ICU 4.6
jpayne@69 94 */
jpayne@69 95 UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
jpayne@69 96 /**
jpayne@69 97 * IDNA option for nontransitional processing in ToUnicode().
jpayne@69 98 * For use in static worker and factory methods.
jpayne@69 99 * <p>By default, ToUnicode() uses transitional processing.
jpayne@69 100 * <p>This option is ignored by the IDNA2003 implementation.
jpayne@69 101 * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
jpayne@69 102 * @stable ICU 4.6
jpayne@69 103 */
jpayne@69 104 UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
jpayne@69 105 /**
jpayne@69 106 * IDNA option to check for whether the input conforms to the CONTEXTO rules.
jpayne@69 107 * For use in static worker and factory methods.
jpayne@69 108 * <p>This option is ignored by the IDNA2003 implementation.
jpayne@69 109 * (The CONTEXTO check is new in IDNA2008.)
jpayne@69 110 * <p>This is for use by registries for IDNA2008 conformance.
jpayne@69 111 * UTS #46 does not require the CONTEXTO check.
jpayne@69 112 * @stable ICU 49
jpayne@69 113 */
jpayne@69 114 UIDNA_CHECK_CONTEXTO=0x40
jpayne@69 115 };
jpayne@69 116
jpayne@69 117 /**
jpayne@69 118 * Opaque C service object type for the new IDNA API.
jpayne@69 119 * @stable ICU 4.6
jpayne@69 120 */
jpayne@69 121 struct UIDNA;
jpayne@69 122 typedef struct UIDNA UIDNA; /**< C typedef for struct UIDNA. @stable ICU 4.6 */
jpayne@69 123
jpayne@69 124 /**
jpayne@69 125 * Returns a UIDNA instance which implements UTS #46.
jpayne@69 126 * Returns an unmodifiable instance, owned by the caller.
jpayne@69 127 * Cache it for multiple operations, and uidna_close() it when done.
jpayne@69 128 * The instance is thread-safe, that is, it can be used concurrently.
jpayne@69 129 *
jpayne@69 130 * For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
jpayne@69 131 *
jpayne@69 132 * @param options Bit set to modify the processing and error checking.
jpayne@69 133 * See option bit set values in uidna.h.
jpayne@69 134 * @param pErrorCode Standard ICU error code. Its input value must
jpayne@69 135 * pass the U_SUCCESS() test, or else the function returns
jpayne@69 136 * immediately. Check for U_FAILURE() on output or use with
jpayne@69 137 * function chaining. (See User Guide for details.)
jpayne@69 138 * @return the UTS #46 UIDNA instance, if successful
jpayne@69 139 * @stable ICU 4.6
jpayne@69 140 */
jpayne@69 141 U_STABLE UIDNA * U_EXPORT2
jpayne@69 142 uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode);
jpayne@69 143
jpayne@69 144 /**
jpayne@69 145 * Closes a UIDNA instance.
jpayne@69 146 * @param idna UIDNA instance to be closed
jpayne@69 147 * @stable ICU 4.6
jpayne@69 148 */
jpayne@69 149 U_STABLE void U_EXPORT2
jpayne@69 150 uidna_close(UIDNA *idna);
jpayne@69 151
jpayne@69 152 #if U_SHOW_CPLUSPLUS_API
jpayne@69 153
jpayne@69 154 U_NAMESPACE_BEGIN
jpayne@69 155
jpayne@69 156 /**
jpayne@69 157 * \class LocalUIDNAPointer
jpayne@69 158 * "Smart pointer" class, closes a UIDNA via uidna_close().
jpayne@69 159 * For most methods see the LocalPointerBase base class.
jpayne@69 160 *
jpayne@69 161 * @see LocalPointerBase
jpayne@69 162 * @see LocalPointer
jpayne@69 163 * @stable ICU 4.6
jpayne@69 164 */
jpayne@69 165 U_DEFINE_LOCAL_OPEN_POINTER(LocalUIDNAPointer, UIDNA, uidna_close);
jpayne@69 166
jpayne@69 167 U_NAMESPACE_END
jpayne@69 168
jpayne@69 169 #endif
jpayne@69 170
jpayne@69 171 /**
jpayne@69 172 * Output container for IDNA processing errors.
jpayne@69 173 * Initialize with UIDNA_INFO_INITIALIZER:
jpayne@69 174 * \code
jpayne@69 175 * UIDNAInfo info = UIDNA_INFO_INITIALIZER;
jpayne@69 176 * int32_t length = uidna_nameToASCII(..., &info, &errorCode);
jpayne@69 177 * if(U_SUCCESS(errorCode) && info.errors!=0) { ... }
jpayne@69 178 * \endcode
jpayne@69 179 * @stable ICU 4.6
jpayne@69 180 */
jpayne@69 181 typedef struct UIDNAInfo {
jpayne@69 182 /** sizeof(UIDNAInfo) @stable ICU 4.6 */
jpayne@69 183 int16_t size;
jpayne@69 184 /**
jpayne@69 185 * Set to TRUE if transitional and nontransitional processing produce different results.
jpayne@69 186 * For details see C++ IDNAInfo::isTransitionalDifferent().
jpayne@69 187 * @stable ICU 4.6
jpayne@69 188 */
jpayne@69 189 UBool isTransitionalDifferent;
jpayne@69 190 UBool reservedB3; /**< Reserved field, do not use. @internal */
jpayne@69 191 /**
jpayne@69 192 * Bit set indicating IDNA processing errors. 0 if no errors.
jpayne@69 193 * See UIDNA_ERROR_... constants.
jpayne@69 194 * @stable ICU 4.6
jpayne@69 195 */
jpayne@69 196 uint32_t errors;
jpayne@69 197 int32_t reservedI2; /**< Reserved field, do not use. @internal */
jpayne@69 198 int32_t reservedI3; /**< Reserved field, do not use. @internal */
jpayne@69 199 } UIDNAInfo;
jpayne@69 200
jpayne@69 201 /**
jpayne@69 202 * Static initializer for a UIDNAInfo struct.
jpayne@69 203 * @stable ICU 4.6
jpayne@69 204 */
jpayne@69 205 #define UIDNA_INFO_INITIALIZER { \
jpayne@69 206 (int16_t)sizeof(UIDNAInfo), \
jpayne@69 207 FALSE, FALSE, \
jpayne@69 208 0, 0, 0 }
jpayne@69 209
jpayne@69 210 /**
jpayne@69 211 * Converts a single domain name label into its ASCII form for DNS lookup.
jpayne@69 212 * If any processing step fails, then pInfo->errors will be non-zero and
jpayne@69 213 * the result might not be an ASCII string.
jpayne@69 214 * The label might be modified according to the types of errors.
jpayne@69 215 * Labels with severe errors will be left in (or turned into) their Unicode form.
jpayne@69 216 *
jpayne@69 217 * The UErrorCode indicates an error only in exceptional cases,
jpayne@69 218 * such as a U_MEMORY_ALLOCATION_ERROR.
jpayne@69 219 *
jpayne@69 220 * @param idna UIDNA instance
jpayne@69 221 * @param label Input domain name label
jpayne@69 222 * @param length Label length, or -1 if NUL-terminated
jpayne@69 223 * @param dest Destination string buffer
jpayne@69 224 * @param capacity Destination buffer capacity
jpayne@69 225 * @param pInfo Output container of IDNA processing details.
jpayne@69 226 * @param pErrorCode Standard ICU error code. Its input value must
jpayne@69 227 * pass the U_SUCCESS() test, or else the function returns
jpayne@69 228 * immediately. Check for U_FAILURE() on output or use with
jpayne@69 229 * function chaining. (See User Guide for details.)
jpayne@69 230 * @return destination string length
jpayne@69 231 * @stable ICU 4.6
jpayne@69 232 */
jpayne@69 233 U_STABLE int32_t U_EXPORT2
jpayne@69 234 uidna_labelToASCII(const UIDNA *idna,
jpayne@69 235 const UChar *label, int32_t length,
jpayne@69 236 UChar *dest, int32_t capacity,
jpayne@69 237 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
jpayne@69 238
jpayne@69 239 /**
jpayne@69 240 * Converts a single domain name label into its Unicode form for human-readable display.
jpayne@69 241 * If any processing step fails, then pInfo->errors will be non-zero.
jpayne@69 242 * The label might be modified according to the types of errors.
jpayne@69 243 *
jpayne@69 244 * The UErrorCode indicates an error only in exceptional cases,
jpayne@69 245 * such as a U_MEMORY_ALLOCATION_ERROR.
jpayne@69 246 *
jpayne@69 247 * @param idna UIDNA instance
jpayne@69 248 * @param label Input domain name label
jpayne@69 249 * @param length Label length, or -1 if NUL-terminated
jpayne@69 250 * @param dest Destination string buffer
jpayne@69 251 * @param capacity Destination buffer capacity
jpayne@69 252 * @param pInfo Output container of IDNA processing details.
jpayne@69 253 * @param pErrorCode Standard ICU error code. Its input value must
jpayne@69 254 * pass the U_SUCCESS() test, or else the function returns
jpayne@69 255 * immediately. Check for U_FAILURE() on output or use with
jpayne@69 256 * function chaining. (See User Guide for details.)
jpayne@69 257 * @return destination string length
jpayne@69 258 * @stable ICU 4.6
jpayne@69 259 */
jpayne@69 260 U_STABLE int32_t U_EXPORT2
jpayne@69 261 uidna_labelToUnicode(const UIDNA *idna,
jpayne@69 262 const UChar *label, int32_t length,
jpayne@69 263 UChar *dest, int32_t capacity,
jpayne@69 264 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
jpayne@69 265
jpayne@69 266 /**
jpayne@69 267 * Converts a whole domain name into its ASCII form for DNS lookup.
jpayne@69 268 * If any processing step fails, then pInfo->errors will be non-zero and
jpayne@69 269 * the result might not be an ASCII string.
jpayne@69 270 * The domain name might be modified according to the types of errors.
jpayne@69 271 * Labels with severe errors will be left in (or turned into) their Unicode form.
jpayne@69 272 *
jpayne@69 273 * The UErrorCode indicates an error only in exceptional cases,
jpayne@69 274 * such as a U_MEMORY_ALLOCATION_ERROR.
jpayne@69 275 *
jpayne@69 276 * @param idna UIDNA instance
jpayne@69 277 * @param name Input domain name
jpayne@69 278 * @param length Domain name length, or -1 if NUL-terminated
jpayne@69 279 * @param dest Destination string buffer
jpayne@69 280 * @param capacity Destination buffer capacity
jpayne@69 281 * @param pInfo Output container of IDNA processing details.
jpayne@69 282 * @param pErrorCode Standard ICU error code. Its input value must
jpayne@69 283 * pass the U_SUCCESS() test, or else the function returns
jpayne@69 284 * immediately. Check for U_FAILURE() on output or use with
jpayne@69 285 * function chaining. (See User Guide for details.)
jpayne@69 286 * @return destination string length
jpayne@69 287 * @stable ICU 4.6
jpayne@69 288 */
jpayne@69 289 U_STABLE int32_t U_EXPORT2
jpayne@69 290 uidna_nameToASCII(const UIDNA *idna,
jpayne@69 291 const UChar *name, int32_t length,
jpayne@69 292 UChar *dest, int32_t capacity,
jpayne@69 293 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
jpayne@69 294
jpayne@69 295 /**
jpayne@69 296 * Converts a whole domain name into its Unicode form for human-readable display.
jpayne@69 297 * If any processing step fails, then pInfo->errors will be non-zero.
jpayne@69 298 * The domain name might be modified according to the types of errors.
jpayne@69 299 *
jpayne@69 300 * The UErrorCode indicates an error only in exceptional cases,
jpayne@69 301 * such as a U_MEMORY_ALLOCATION_ERROR.
jpayne@69 302 *
jpayne@69 303 * @param idna UIDNA instance
jpayne@69 304 * @param name Input domain name
jpayne@69 305 * @param length Domain name length, or -1 if NUL-terminated
jpayne@69 306 * @param dest Destination string buffer
jpayne@69 307 * @param capacity Destination buffer capacity
jpayne@69 308 * @param pInfo Output container of IDNA processing details.
jpayne@69 309 * @param pErrorCode Standard ICU error code. Its input value must
jpayne@69 310 * pass the U_SUCCESS() test, or else the function returns
jpayne@69 311 * immediately. Check for U_FAILURE() on output or use with
jpayne@69 312 * function chaining. (See User Guide for details.)
jpayne@69 313 * @return destination string length
jpayne@69 314 * @stable ICU 4.6
jpayne@69 315 */
jpayne@69 316 U_STABLE int32_t U_EXPORT2
jpayne@69 317 uidna_nameToUnicode(const UIDNA *idna,
jpayne@69 318 const UChar *name, int32_t length,
jpayne@69 319 UChar *dest, int32_t capacity,
jpayne@69 320 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
jpayne@69 321
jpayne@69 322 /* UTF-8 versions of the processing methods --------------------------------- */
jpayne@69 323
jpayne@69 324 /**
jpayne@69 325 * Converts a single domain name label into its ASCII form for DNS lookup.
jpayne@69 326 * UTF-8 version of uidna_labelToASCII(), same behavior.
jpayne@69 327 *
jpayne@69 328 * @param idna UIDNA instance
jpayne@69 329 * @param label Input domain name label
jpayne@69 330 * @param length Label length, or -1 if NUL-terminated
jpayne@69 331 * @param dest Destination string buffer
jpayne@69 332 * @param capacity Destination buffer capacity
jpayne@69 333 * @param pInfo Output container of IDNA processing details.
jpayne@69 334 * @param pErrorCode Standard ICU error code. Its input value must
jpayne@69 335 * pass the U_SUCCESS() test, or else the function returns
jpayne@69 336 * immediately. Check for U_FAILURE() on output or use with
jpayne@69 337 * function chaining. (See User Guide for details.)
jpayne@69 338 * @return destination string length
jpayne@69 339 * @stable ICU 4.6
jpayne@69 340 */
jpayne@69 341 U_STABLE int32_t U_EXPORT2
jpayne@69 342 uidna_labelToASCII_UTF8(const UIDNA *idna,
jpayne@69 343 const char *label, int32_t length,
jpayne@69 344 char *dest, int32_t capacity,
jpayne@69 345 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
jpayne@69 346
jpayne@69 347 /**
jpayne@69 348 * Converts a single domain name label into its Unicode form for human-readable display.
jpayne@69 349 * UTF-8 version of uidna_labelToUnicode(), same behavior.
jpayne@69 350 *
jpayne@69 351 * @param idna UIDNA instance
jpayne@69 352 * @param label Input domain name label
jpayne@69 353 * @param length Label length, or -1 if NUL-terminated
jpayne@69 354 * @param dest Destination string buffer
jpayne@69 355 * @param capacity Destination buffer capacity
jpayne@69 356 * @param pInfo Output container of IDNA processing details.
jpayne@69 357 * @param pErrorCode Standard ICU error code. Its input value must
jpayne@69 358 * pass the U_SUCCESS() test, or else the function returns
jpayne@69 359 * immediately. Check for U_FAILURE() on output or use with
jpayne@69 360 * function chaining. (See User Guide for details.)
jpayne@69 361 * @return destination string length
jpayne@69 362 * @stable ICU 4.6
jpayne@69 363 */
jpayne@69 364 U_STABLE int32_t U_EXPORT2
jpayne@69 365 uidna_labelToUnicodeUTF8(const UIDNA *idna,
jpayne@69 366 const char *label, int32_t length,
jpayne@69 367 char *dest, int32_t capacity,
jpayne@69 368 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
jpayne@69 369
jpayne@69 370 /**
jpayne@69 371 * Converts a whole domain name into its ASCII form for DNS lookup.
jpayne@69 372 * UTF-8 version of uidna_nameToASCII(), same behavior.
jpayne@69 373 *
jpayne@69 374 * @param idna UIDNA instance
jpayne@69 375 * @param name Input domain name
jpayne@69 376 * @param length Domain name length, or -1 if NUL-terminated
jpayne@69 377 * @param dest Destination string buffer
jpayne@69 378 * @param capacity Destination buffer capacity
jpayne@69 379 * @param pInfo Output container of IDNA processing details.
jpayne@69 380 * @param pErrorCode Standard ICU error code. Its input value must
jpayne@69 381 * pass the U_SUCCESS() test, or else the function returns
jpayne@69 382 * immediately. Check for U_FAILURE() on output or use with
jpayne@69 383 * function chaining. (See User Guide for details.)
jpayne@69 384 * @return destination string length
jpayne@69 385 * @stable ICU 4.6
jpayne@69 386 */
jpayne@69 387 U_STABLE int32_t U_EXPORT2
jpayne@69 388 uidna_nameToASCII_UTF8(const UIDNA *idna,
jpayne@69 389 const char *name, int32_t length,
jpayne@69 390 char *dest, int32_t capacity,
jpayne@69 391 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
jpayne@69 392
jpayne@69 393 /**
jpayne@69 394 * Converts a whole domain name into its Unicode form for human-readable display.
jpayne@69 395 * UTF-8 version of uidna_nameToUnicode(), same behavior.
jpayne@69 396 *
jpayne@69 397 * @param idna UIDNA instance
jpayne@69 398 * @param name Input domain name
jpayne@69 399 * @param length Domain name length, or -1 if NUL-terminated
jpayne@69 400 * @param dest Destination string buffer
jpayne@69 401 * @param capacity Destination buffer capacity
jpayne@69 402 * @param pInfo Output container of IDNA processing details.
jpayne@69 403 * @param pErrorCode Standard ICU error code. Its input value must
jpayne@69 404 * pass the U_SUCCESS() test, or else the function returns
jpayne@69 405 * immediately. Check for U_FAILURE() on output or use with
jpayne@69 406 * function chaining. (See User Guide for details.)
jpayne@69 407 * @return destination string length
jpayne@69 408 * @stable ICU 4.6
jpayne@69 409 */
jpayne@69 410 U_STABLE int32_t U_EXPORT2
jpayne@69 411 uidna_nameToUnicodeUTF8(const UIDNA *idna,
jpayne@69 412 const char *name, int32_t length,
jpayne@69 413 char *dest, int32_t capacity,
jpayne@69 414 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
jpayne@69 415
jpayne@69 416 /*
jpayne@69 417 * IDNA error bit set values.
jpayne@69 418 * When a domain name or label fails a processing step or does not meet the
jpayne@69 419 * validity criteria, then one or more of these error bits are set.
jpayne@69 420 */
jpayne@69 421 enum {
jpayne@69 422 /**
jpayne@69 423 * A non-final domain name label (or the whole domain name) is empty.
jpayne@69 424 * @stable ICU 4.6
jpayne@69 425 */
jpayne@69 426 UIDNA_ERROR_EMPTY_LABEL=1,
jpayne@69 427 /**
jpayne@69 428 * A domain name label is longer than 63 bytes.
jpayne@69 429 * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
jpayne@69 430 * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
jpayne@69 431 * @stable ICU 4.6
jpayne@69 432 */
jpayne@69 433 UIDNA_ERROR_LABEL_TOO_LONG=2,
jpayne@69 434 /**
jpayne@69 435 * A domain name is longer than 255 bytes in its storage form.
jpayne@69 436 * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
jpayne@69 437 * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
jpayne@69 438 * @stable ICU 4.6
jpayne@69 439 */
jpayne@69 440 UIDNA_ERROR_DOMAIN_NAME_TOO_LONG=4,
jpayne@69 441 /**
jpayne@69 442 * A label starts with a hyphen-minus ('-').
jpayne@69 443 * @stable ICU 4.6
jpayne@69 444 */
jpayne@69 445 UIDNA_ERROR_LEADING_HYPHEN=8,
jpayne@69 446 /**
jpayne@69 447 * A label ends with a hyphen-minus ('-').
jpayne@69 448 * @stable ICU 4.6
jpayne@69 449 */
jpayne@69 450 UIDNA_ERROR_TRAILING_HYPHEN=0x10,
jpayne@69 451 /**
jpayne@69 452 * A label contains hyphen-minus ('-') in the third and fourth positions.
jpayne@69 453 * @stable ICU 4.6
jpayne@69 454 */
jpayne@69 455 UIDNA_ERROR_HYPHEN_3_4=0x20,
jpayne@69 456 /**
jpayne@69 457 * A label starts with a combining mark.
jpayne@69 458 * @stable ICU 4.6
jpayne@69 459 */
jpayne@69 460 UIDNA_ERROR_LEADING_COMBINING_MARK=0x40,
jpayne@69 461 /**
jpayne@69 462 * A label or domain name contains disallowed characters.
jpayne@69 463 * @stable ICU 4.6
jpayne@69 464 */
jpayne@69 465 UIDNA_ERROR_DISALLOWED=0x80,
jpayne@69 466 /**
jpayne@69 467 * A label starts with "xn--" but does not contain valid Punycode.
jpayne@69 468 * That is, an xn-- label failed Punycode decoding.
jpayne@69 469 * @stable ICU 4.6
jpayne@69 470 */
jpayne@69 471 UIDNA_ERROR_PUNYCODE=0x100,
jpayne@69 472 /**
jpayne@69 473 * A label contains a dot=full stop.
jpayne@69 474 * This can occur in an input string for a single-label function.
jpayne@69 475 * @stable ICU 4.6
jpayne@69 476 */
jpayne@69 477 UIDNA_ERROR_LABEL_HAS_DOT=0x200,
jpayne@69 478 /**
jpayne@69 479 * An ACE label does not contain a valid label string.
jpayne@69 480 * The label was successfully ACE (Punycode) decoded but the resulting
jpayne@69 481 * string had severe validation errors. For example,
jpayne@69 482 * it might contain characters that are not allowed in ACE labels,
jpayne@69 483 * or it might not be normalized.
jpayne@69 484 * @stable ICU 4.6
jpayne@69 485 */
jpayne@69 486 UIDNA_ERROR_INVALID_ACE_LABEL=0x400,
jpayne@69 487 /**
jpayne@69 488 * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
jpayne@69 489 * @stable ICU 4.6
jpayne@69 490 */
jpayne@69 491 UIDNA_ERROR_BIDI=0x800,
jpayne@69 492 /**
jpayne@69 493 * A label does not meet the IDNA CONTEXTJ requirements.
jpayne@69 494 * @stable ICU 4.6
jpayne@69 495 */
jpayne@69 496 UIDNA_ERROR_CONTEXTJ=0x1000,
jpayne@69 497 /**
jpayne@69 498 * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
jpayne@69 499 * Some punctuation characters "Would otherwise have been DISALLOWED"
jpayne@69 500 * but are allowed in certain contexts. (RFC 5892)
jpayne@69 501 * @stable ICU 49
jpayne@69 502 */
jpayne@69 503 UIDNA_ERROR_CONTEXTO_PUNCTUATION=0x2000,
jpayne@69 504 /**
jpayne@69 505 * A label does not meet the IDNA CONTEXTO requirements for digits.
jpayne@69 506 * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
jpayne@69 507 * @stable ICU 49
jpayne@69 508 */
jpayne@69 509 UIDNA_ERROR_CONTEXTO_DIGITS=0x4000
jpayne@69 510 };
jpayne@69 511
jpayne@69 512 #ifndef U_HIDE_DEPRECATED_API
jpayne@69 513
jpayne@69 514 /* IDNA2003 API ------------------------------------------------------------- */
jpayne@69 515
jpayne@69 516 /**
jpayne@69 517 * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
jpayne@69 518 * This operation is done on <b>single labels</b> before sending it to something that expects
jpayne@69 519 * ASCII names. A label is an individual part of a domain name. Labels are usually
jpayne@69 520 * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
jpayne@69 521 *
jpayne@69 522 * IDNA2003 API Overview:
jpayne@69 523 *
jpayne@69 524 * The uidna_ API implements the IDNA protocol as defined in the IDNA RFC
jpayne@69 525 * (http://www.ietf.org/rfc/rfc3490.txt).
jpayne@69 526 * The RFC defines 2 operations: ToASCII and ToUnicode. Domain name labels
jpayne@69 527 * containing non-ASCII code points are processed by the
jpayne@69 528 * ToASCII operation before passing it to resolver libraries. Domain names
jpayne@69 529 * that are obtained from resolver libraries are processed by the
jpayne@69 530 * ToUnicode operation before displaying the domain name to the user.
jpayne@69 531 * IDNA requires that implementations process input strings with Nameprep
jpayne@69 532 * (http://www.ietf.org/rfc/rfc3491.txt),
jpayne@69 533 * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt),
jpayne@69 534 * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt).
jpayne@69 535 * Implementations of IDNA MUST fully implement Nameprep and Punycode;
jpayne@69 536 * neither Nameprep nor Punycode are optional.
jpayne@69 537 * The input and output of ToASCII and ToUnicode operations are Unicode
jpayne@69 538 * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
jpayne@69 539 * multiple times to an input string will yield the same result as applying the operation
jpayne@69 540 * once.
jpayne@69 541 * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
jpayne@69 542 * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
jpayne@69 543 *
jpayne@69 544 * @param src Input UChar array containing label in Unicode.
jpayne@69 545 * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
jpayne@69 546 * @param dest Output UChar array with ASCII (ACE encoded) label.
jpayne@69 547 * @param destCapacity Size of dest.
jpayne@69 548 * @param options A bit set of options:
jpayne@69 549 *
jpayne@69 550 * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
jpayne@69 551 * and do not use STD3 ASCII rules
jpayne@69 552 * If unassigned code points are found the operation fails with
jpayne@69 553 * U_IDNA_UNASSIGNED_ERROR error code.
jpayne@69 554 *
jpayne@69 555 * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
jpayne@69 556 * If this option is set, the unassigned code points in the input
jpayne@69 557 * are treated as normal Unicode code points.
jpayne@69 558 *
jpayne@69 559 * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
jpayne@69 560 * If this option is set and the input does not satisfy STD3 rules,
jpayne@69 561 * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
jpayne@69 562 *
jpayne@69 563 * @param parseError Pointer to UParseError struct to receive information on position
jpayne@69 564 * of error if an error is encountered. Can be NULL.
jpayne@69 565 * @param status ICU in/out error code parameter.
jpayne@69 566 * U_INVALID_CHAR_FOUND if src contains
jpayne@69 567 * unmatched single surrogates.
jpayne@69 568 * U_INDEX_OUTOFBOUNDS_ERROR if src contains
jpayne@69 569 * too many code points.
jpayne@69 570 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
jpayne@69 571 * @return The length of the result string, if successful - or in case of a buffer overflow,
jpayne@69 572 * in which case it will be greater than destCapacity.
jpayne@69 573 * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
jpayne@69 574 */
jpayne@69 575 U_DEPRECATED int32_t U_EXPORT2
jpayne@69 576 uidna_toASCII(const UChar* src, int32_t srcLength,
jpayne@69 577 UChar* dest, int32_t destCapacity,
jpayne@69 578 int32_t options,
jpayne@69 579 UParseError* parseError,
jpayne@69 580 UErrorCode* status);
jpayne@69 581
jpayne@69 582
jpayne@69 583 /**
jpayne@69 584 * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
jpayne@69 585 * This operation is done on <b>single labels</b> before sending it to something that expects
jpayne@69 586 * Unicode names. A label is an individual part of a domain name. Labels are usually
jpayne@69 587 * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
jpayne@69 588 *
jpayne@69 589 * @param src Input UChar array containing ASCII (ACE encoded) label.
jpayne@69 590 * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
jpayne@69 591 * @param dest Output Converted UChar array containing Unicode equivalent of label.
jpayne@69 592 * @param destCapacity Size of dest.
jpayne@69 593 * @param options A bit set of options:
jpayne@69 594 *
jpayne@69 595 * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
jpayne@69 596 * and do not use STD3 ASCII rules
jpayne@69 597 * If unassigned code points are found the operation fails with
jpayne@69 598 * U_IDNA_UNASSIGNED_ERROR error code.
jpayne@69 599 *
jpayne@69 600 * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
jpayne@69 601 * If this option is set, the unassigned code points in the input
jpayne@69 602 * are treated as normal Unicode code points. <b> Note: </b> This option is
jpayne@69 603 * required on toUnicode operation because the RFC mandates
jpayne@69 604 * verification of decoded ACE input by applying toASCII and comparing
jpayne@69 605 * its output with source
jpayne@69 606 *
jpayne@69 607 * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
jpayne@69 608 * If this option is set and the input does not satisfy STD3 rules,
jpayne@69 609 * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
jpayne@69 610 *
jpayne@69 611 * @param parseError Pointer to UParseError struct to receive information on position
jpayne@69 612 * of error if an error is encountered. Can be NULL.
jpayne@69 613 * @param status ICU in/out error code parameter.
jpayne@69 614 * U_INVALID_CHAR_FOUND if src contains
jpayne@69 615 * unmatched single surrogates.
jpayne@69 616 * U_INDEX_OUTOFBOUNDS_ERROR if src contains
jpayne@69 617 * too many code points.
jpayne@69 618 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
jpayne@69 619 * @return The length of the result string, if successful - or in case of a buffer overflow,
jpayne@69 620 * in which case it will be greater than destCapacity.
jpayne@69 621 * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
jpayne@69 622 */
jpayne@69 623 U_DEPRECATED int32_t U_EXPORT2
jpayne@69 624 uidna_toUnicode(const UChar* src, int32_t srcLength,
jpayne@69 625 UChar* dest, int32_t destCapacity,
jpayne@69 626 int32_t options,
jpayne@69 627 UParseError* parseError,
jpayne@69 628 UErrorCode* status);
jpayne@69 629
jpayne@69 630
jpayne@69 631 /**
jpayne@69 632 * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
jpayne@69 633 * This operation is done on complete domain names, e.g., "www.example.com".
jpayne@69 634 * It is important to note that this operation can fail. If it fails, then the input
jpayne@69 635 * domain name cannot be used as an Internationalized Domain Name and the application
jpayne@69 636 * should have methods defined to deal with the failure.
jpayne@69 637 *
jpayne@69 638 * <b>Note:</b> The IDNA RFC specifies that a conformant application should divide a domain name
jpayne@69 639 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
jpayne@69 640 * and then convert. This function does not offer that level of granularity. The options, once
jpayne@69 641 * set, apply to all labels in the domain name.
jpayne@69 642 *
jpayne@69 643 * @param src Input UChar array containing IDN in Unicode.
jpayne@69 644 * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
jpayne@69 645 * @param dest Output UChar array with ASCII (ACE encoded) IDN.
jpayne@69 646 * @param destCapacity Size of dest.
jpayne@69 647 * @param options A bit set of options:
jpayne@69 648 *
jpayne@69 649 * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
jpayne@69 650 * and do not use STD3 ASCII rules.
jpayne@69 651 * If unassigned code points are found, the operation fails with the
jpayne@69 652 * U_UNASSIGNED_CODE_POINT_FOUND error code.
jpayne@69 653 *
jpayne@69 654 * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations.
jpayne@69 655 * If this option is set, the unassigned code points in the input
jpayne@69 656 * are treated as normal Unicode code points.
jpayne@69 657 *
jpayne@69 658 * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions.
jpayne@69 659 * If this option is set and the input does not satisfy STD3 rules,
jpayne@69 660 * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
jpayne@69 661 *
jpayne@69 662 * @param parseError Pointer to UParseError struct to receive information on position
jpayne@69 663 * of error if an error is encountered. Can be NULL.
jpayne@69 664 * @param status ICU in/out error code parameter.
jpayne@69 665 * U_INVALID_CHAR_FOUND if src contains
jpayne@69 666 * unmatched single surrogates.
jpayne@69 667 * U_INDEX_OUTOFBOUNDS_ERROR if src contains
jpayne@69 668 * too many code points.
jpayne@69 669 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough.
jpayne@69 670 * @return The length of the result string, if successful. In case of a buffer overflow,
jpayne@69 671 * the returned length is greater than destCapacity.
jpayne@69 672 * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
jpayne@69 673 */
jpayne@69 674 U_DEPRECATED int32_t U_EXPORT2
jpayne@69 675 uidna_IDNToASCII( const UChar* src, int32_t srcLength,
jpayne@69 676 UChar* dest, int32_t destCapacity,
jpayne@69 677 int32_t options,
jpayne@69 678 UParseError* parseError,
jpayne@69 679 UErrorCode* status);
jpayne@69 680
jpayne@69 681 /**
jpayne@69 682 * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
jpayne@69 683 * This operation is done on complete domain names, e.g., "www.example.com".
jpayne@69 684 *
jpayne@69 685 * <b>Note:</b> The IDNA RFC specifies that a conformant application should divide a domain name
jpayne@69 686 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
jpayne@69 687 * and then convert. This function does not offer that level of granularity. The options, once
jpayne@69 688 * set, apply to all labels in the domain name.
jpayne@69 689 *
jpayne@69 690 * @param src Input UChar array containing IDN in ASCII (ACE encoded) form.
jpayne@69 691 * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
jpayne@69 692 * @param dest Output UChar array containing Unicode equivalent of source IDN.
jpayne@69 693 * @param destCapacity Size of dest.
jpayne@69 694 * @param options A bit set of options:
jpayne@69 695 *
jpayne@69 696 * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
jpayne@69 697 * and do not use STD3 ASCII rules.
jpayne@69 698 * If unassigned code points are found, the operation fails with the
jpayne@69 699 * U_UNASSIGNED_CODE_POINT_FOUND error code.
jpayne@69 700 *
jpayne@69 701 * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations.
jpayne@69 702 * If this option is set, the unassigned code points in the input
jpayne@69 703 * are treated as normal Unicode code points.
jpayne@69 704 *
jpayne@69 705 * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions.
jpayne@69 706 * If this option is set and the input does not satisfy STD3 rules,
jpayne@69 707 * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
jpayne@69 708 *
jpayne@69 709 * @param parseError Pointer to UParseError struct to receive information on position
jpayne@69 710 * of error if an error is encountered. Can be NULL.
jpayne@69 711 * @param status ICU in/out error code parameter.
jpayne@69 712 * U_INVALID_CHAR_FOUND if src contains
jpayne@69 713 * unmatched single surrogates.
jpayne@69 714 * U_INDEX_OUTOFBOUNDS_ERROR if src contains
jpayne@69 715 * too many code points.
jpayne@69 716 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough.
jpayne@69 717 * @return The length of the result string, if successful. In case of a buffer overflow,
jpayne@69 718 * the returned length is greater than destCapacity.
jpayne@69 719 * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
jpayne@69 720 */
jpayne@69 721 U_DEPRECATED int32_t U_EXPORT2
jpayne@69 722 uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
jpayne@69 723 UChar* dest, int32_t destCapacity,
jpayne@69 724 int32_t options,
jpayne@69 725 UParseError* parseError,
jpayne@69 726 UErrorCode* status);
jpayne@69 727
jpayne@69 728 /**
jpayne@69 729 * IDNA2003: Compare two IDN strings for equivalence.
jpayne@69 730 * This function splits the domain names into labels and compares them.
jpayne@69 731 * According to the IDN RFC, whenever two labels are compared, they are
jpayne@69 732 * considered equal if and only if their ASCII forms (obtained by
jpayne@69 733 * applying toASCII) match using a case-insensitive ASCII comparison.
jpayne@69 734 * Two domain names are considered a match if and only if all labels
jpayne@69 735 * match regardless of whether label separators match.
jpayne@69 736 *
jpayne@69 737 * @param s1 First source string.
jpayne@69 738 * @param length1 Length of first source string, or -1 if NUL-terminated.
jpayne@69 739 *
jpayne@69 740 * @param s2 Second source string.
jpayne@69 741 * @param length2 Length of second source string, or -1 if NUL-terminated.
jpayne@69 742 * @param options A bit set of options:
jpayne@69 743 *
jpayne@69 744 * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
jpayne@69 745 * and do not use STD3 ASCII rules.
jpayne@69 746 * If unassigned code points are found, the operation fails with the
jpayne@69 747 * U_UNASSIGNED_CODE_POINT_FOUND error code.
jpayne@69 748 *
jpayne@69 749 * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations.
jpayne@69 750 * If this option is set, the unassigned code points in the input
jpayne@69 751 * are treated as normal Unicode code points.
jpayne@69 752 *
jpayne@69 753 * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions.
jpayne@69 754 * If this option is set and the input does not satisfy STD3 rules,
jpayne@69 755 * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
jpayne@69 756 *
jpayne@69 757 * @param status ICU error code in/out parameter.
jpayne@69 758 * Must fulfill U_SUCCESS before the function call.
jpayne@69 759 * @return <0 or 0 or >0 as usual for string comparisons.
jpayne@69 760 * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
jpayne@69 761 */
jpayne@69 762 U_DEPRECATED int32_t U_EXPORT2
jpayne@69 763 uidna_compare( const UChar *s1, int32_t length1,
jpayne@69 764 const UChar *s2, int32_t length2,
jpayne@69 765 int32_t options,
jpayne@69 766 UErrorCode* status);
jpayne@69 767
jpayne@69 768 #endif /* U_HIDE_DEPRECATED_API */
jpayne@69 769
jpayne@69 770 #endif /* #if !UCONFIG_NO_IDNA */
jpayne@69 771
jpayne@69 772 #endif