jpayne@69
|
1 // © 2016 and later: Unicode, Inc. and others.
|
jpayne@69
|
2 // License & terms of use: http://www.unicode.org/copyright.html
|
jpayne@69
|
3 /*
|
jpayne@69
|
4 *******************************************************************************
|
jpayne@69
|
5 * Copyright (C) 2010-2012, International Business Machines
|
jpayne@69
|
6 * Corporation and others. All Rights Reserved.
|
jpayne@69
|
7 *******************************************************************************
|
jpayne@69
|
8 * file name: idna.h
|
jpayne@69
|
9 * encoding: UTF-8
|
jpayne@69
|
10 * tab size: 8 (not used)
|
jpayne@69
|
11 * indentation:4
|
jpayne@69
|
12 *
|
jpayne@69
|
13 * created on: 2010mar05
|
jpayne@69
|
14 * created by: Markus W. Scherer
|
jpayne@69
|
15 */
|
jpayne@69
|
16
|
jpayne@69
|
17 #ifndef __IDNA_H__
|
jpayne@69
|
18 #define __IDNA_H__
|
jpayne@69
|
19
|
jpayne@69
|
20 /**
|
jpayne@69
|
21 * \file
|
jpayne@69
|
22 * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
|
jpayne@69
|
23 */
|
jpayne@69
|
24
|
jpayne@69
|
25 #include "unicode/utypes.h"
|
jpayne@69
|
26
|
jpayne@69
|
27 #if U_SHOW_CPLUSPLUS_API
|
jpayne@69
|
28
|
jpayne@69
|
29 #if !UCONFIG_NO_IDNA
|
jpayne@69
|
30
|
jpayne@69
|
31 #include "unicode/bytestream.h"
|
jpayne@69
|
32 #include "unicode/stringpiece.h"
|
jpayne@69
|
33 #include "unicode/uidna.h"
|
jpayne@69
|
34 #include "unicode/unistr.h"
|
jpayne@69
|
35
|
jpayne@69
|
36 U_NAMESPACE_BEGIN
|
jpayne@69
|
37
|
jpayne@69
|
38 class IDNAInfo;
|
jpayne@69
|
39
|
jpayne@69
|
40 /**
|
jpayne@69
|
41 * Abstract base class for IDNA processing.
|
jpayne@69
|
42 * See http://www.unicode.org/reports/tr46/
|
jpayne@69
|
43 * and http://www.ietf.org/rfc/rfc3490.txt
|
jpayne@69
|
44 *
|
jpayne@69
|
45 * The IDNA class is not intended for public subclassing.
|
jpayne@69
|
46 *
|
jpayne@69
|
47 * This C++ API currently only implements UTS #46.
|
jpayne@69
|
48 * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
|
jpayne@69
|
49 * and IDNA2003 (functions that do not use a service object).
|
jpayne@69
|
50 * @stable ICU 4.6
|
jpayne@69
|
51 */
|
jpayne@69
|
52 class U_COMMON_API IDNA : public UObject {
|
jpayne@69
|
53 public:
|
jpayne@69
|
54 /**
|
jpayne@69
|
55 * Destructor.
|
jpayne@69
|
56 * @stable ICU 4.6
|
jpayne@69
|
57 */
|
jpayne@69
|
58 ~IDNA();
|
jpayne@69
|
59
|
jpayne@69
|
60 /**
|
jpayne@69
|
61 * Returns an IDNA instance which implements UTS #46.
|
jpayne@69
|
62 * Returns an unmodifiable instance, owned by the caller.
|
jpayne@69
|
63 * Cache it for multiple operations, and delete it when done.
|
jpayne@69
|
64 * The instance is thread-safe, that is, it can be used concurrently.
|
jpayne@69
|
65 *
|
jpayne@69
|
66 * UTS #46 defines Unicode IDNA Compatibility Processing,
|
jpayne@69
|
67 * updated to the latest version of Unicode and compatible with both
|
jpayne@69
|
68 * IDNA2003 and IDNA2008.
|
jpayne@69
|
69 *
|
jpayne@69
|
70 * The worker functions use transitional processing, including deviation mappings,
|
jpayne@69
|
71 * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
|
jpayne@69
|
72 * is used in which case the deviation characters are passed through without change.
|
jpayne@69
|
73 *
|
jpayne@69
|
74 * Disallowed characters are mapped to U+FFFD.
|
jpayne@69
|
75 *
|
jpayne@69
|
76 * For available options see the uidna.h header.
|
jpayne@69
|
77 * Operations with the UTS #46 instance do not support the
|
jpayne@69
|
78 * UIDNA_ALLOW_UNASSIGNED option.
|
jpayne@69
|
79 *
|
jpayne@69
|
80 * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
|
jpayne@69
|
81 * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
|
jpayne@69
|
82 * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
|
jpayne@69
|
83 *
|
jpayne@69
|
84 * @param options Bit set to modify the processing and error checking.
|
jpayne@69
|
85 * See option bit set values in uidna.h.
|
jpayne@69
|
86 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
87 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
88 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
89 * function chaining. (See User Guide for details.)
|
jpayne@69
|
90 * @return the UTS #46 IDNA instance, if successful
|
jpayne@69
|
91 * @stable ICU 4.6
|
jpayne@69
|
92 */
|
jpayne@69
|
93 static IDNA *
|
jpayne@69
|
94 createUTS46Instance(uint32_t options, UErrorCode &errorCode);
|
jpayne@69
|
95
|
jpayne@69
|
96 /**
|
jpayne@69
|
97 * Converts a single domain name label into its ASCII form for DNS lookup.
|
jpayne@69
|
98 * If any processing step fails, then info.hasErrors() will be TRUE and
|
jpayne@69
|
99 * the result might not be an ASCII string.
|
jpayne@69
|
100 * The label might be modified according to the types of errors.
|
jpayne@69
|
101 * Labels with severe errors will be left in (or turned into) their Unicode form.
|
jpayne@69
|
102 *
|
jpayne@69
|
103 * The UErrorCode indicates an error only in exceptional cases,
|
jpayne@69
|
104 * such as a U_MEMORY_ALLOCATION_ERROR.
|
jpayne@69
|
105 *
|
jpayne@69
|
106 * @param label Input domain name label
|
jpayne@69
|
107 * @param dest Destination string object
|
jpayne@69
|
108 * @param info Output container of IDNA processing details.
|
jpayne@69
|
109 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
110 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
111 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
112 * function chaining. (See User Guide for details.)
|
jpayne@69
|
113 * @return dest
|
jpayne@69
|
114 * @stable ICU 4.6
|
jpayne@69
|
115 */
|
jpayne@69
|
116 virtual UnicodeString &
|
jpayne@69
|
117 labelToASCII(const UnicodeString &label, UnicodeString &dest,
|
jpayne@69
|
118 IDNAInfo &info, UErrorCode &errorCode) const = 0;
|
jpayne@69
|
119
|
jpayne@69
|
120 /**
|
jpayne@69
|
121 * Converts a single domain name label into its Unicode form for human-readable display.
|
jpayne@69
|
122 * If any processing step fails, then info.hasErrors() will be TRUE.
|
jpayne@69
|
123 * The label might be modified according to the types of errors.
|
jpayne@69
|
124 *
|
jpayne@69
|
125 * The UErrorCode indicates an error only in exceptional cases,
|
jpayne@69
|
126 * such as a U_MEMORY_ALLOCATION_ERROR.
|
jpayne@69
|
127 *
|
jpayne@69
|
128 * @param label Input domain name label
|
jpayne@69
|
129 * @param dest Destination string object
|
jpayne@69
|
130 * @param info Output container of IDNA processing details.
|
jpayne@69
|
131 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
132 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
133 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
134 * function chaining. (See User Guide for details.)
|
jpayne@69
|
135 * @return dest
|
jpayne@69
|
136 * @stable ICU 4.6
|
jpayne@69
|
137 */
|
jpayne@69
|
138 virtual UnicodeString &
|
jpayne@69
|
139 labelToUnicode(const UnicodeString &label, UnicodeString &dest,
|
jpayne@69
|
140 IDNAInfo &info, UErrorCode &errorCode) const = 0;
|
jpayne@69
|
141
|
jpayne@69
|
142 /**
|
jpayne@69
|
143 * Converts a whole domain name into its ASCII form for DNS lookup.
|
jpayne@69
|
144 * If any processing step fails, then info.hasErrors() will be TRUE and
|
jpayne@69
|
145 * the result might not be an ASCII string.
|
jpayne@69
|
146 * The domain name might be modified according to the types of errors.
|
jpayne@69
|
147 * Labels with severe errors will be left in (or turned into) their Unicode form.
|
jpayne@69
|
148 *
|
jpayne@69
|
149 * The UErrorCode indicates an error only in exceptional cases,
|
jpayne@69
|
150 * such as a U_MEMORY_ALLOCATION_ERROR.
|
jpayne@69
|
151 *
|
jpayne@69
|
152 * @param name Input domain name
|
jpayne@69
|
153 * @param dest Destination string object
|
jpayne@69
|
154 * @param info Output container of IDNA processing details.
|
jpayne@69
|
155 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
156 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
157 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
158 * function chaining. (See User Guide for details.)
|
jpayne@69
|
159 * @return dest
|
jpayne@69
|
160 * @stable ICU 4.6
|
jpayne@69
|
161 */
|
jpayne@69
|
162 virtual UnicodeString &
|
jpayne@69
|
163 nameToASCII(const UnicodeString &name, UnicodeString &dest,
|
jpayne@69
|
164 IDNAInfo &info, UErrorCode &errorCode) const = 0;
|
jpayne@69
|
165
|
jpayne@69
|
166 /**
|
jpayne@69
|
167 * Converts a whole domain name into its Unicode form for human-readable display.
|
jpayne@69
|
168 * If any processing step fails, then info.hasErrors() will be TRUE.
|
jpayne@69
|
169 * The domain name might be modified according to the types of errors.
|
jpayne@69
|
170 *
|
jpayne@69
|
171 * The UErrorCode indicates an error only in exceptional cases,
|
jpayne@69
|
172 * such as a U_MEMORY_ALLOCATION_ERROR.
|
jpayne@69
|
173 *
|
jpayne@69
|
174 * @param name Input domain name
|
jpayne@69
|
175 * @param dest Destination string object
|
jpayne@69
|
176 * @param info Output container of IDNA processing details.
|
jpayne@69
|
177 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
178 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
179 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
180 * function chaining. (See User Guide for details.)
|
jpayne@69
|
181 * @return dest
|
jpayne@69
|
182 * @stable ICU 4.6
|
jpayne@69
|
183 */
|
jpayne@69
|
184 virtual UnicodeString &
|
jpayne@69
|
185 nameToUnicode(const UnicodeString &name, UnicodeString &dest,
|
jpayne@69
|
186 IDNAInfo &info, UErrorCode &errorCode) const = 0;
|
jpayne@69
|
187
|
jpayne@69
|
188 // UTF-8 versions of the processing methods ---------------------------- ***
|
jpayne@69
|
189
|
jpayne@69
|
190 /**
|
jpayne@69
|
191 * Converts a single domain name label into its ASCII form for DNS lookup.
|
jpayne@69
|
192 * UTF-8 version of labelToASCII(), same behavior.
|
jpayne@69
|
193 *
|
jpayne@69
|
194 * @param label Input domain name label
|
jpayne@69
|
195 * @param dest Destination byte sink; Flush()ed if successful
|
jpayne@69
|
196 * @param info Output container of IDNA processing details.
|
jpayne@69
|
197 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
198 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
199 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
200 * function chaining. (See User Guide for details.)
|
jpayne@69
|
201 * @return dest
|
jpayne@69
|
202 * @stable ICU 4.6
|
jpayne@69
|
203 */
|
jpayne@69
|
204 virtual void
|
jpayne@69
|
205 labelToASCII_UTF8(StringPiece label, ByteSink &dest,
|
jpayne@69
|
206 IDNAInfo &info, UErrorCode &errorCode) const;
|
jpayne@69
|
207
|
jpayne@69
|
208 /**
|
jpayne@69
|
209 * Converts a single domain name label into its Unicode form for human-readable display.
|
jpayne@69
|
210 * UTF-8 version of labelToUnicode(), same behavior.
|
jpayne@69
|
211 *
|
jpayne@69
|
212 * @param label Input domain name label
|
jpayne@69
|
213 * @param dest Destination byte sink; Flush()ed if successful
|
jpayne@69
|
214 * @param info Output container of IDNA processing details.
|
jpayne@69
|
215 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
216 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
217 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
218 * function chaining. (See User Guide for details.)
|
jpayne@69
|
219 * @return dest
|
jpayne@69
|
220 * @stable ICU 4.6
|
jpayne@69
|
221 */
|
jpayne@69
|
222 virtual void
|
jpayne@69
|
223 labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
|
jpayne@69
|
224 IDNAInfo &info, UErrorCode &errorCode) const;
|
jpayne@69
|
225
|
jpayne@69
|
226 /**
|
jpayne@69
|
227 * Converts a whole domain name into its ASCII form for DNS lookup.
|
jpayne@69
|
228 * UTF-8 version of nameToASCII(), same behavior.
|
jpayne@69
|
229 *
|
jpayne@69
|
230 * @param name Input domain name
|
jpayne@69
|
231 * @param dest Destination byte sink; Flush()ed if successful
|
jpayne@69
|
232 * @param info Output container of IDNA processing details.
|
jpayne@69
|
233 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
234 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
235 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
236 * function chaining. (See User Guide for details.)
|
jpayne@69
|
237 * @return dest
|
jpayne@69
|
238 * @stable ICU 4.6
|
jpayne@69
|
239 */
|
jpayne@69
|
240 virtual void
|
jpayne@69
|
241 nameToASCII_UTF8(StringPiece name, ByteSink &dest,
|
jpayne@69
|
242 IDNAInfo &info, UErrorCode &errorCode) const;
|
jpayne@69
|
243
|
jpayne@69
|
244 /**
|
jpayne@69
|
245 * Converts a whole domain name into its Unicode form for human-readable display.
|
jpayne@69
|
246 * UTF-8 version of nameToUnicode(), same behavior.
|
jpayne@69
|
247 *
|
jpayne@69
|
248 * @param name Input domain name
|
jpayne@69
|
249 * @param dest Destination byte sink; Flush()ed if successful
|
jpayne@69
|
250 * @param info Output container of IDNA processing details.
|
jpayne@69
|
251 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
252 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
253 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
254 * function chaining. (See User Guide for details.)
|
jpayne@69
|
255 * @return dest
|
jpayne@69
|
256 * @stable ICU 4.6
|
jpayne@69
|
257 */
|
jpayne@69
|
258 virtual void
|
jpayne@69
|
259 nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
|
jpayne@69
|
260 IDNAInfo &info, UErrorCode &errorCode) const;
|
jpayne@69
|
261 };
|
jpayne@69
|
262
|
jpayne@69
|
263 class UTS46;
|
jpayne@69
|
264
|
jpayne@69
|
265 /**
|
jpayne@69
|
266 * Output container for IDNA processing errors.
|
jpayne@69
|
267 * The IDNAInfo class is not suitable for subclassing.
|
jpayne@69
|
268 * @stable ICU 4.6
|
jpayne@69
|
269 */
|
jpayne@69
|
270 class U_COMMON_API IDNAInfo : public UMemory {
|
jpayne@69
|
271 public:
|
jpayne@69
|
272 /**
|
jpayne@69
|
273 * Constructor for stack allocation.
|
jpayne@69
|
274 * @stable ICU 4.6
|
jpayne@69
|
275 */
|
jpayne@69
|
276 IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {}
|
jpayne@69
|
277 /**
|
jpayne@69
|
278 * Were there IDNA processing errors?
|
jpayne@69
|
279 * @return TRUE if there were processing errors
|
jpayne@69
|
280 * @stable ICU 4.6
|
jpayne@69
|
281 */
|
jpayne@69
|
282 UBool hasErrors() const { return errors!=0; }
|
jpayne@69
|
283 /**
|
jpayne@69
|
284 * Returns a bit set indicating IDNA processing errors.
|
jpayne@69
|
285 * See UIDNA_ERROR_... constants in uidna.h.
|
jpayne@69
|
286 * @return bit set of processing errors
|
jpayne@69
|
287 * @stable ICU 4.6
|
jpayne@69
|
288 */
|
jpayne@69
|
289 uint32_t getErrors() const { return errors; }
|
jpayne@69
|
290 /**
|
jpayne@69
|
291 * Returns TRUE if transitional and nontransitional processing produce different results.
|
jpayne@69
|
292 * This is the case when the input label or domain name contains
|
jpayne@69
|
293 * one or more deviation characters outside a Punycode label (see UTS #46).
|
jpayne@69
|
294 * <ul>
|
jpayne@69
|
295 * <li>With nontransitional processing, such characters are
|
jpayne@69
|
296 * copied to the destination string.
|
jpayne@69
|
297 * <li>With transitional processing, such characters are
|
jpayne@69
|
298 * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
|
jpayne@69
|
299 * </ul>
|
jpayne@69
|
300 * @return TRUE if transitional and nontransitional processing produce different results
|
jpayne@69
|
301 * @stable ICU 4.6
|
jpayne@69
|
302 */
|
jpayne@69
|
303 UBool isTransitionalDifferent() const { return isTransDiff; }
|
jpayne@69
|
304
|
jpayne@69
|
305 private:
|
jpayne@69
|
306 friend class UTS46;
|
jpayne@69
|
307
|
jpayne@69
|
308 IDNAInfo(const IDNAInfo &other); // no copying
|
jpayne@69
|
309 IDNAInfo &operator=(const IDNAInfo &other); // no copying
|
jpayne@69
|
310
|
jpayne@69
|
311 void reset() {
|
jpayne@69
|
312 errors=labelErrors=0;
|
jpayne@69
|
313 isTransDiff=FALSE;
|
jpayne@69
|
314 isBiDi=FALSE;
|
jpayne@69
|
315 isOkBiDi=TRUE;
|
jpayne@69
|
316 }
|
jpayne@69
|
317
|
jpayne@69
|
318 uint32_t errors, labelErrors;
|
jpayne@69
|
319 UBool isTransDiff;
|
jpayne@69
|
320 UBool isBiDi;
|
jpayne@69
|
321 UBool isOkBiDi;
|
jpayne@69
|
322 };
|
jpayne@69
|
323
|
jpayne@69
|
324 U_NAMESPACE_END
|
jpayne@69
|
325
|
jpayne@69
|
326 #endif // UCONFIG_NO_IDNA
|
jpayne@69
|
327
|
jpayne@69
|
328 #endif /* U_SHOW_CPLUSPLUS_API */
|
jpayne@69
|
329
|
jpayne@69
|
330 #endif // __IDNA_H__
|