jpayne@69
|
1 // © 2016 and later: Unicode, Inc. and others.
|
jpayne@69
|
2 // License & terms of use: http://www.unicode.org/copyright.html
|
jpayne@69
|
3 /*
|
jpayne@69
|
4 *******************************************************************************
|
jpayne@69
|
5 *
|
jpayne@69
|
6 * Copyright (C) 2009-2013, International Business Machines
|
jpayne@69
|
7 * Corporation and others. All Rights Reserved.
|
jpayne@69
|
8 *
|
jpayne@69
|
9 *******************************************************************************
|
jpayne@69
|
10 * file name: normalizer2.h
|
jpayne@69
|
11 * encoding: UTF-8
|
jpayne@69
|
12 * tab size: 8 (not used)
|
jpayne@69
|
13 * indentation:4
|
jpayne@69
|
14 *
|
jpayne@69
|
15 * created on: 2009nov22
|
jpayne@69
|
16 * created by: Markus W. Scherer
|
jpayne@69
|
17 */
|
jpayne@69
|
18
|
jpayne@69
|
19 #ifndef __NORMALIZER2_H__
|
jpayne@69
|
20 #define __NORMALIZER2_H__
|
jpayne@69
|
21
|
jpayne@69
|
22 /**
|
jpayne@69
|
23 * \file
|
jpayne@69
|
24 * \brief C++ API: New API for Unicode Normalization.
|
jpayne@69
|
25 */
|
jpayne@69
|
26
|
jpayne@69
|
27 #include "unicode/utypes.h"
|
jpayne@69
|
28
|
jpayne@69
|
29 #if U_SHOW_CPLUSPLUS_API
|
jpayne@69
|
30
|
jpayne@69
|
31 #if !UCONFIG_NO_NORMALIZATION
|
jpayne@69
|
32
|
jpayne@69
|
33 #include "unicode/stringpiece.h"
|
jpayne@69
|
34 #include "unicode/uniset.h"
|
jpayne@69
|
35 #include "unicode/unistr.h"
|
jpayne@69
|
36 #include "unicode/unorm2.h"
|
jpayne@69
|
37
|
jpayne@69
|
38 U_NAMESPACE_BEGIN
|
jpayne@69
|
39
|
jpayne@69
|
40 class ByteSink;
|
jpayne@69
|
41
|
jpayne@69
|
42 /**
|
jpayne@69
|
43 * Unicode normalization functionality for standard Unicode normalization or
|
jpayne@69
|
44 * for using custom mapping tables.
|
jpayne@69
|
45 * All instances of this class are unmodifiable/immutable.
|
jpayne@69
|
46 * Instances returned by getInstance() are singletons that must not be deleted by the caller.
|
jpayne@69
|
47 * The Normalizer2 class is not intended for public subclassing.
|
jpayne@69
|
48 *
|
jpayne@69
|
49 * The primary functions are to produce a normalized string and to detect whether
|
jpayne@69
|
50 * a string is already normalized.
|
jpayne@69
|
51 * The most commonly used normalization forms are those defined in
|
jpayne@69
|
52 * https://www.unicode.org/reports/tr15/ (UAX #15, Unicode Normalization Forms).
|
jpayne@69
|
53 * However, this API supports additional normalization forms for specialized purposes.
|
jpayne@69
|
54 * For example, NFKC_Casefold is provided via getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode)
|
jpayne@69
|
55 * and can be used in implementations of UTS #46.
|
jpayne@69
|
56 *
|
jpayne@69
|
57 * Not only are the standard compose and decompose modes supplied,
|
jpayne@69
|
58 * but additional modes are provided as documented in the Mode enum.
|
jpayne@69
|
59 *
|
jpayne@69
|
60 * Some of the functions in this class identify normalization boundaries.
|
jpayne@69
|
61 * At a normalization boundary, the portions of the string
|
jpayne@69
|
62 * before it and starting from it do not interact and can be handled independently.
|
jpayne@69
|
63 *
|
jpayne@69
|
64 * spanQuickCheckYes() stops at a normalization boundary.
|
jpayne@69
|
65 * When the goal is a normalized string, then the text before the boundary
|
jpayne@69
|
66 * can be copied, and the remainder can be processed with normalizeSecondAndAppend().
|
jpayne@69
|
67 *
|
jpayne@69
|
68 * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
|
jpayne@69
|
69 * a character is guaranteed to be at a normalization boundary,
|
jpayne@69
|
70 * regardless of context.
|
jpayne@69
|
71 * This is used for moving from one normalization boundary to the next
|
jpayne@69
|
72 * or preceding boundary, and for performing iterative normalization.
|
jpayne@69
|
73 *
|
jpayne@69
|
74 * Iterative normalization is useful when only a small portion of a
|
jpayne@69
|
75 * longer string needs to be processed.
|
jpayne@69
|
76 * For example, in ICU, iterative normalization is used by the NormalizationTransliterator
|
jpayne@69
|
77 * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
|
jpayne@69
|
78 * (to process only the substring for which sort key bytes are computed).
|
jpayne@69
|
79 *
|
jpayne@69
|
80 * The set of normalization boundaries returned by these functions may not be
|
jpayne@69
|
81 * complete: There may be more boundaries that could be returned.
|
jpayne@69
|
82 * Different functions may return different boundaries.
|
jpayne@69
|
83 * @stable ICU 4.4
|
jpayne@69
|
84 */
|
jpayne@69
|
85 class U_COMMON_API Normalizer2 : public UObject {
|
jpayne@69
|
86 public:
|
jpayne@69
|
87 /**
|
jpayne@69
|
88 * Destructor.
|
jpayne@69
|
89 * @stable ICU 4.4
|
jpayne@69
|
90 */
|
jpayne@69
|
91 ~Normalizer2();
|
jpayne@69
|
92
|
jpayne@69
|
93 /**
|
jpayne@69
|
94 * Returns a Normalizer2 instance for Unicode NFC normalization.
|
jpayne@69
|
95 * Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode).
|
jpayne@69
|
96 * Returns an unmodifiable singleton instance. Do not delete it.
|
jpayne@69
|
97 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
98 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
99 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
100 * function chaining. (See User Guide for details.)
|
jpayne@69
|
101 * @return the requested Normalizer2, if successful
|
jpayne@69
|
102 * @stable ICU 49
|
jpayne@69
|
103 */
|
jpayne@69
|
104 static const Normalizer2 *
|
jpayne@69
|
105 getNFCInstance(UErrorCode &errorCode);
|
jpayne@69
|
106
|
jpayne@69
|
107 /**
|
jpayne@69
|
108 * Returns a Normalizer2 instance for Unicode NFD normalization.
|
jpayne@69
|
109 * Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode).
|
jpayne@69
|
110 * Returns an unmodifiable singleton instance. Do not delete it.
|
jpayne@69
|
111 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
112 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
113 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
114 * function chaining. (See User Guide for details.)
|
jpayne@69
|
115 * @return the requested Normalizer2, if successful
|
jpayne@69
|
116 * @stable ICU 49
|
jpayne@69
|
117 */
|
jpayne@69
|
118 static const Normalizer2 *
|
jpayne@69
|
119 getNFDInstance(UErrorCode &errorCode);
|
jpayne@69
|
120
|
jpayne@69
|
121 /**
|
jpayne@69
|
122 * Returns a Normalizer2 instance for Unicode NFKC normalization.
|
jpayne@69
|
123 * Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode).
|
jpayne@69
|
124 * Returns an unmodifiable singleton instance. Do not delete it.
|
jpayne@69
|
125 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
126 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
127 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
128 * function chaining. (See User Guide for details.)
|
jpayne@69
|
129 * @return the requested Normalizer2, if successful
|
jpayne@69
|
130 * @stable ICU 49
|
jpayne@69
|
131 */
|
jpayne@69
|
132 static const Normalizer2 *
|
jpayne@69
|
133 getNFKCInstance(UErrorCode &errorCode);
|
jpayne@69
|
134
|
jpayne@69
|
135 /**
|
jpayne@69
|
136 * Returns a Normalizer2 instance for Unicode NFKD normalization.
|
jpayne@69
|
137 * Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode).
|
jpayne@69
|
138 * Returns an unmodifiable singleton instance. Do not delete it.
|
jpayne@69
|
139 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
140 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
141 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
142 * function chaining. (See User Guide for details.)
|
jpayne@69
|
143 * @return the requested Normalizer2, if successful
|
jpayne@69
|
144 * @stable ICU 49
|
jpayne@69
|
145 */
|
jpayne@69
|
146 static const Normalizer2 *
|
jpayne@69
|
147 getNFKDInstance(UErrorCode &errorCode);
|
jpayne@69
|
148
|
jpayne@69
|
149 /**
|
jpayne@69
|
150 * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization.
|
jpayne@69
|
151 * Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode).
|
jpayne@69
|
152 * Returns an unmodifiable singleton instance. Do not delete it.
|
jpayne@69
|
153 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
154 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
155 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
156 * function chaining. (See User Guide for details.)
|
jpayne@69
|
157 * @return the requested Normalizer2, if successful
|
jpayne@69
|
158 * @stable ICU 49
|
jpayne@69
|
159 */
|
jpayne@69
|
160 static const Normalizer2 *
|
jpayne@69
|
161 getNFKCCasefoldInstance(UErrorCode &errorCode);
|
jpayne@69
|
162
|
jpayne@69
|
163 /**
|
jpayne@69
|
164 * Returns a Normalizer2 instance which uses the specified data file
|
jpayne@69
|
165 * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
|
jpayne@69
|
166 * and which composes or decomposes text according to the specified mode.
|
jpayne@69
|
167 * Returns an unmodifiable singleton instance. Do not delete it.
|
jpayne@69
|
168 *
|
jpayne@69
|
169 * Use packageName=NULL for data files that are part of ICU's own data.
|
jpayne@69
|
170 * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
|
jpayne@69
|
171 * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
|
jpayne@69
|
172 * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
|
jpayne@69
|
173 *
|
jpayne@69
|
174 * @param packageName NULL for ICU built-in data, otherwise application data package name
|
jpayne@69
|
175 * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
|
jpayne@69
|
176 * @param mode normalization mode (compose or decompose etc.)
|
jpayne@69
|
177 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
178 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
179 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
180 * function chaining. (See User Guide for details.)
|
jpayne@69
|
181 * @return the requested Normalizer2, if successful
|
jpayne@69
|
182 * @stable ICU 4.4
|
jpayne@69
|
183 */
|
jpayne@69
|
184 static const Normalizer2 *
|
jpayne@69
|
185 getInstance(const char *packageName,
|
jpayne@69
|
186 const char *name,
|
jpayne@69
|
187 UNormalization2Mode mode,
|
jpayne@69
|
188 UErrorCode &errorCode);
|
jpayne@69
|
189
|
jpayne@69
|
190 /**
|
jpayne@69
|
191 * Returns the normalized form of the source string.
|
jpayne@69
|
192 * @param src source string
|
jpayne@69
|
193 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
194 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
195 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
196 * function chaining. (See User Guide for details.)
|
jpayne@69
|
197 * @return normalized src
|
jpayne@69
|
198 * @stable ICU 4.4
|
jpayne@69
|
199 */
|
jpayne@69
|
200 UnicodeString
|
jpayne@69
|
201 normalize(const UnicodeString &src, UErrorCode &errorCode) const {
|
jpayne@69
|
202 UnicodeString result;
|
jpayne@69
|
203 normalize(src, result, errorCode);
|
jpayne@69
|
204 return result;
|
jpayne@69
|
205 }
|
jpayne@69
|
206 /**
|
jpayne@69
|
207 * Writes the normalized form of the source string to the destination string
|
jpayne@69
|
208 * (replacing its contents) and returns the destination string.
|
jpayne@69
|
209 * The source and destination strings must be different objects.
|
jpayne@69
|
210 * @param src source string
|
jpayne@69
|
211 * @param dest destination string; its contents is replaced with normalized src
|
jpayne@69
|
212 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
213 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
214 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
215 * function chaining. (See User Guide for details.)
|
jpayne@69
|
216 * @return dest
|
jpayne@69
|
217 * @stable ICU 4.4
|
jpayne@69
|
218 */
|
jpayne@69
|
219 virtual UnicodeString &
|
jpayne@69
|
220 normalize(const UnicodeString &src,
|
jpayne@69
|
221 UnicodeString &dest,
|
jpayne@69
|
222 UErrorCode &errorCode) const = 0;
|
jpayne@69
|
223
|
jpayne@69
|
224 /**
|
jpayne@69
|
225 * Normalizes a UTF-8 string and optionally records how source substrings
|
jpayne@69
|
226 * relate to changed and unchanged result substrings.
|
jpayne@69
|
227 *
|
jpayne@69
|
228 * Currently implemented completely only for "compose" modes,
|
jpayne@69
|
229 * such as for NFC, NFKC, and NFKC_Casefold
|
jpayne@69
|
230 * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
|
jpayne@69
|
231 * Otherwise currently converts to & from UTF-16 and does not support edits.
|
jpayne@69
|
232 *
|
jpayne@69
|
233 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
|
jpayne@69
|
234 * @param src Source UTF-8 string.
|
jpayne@69
|
235 * @param sink A ByteSink to which the normalized UTF-8 result string is written.
|
jpayne@69
|
236 * sink.Flush() is called at the end.
|
jpayne@69
|
237 * @param edits Records edits for index mapping, working with styled text,
|
jpayne@69
|
238 * and getting only changes (if any).
|
jpayne@69
|
239 * The Edits contents is undefined if any error occurs.
|
jpayne@69
|
240 * This function calls edits->reset() first unless
|
jpayne@69
|
241 * options includes U_EDITS_NO_RESET. edits can be nullptr.
|
jpayne@69
|
242 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
243 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
244 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
245 * function chaining. (See User Guide for details.)
|
jpayne@69
|
246 * @stable ICU 60
|
jpayne@69
|
247 */
|
jpayne@69
|
248 virtual void
|
jpayne@69
|
249 normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
|
jpayne@69
|
250 Edits *edits, UErrorCode &errorCode) const;
|
jpayne@69
|
251
|
jpayne@69
|
252 /**
|
jpayne@69
|
253 * Appends the normalized form of the second string to the first string
|
jpayne@69
|
254 * (merging them at the boundary) and returns the first string.
|
jpayne@69
|
255 * The result is normalized if the first string was normalized.
|
jpayne@69
|
256 * The first and second strings must be different objects.
|
jpayne@69
|
257 * @param first string, should be normalized
|
jpayne@69
|
258 * @param second string, will be normalized
|
jpayne@69
|
259 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
260 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
261 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
262 * function chaining. (See User Guide for details.)
|
jpayne@69
|
263 * @return first
|
jpayne@69
|
264 * @stable ICU 4.4
|
jpayne@69
|
265 */
|
jpayne@69
|
266 virtual UnicodeString &
|
jpayne@69
|
267 normalizeSecondAndAppend(UnicodeString &first,
|
jpayne@69
|
268 const UnicodeString &second,
|
jpayne@69
|
269 UErrorCode &errorCode) const = 0;
|
jpayne@69
|
270 /**
|
jpayne@69
|
271 * Appends the second string to the first string
|
jpayne@69
|
272 * (merging them at the boundary) and returns the first string.
|
jpayne@69
|
273 * The result is normalized if both the strings were normalized.
|
jpayne@69
|
274 * The first and second strings must be different objects.
|
jpayne@69
|
275 * @param first string, should be normalized
|
jpayne@69
|
276 * @param second string, should be normalized
|
jpayne@69
|
277 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
278 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
279 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
280 * function chaining. (See User Guide for details.)
|
jpayne@69
|
281 * @return first
|
jpayne@69
|
282 * @stable ICU 4.4
|
jpayne@69
|
283 */
|
jpayne@69
|
284 virtual UnicodeString &
|
jpayne@69
|
285 append(UnicodeString &first,
|
jpayne@69
|
286 const UnicodeString &second,
|
jpayne@69
|
287 UErrorCode &errorCode) const = 0;
|
jpayne@69
|
288
|
jpayne@69
|
289 /**
|
jpayne@69
|
290 * Gets the decomposition mapping of c.
|
jpayne@69
|
291 * Roughly equivalent to normalizing the String form of c
|
jpayne@69
|
292 * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function
|
jpayne@69
|
293 * returns FALSE and does not write a string
|
jpayne@69
|
294 * if c does not have a decomposition mapping in this instance's data.
|
jpayne@69
|
295 * This function is independent of the mode of the Normalizer2.
|
jpayne@69
|
296 * @param c code point
|
jpayne@69
|
297 * @param decomposition String object which will be set to c's
|
jpayne@69
|
298 * decomposition mapping, if there is one.
|
jpayne@69
|
299 * @return TRUE if c has a decomposition, otherwise FALSE
|
jpayne@69
|
300 * @stable ICU 4.6
|
jpayne@69
|
301 */
|
jpayne@69
|
302 virtual UBool
|
jpayne@69
|
303 getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
|
jpayne@69
|
304
|
jpayne@69
|
305 /**
|
jpayne@69
|
306 * Gets the raw decomposition mapping of c.
|
jpayne@69
|
307 *
|
jpayne@69
|
308 * This is similar to the getDecomposition() method but returns the
|
jpayne@69
|
309 * raw decomposition mapping as specified in UnicodeData.txt or
|
jpayne@69
|
310 * (for custom data) in the mapping files processed by the gennorm2 tool.
|
jpayne@69
|
311 * By contrast, getDecomposition() returns the processed,
|
jpayne@69
|
312 * recursively-decomposed version of this mapping.
|
jpayne@69
|
313 *
|
jpayne@69
|
314 * When used on a standard NFKC Normalizer2 instance,
|
jpayne@69
|
315 * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
|
jpayne@69
|
316 *
|
jpayne@69
|
317 * When used on a standard NFC Normalizer2 instance,
|
jpayne@69
|
318 * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
|
jpayne@69
|
319 * in this case, the result contains either one or two code points (=1..4 char16_ts).
|
jpayne@69
|
320 *
|
jpayne@69
|
321 * This function is independent of the mode of the Normalizer2.
|
jpayne@69
|
322 * The default implementation returns FALSE.
|
jpayne@69
|
323 * @param c code point
|
jpayne@69
|
324 * @param decomposition String object which will be set to c's
|
jpayne@69
|
325 * raw decomposition mapping, if there is one.
|
jpayne@69
|
326 * @return TRUE if c has a decomposition, otherwise FALSE
|
jpayne@69
|
327 * @stable ICU 49
|
jpayne@69
|
328 */
|
jpayne@69
|
329 virtual UBool
|
jpayne@69
|
330 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
|
jpayne@69
|
331
|
jpayne@69
|
332 /**
|
jpayne@69
|
333 * Performs pairwise composition of a & b and returns the composite if there is one.
|
jpayne@69
|
334 *
|
jpayne@69
|
335 * Returns a composite code point c only if c has a two-way mapping to a+b.
|
jpayne@69
|
336 * In standard Unicode normalization, this means that
|
jpayne@69
|
337 * c has a canonical decomposition to a+b
|
jpayne@69
|
338 * and c does not have the Full_Composition_Exclusion property.
|
jpayne@69
|
339 *
|
jpayne@69
|
340 * This function is independent of the mode of the Normalizer2.
|
jpayne@69
|
341 * The default implementation returns a negative value.
|
jpayne@69
|
342 * @param a A (normalization starter) code point.
|
jpayne@69
|
343 * @param b Another code point.
|
jpayne@69
|
344 * @return The non-negative composite code point if there is one; otherwise a negative value.
|
jpayne@69
|
345 * @stable ICU 49
|
jpayne@69
|
346 */
|
jpayne@69
|
347 virtual UChar32
|
jpayne@69
|
348 composePair(UChar32 a, UChar32 b) const;
|
jpayne@69
|
349
|
jpayne@69
|
350 /**
|
jpayne@69
|
351 * Gets the combining class of c.
|
jpayne@69
|
352 * The default implementation returns 0
|
jpayne@69
|
353 * but all standard implementations return the Unicode Canonical_Combining_Class value.
|
jpayne@69
|
354 * @param c code point
|
jpayne@69
|
355 * @return c's combining class
|
jpayne@69
|
356 * @stable ICU 49
|
jpayne@69
|
357 */
|
jpayne@69
|
358 virtual uint8_t
|
jpayne@69
|
359 getCombiningClass(UChar32 c) const;
|
jpayne@69
|
360
|
jpayne@69
|
361 /**
|
jpayne@69
|
362 * Tests if the string is normalized.
|
jpayne@69
|
363 * Internally, in cases where the quickCheck() method would return "maybe"
|
jpayne@69
|
364 * (which is only possible for the two COMPOSE modes) this method
|
jpayne@69
|
365 * resolves to "yes" or "no" to provide a definitive result,
|
jpayne@69
|
366 * at the cost of doing more work in those cases.
|
jpayne@69
|
367 * @param s input string
|
jpayne@69
|
368 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
369 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
370 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
371 * function chaining. (See User Guide for details.)
|
jpayne@69
|
372 * @return TRUE if s is normalized
|
jpayne@69
|
373 * @stable ICU 4.4
|
jpayne@69
|
374 */
|
jpayne@69
|
375 virtual UBool
|
jpayne@69
|
376 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
|
jpayne@69
|
377 /**
|
jpayne@69
|
378 * Tests if the UTF-8 string is normalized.
|
jpayne@69
|
379 * Internally, in cases where the quickCheck() method would return "maybe"
|
jpayne@69
|
380 * (which is only possible for the two COMPOSE modes) this method
|
jpayne@69
|
381 * resolves to "yes" or "no" to provide a definitive result,
|
jpayne@69
|
382 * at the cost of doing more work in those cases.
|
jpayne@69
|
383 *
|
jpayne@69
|
384 * This works for all normalization modes,
|
jpayne@69
|
385 * but it is currently optimized for UTF-8 only for "compose" modes,
|
jpayne@69
|
386 * such as for NFC, NFKC, and NFKC_Casefold
|
jpayne@69
|
387 * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
|
jpayne@69
|
388 * For other modes it currently converts to UTF-16 and calls isNormalized().
|
jpayne@69
|
389 *
|
jpayne@69
|
390 * @param s UTF-8 input string
|
jpayne@69
|
391 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
392 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
393 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
394 * function chaining. (See User Guide for details.)
|
jpayne@69
|
395 * @return TRUE if s is normalized
|
jpayne@69
|
396 * @stable ICU 60
|
jpayne@69
|
397 */
|
jpayne@69
|
398 virtual UBool
|
jpayne@69
|
399 isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const;
|
jpayne@69
|
400
|
jpayne@69
|
401
|
jpayne@69
|
402 /**
|
jpayne@69
|
403 * Tests if the string is normalized.
|
jpayne@69
|
404 * For the two COMPOSE modes, the result could be "maybe" in cases that
|
jpayne@69
|
405 * would take a little more work to resolve definitively.
|
jpayne@69
|
406 * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
|
jpayne@69
|
407 * combination of quick check + normalization, to avoid
|
jpayne@69
|
408 * re-checking the "yes" prefix.
|
jpayne@69
|
409 * @param s input string
|
jpayne@69
|
410 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
411 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
412 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
413 * function chaining. (See User Guide for details.)
|
jpayne@69
|
414 * @return UNormalizationCheckResult
|
jpayne@69
|
415 * @stable ICU 4.4
|
jpayne@69
|
416 */
|
jpayne@69
|
417 virtual UNormalizationCheckResult
|
jpayne@69
|
418 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
|
jpayne@69
|
419
|
jpayne@69
|
420 /**
|
jpayne@69
|
421 * Returns the end of the normalized substring of the input string.
|
jpayne@69
|
422 * In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
|
jpayne@69
|
423 * the substring <code>UnicodeString(s, 0, end)</code>
|
jpayne@69
|
424 * will pass the quick check with a "yes" result.
|
jpayne@69
|
425 *
|
jpayne@69
|
426 * The returned end index is usually one or more characters before the
|
jpayne@69
|
427 * "no" or "maybe" character: The end index is at a normalization boundary.
|
jpayne@69
|
428 * (See the class documentation for more about normalization boundaries.)
|
jpayne@69
|
429 *
|
jpayne@69
|
430 * When the goal is a normalized string and most input strings are expected
|
jpayne@69
|
431 * to be normalized already, then call this method,
|
jpayne@69
|
432 * and if it returns a prefix shorter than the input string,
|
jpayne@69
|
433 * copy that prefix and use normalizeSecondAndAppend() for the remainder.
|
jpayne@69
|
434 * @param s input string
|
jpayne@69
|
435 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
436 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
437 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
438 * function chaining. (See User Guide for details.)
|
jpayne@69
|
439 * @return "yes" span end index
|
jpayne@69
|
440 * @stable ICU 4.4
|
jpayne@69
|
441 */
|
jpayne@69
|
442 virtual int32_t
|
jpayne@69
|
443 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
|
jpayne@69
|
444
|
jpayne@69
|
445 /**
|
jpayne@69
|
446 * Tests if the character always has a normalization boundary before it,
|
jpayne@69
|
447 * regardless of context.
|
jpayne@69
|
448 * If true, then the character does not normalization-interact with
|
jpayne@69
|
449 * preceding characters.
|
jpayne@69
|
450 * In other words, a string containing this character can be normalized
|
jpayne@69
|
451 * by processing portions before this character and starting from this
|
jpayne@69
|
452 * character independently.
|
jpayne@69
|
453 * This is used for iterative normalization. See the class documentation for details.
|
jpayne@69
|
454 * @param c character to test
|
jpayne@69
|
455 * @return TRUE if c has a normalization boundary before it
|
jpayne@69
|
456 * @stable ICU 4.4
|
jpayne@69
|
457 */
|
jpayne@69
|
458 virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
|
jpayne@69
|
459
|
jpayne@69
|
460 /**
|
jpayne@69
|
461 * Tests if the character always has a normalization boundary after it,
|
jpayne@69
|
462 * regardless of context.
|
jpayne@69
|
463 * If true, then the character does not normalization-interact with
|
jpayne@69
|
464 * following characters.
|
jpayne@69
|
465 * In other words, a string containing this character can be normalized
|
jpayne@69
|
466 * by processing portions up to this character and after this
|
jpayne@69
|
467 * character independently.
|
jpayne@69
|
468 * This is used for iterative normalization. See the class documentation for details.
|
jpayne@69
|
469 * Note that this operation may be significantly slower than hasBoundaryBefore().
|
jpayne@69
|
470 * @param c character to test
|
jpayne@69
|
471 * @return TRUE if c has a normalization boundary after it
|
jpayne@69
|
472 * @stable ICU 4.4
|
jpayne@69
|
473 */
|
jpayne@69
|
474 virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
|
jpayne@69
|
475
|
jpayne@69
|
476 /**
|
jpayne@69
|
477 * Tests if the character is normalization-inert.
|
jpayne@69
|
478 * If true, then the character does not change, nor normalization-interact with
|
jpayne@69
|
479 * preceding or following characters.
|
jpayne@69
|
480 * In other words, a string containing this character can be normalized
|
jpayne@69
|
481 * by processing portions before this character and after this
|
jpayne@69
|
482 * character independently.
|
jpayne@69
|
483 * This is used for iterative normalization. See the class documentation for details.
|
jpayne@69
|
484 * Note that this operation may be significantly slower than hasBoundaryBefore().
|
jpayne@69
|
485 * @param c character to test
|
jpayne@69
|
486 * @return TRUE if c is normalization-inert
|
jpayne@69
|
487 * @stable ICU 4.4
|
jpayne@69
|
488 */
|
jpayne@69
|
489 virtual UBool isInert(UChar32 c) const = 0;
|
jpayne@69
|
490 };
|
jpayne@69
|
491
|
jpayne@69
|
492 /**
|
jpayne@69
|
493 * Normalization filtered by a UnicodeSet.
|
jpayne@69
|
494 * Normalizes portions of the text contained in the filter set and leaves
|
jpayne@69
|
495 * portions not contained in the filter set unchanged.
|
jpayne@69
|
496 * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
|
jpayne@69
|
497 * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
|
jpayne@69
|
498 * This class implements all of (and only) the Normalizer2 API.
|
jpayne@69
|
499 * An instance of this class is unmodifiable/immutable but is constructed and
|
jpayne@69
|
500 * must be destructed by the owner.
|
jpayne@69
|
501 * @stable ICU 4.4
|
jpayne@69
|
502 */
|
jpayne@69
|
503 class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
|
jpayne@69
|
504 public:
|
jpayne@69
|
505 /**
|
jpayne@69
|
506 * Constructs a filtered normalizer wrapping any Normalizer2 instance
|
jpayne@69
|
507 * and a filter set.
|
jpayne@69
|
508 * Both are aliased and must not be modified or deleted while this object
|
jpayne@69
|
509 * is used.
|
jpayne@69
|
510 * The filter set should be frozen; otherwise the performance will suffer greatly.
|
jpayne@69
|
511 * @param n2 wrapped Normalizer2 instance
|
jpayne@69
|
512 * @param filterSet UnicodeSet which determines the characters to be normalized
|
jpayne@69
|
513 * @stable ICU 4.4
|
jpayne@69
|
514 */
|
jpayne@69
|
515 FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
|
jpayne@69
|
516 norm2(n2), set(filterSet) {}
|
jpayne@69
|
517
|
jpayne@69
|
518 /**
|
jpayne@69
|
519 * Destructor.
|
jpayne@69
|
520 * @stable ICU 4.4
|
jpayne@69
|
521 */
|
jpayne@69
|
522 ~FilteredNormalizer2();
|
jpayne@69
|
523
|
jpayne@69
|
524 /**
|
jpayne@69
|
525 * Writes the normalized form of the source string to the destination string
|
jpayne@69
|
526 * (replacing its contents) and returns the destination string.
|
jpayne@69
|
527 * The source and destination strings must be different objects.
|
jpayne@69
|
528 * @param src source string
|
jpayne@69
|
529 * @param dest destination string; its contents are replaced with normalized src
|
jpayne@69
|
530 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
531 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
532 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
533 * function chaining. (See User Guide for details.)
|
jpayne@69
|
534 * @return dest
|
jpayne@69
|
535 * @stable ICU 4.4
|
jpayne@69
|
536 */
|
jpayne@69
|
537 virtual UnicodeString &
|
jpayne@69
|
538 normalize(const UnicodeString &src,
|
jpayne@69
|
539 UnicodeString &dest,
|
jpayne@69
|
540 UErrorCode &errorCode) const U_OVERRIDE;
|
jpayne@69
|
541
|
jpayne@69
|
542 /**
|
jpayne@69
|
543 * Normalizes a UTF-8 string and optionally records how source substrings
|
jpayne@69
|
544 * relate to changed and unchanged result substrings.
|
jpayne@69
|
545 *
|
jpayne@69
|
546 * Currently implemented completely only for "compose" modes,
|
jpayne@69
|
547 * such as for NFC, NFKC, and NFKC_Casefold
|
jpayne@69
|
548 * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
|
jpayne@69
|
549 * Otherwise currently converts to & from UTF-16 and does not support edits.
|
jpayne@69
|
550 *
|
jpayne@69
|
551 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
|
jpayne@69
|
552 * @param src Source UTF-8 string.
|
jpayne@69
|
553 * @param sink A ByteSink to which the normalized UTF-8 result string is written.
|
jpayne@69
|
554 * sink.Flush() is called at the end.
|
jpayne@69
|
555 * @param edits Records edits for index mapping, working with styled text,
|
jpayne@69
|
556 * and getting only changes (if any).
|
jpayne@69
|
557 * The contents of the Edits object are undefined if any error occurs.
|
jpayne@69
|
558 * This function calls edits->reset() first unless
|
jpayne@69
|
559 * options includes U_EDITS_NO_RESET. edits can be nullptr.
|
jpayne@69
|
560 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
561 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
562 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
563 * function chaining. (See User Guide for details.)
|
jpayne@69
|
564 * @stable ICU 60
|
jpayne@69
|
565 */
|
jpayne@69
|
566 virtual void
|
jpayne@69
|
567 normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
|
jpayne@69
|
568 Edits *edits, UErrorCode &errorCode) const U_OVERRIDE;
|
jpayne@69
|
569
|
jpayne@69
|
570 /**
|
jpayne@69
|
571 * Appends the normalized form of the second string to the first string
|
jpayne@69
|
572 * (merging them at the boundary) and returns the first string.
|
jpayne@69
|
573 * The result is normalized if the first string was normalized.
|
jpayne@69
|
574 * The first and second strings must be different objects.
|
jpayne@69
|
575 * @param first string, should be normalized
|
jpayne@69
|
576 * @param second string, will be normalized
|
jpayne@69
|
577 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
578 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
579 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
580 * function chaining. (See User Guide for details.)
|
jpayne@69
|
581 * @return first
|
jpayne@69
|
582 * @stable ICU 4.4
|
jpayne@69
|
583 */
|
jpayne@69
|
584 virtual UnicodeString &
|
jpayne@69
|
585 normalizeSecondAndAppend(UnicodeString &first,
|
jpayne@69
|
586 const UnicodeString &second,
|
jpayne@69
|
587 UErrorCode &errorCode) const U_OVERRIDE;
|
jpayne@69
|
588 /**
|
jpayne@69
|
589 * Appends the second string to the first string
|
jpayne@69
|
590 * (merging them at the boundary) and returns the first string.
|
jpayne@69
|
591 * The result is normalized if both the strings were normalized.
|
jpayne@69
|
592 * The first and second strings must be different objects.
|
jpayne@69
|
593 * @param first string, should be normalized
|
jpayne@69
|
594 * @param second string, should be normalized
|
jpayne@69
|
595 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
596 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
597 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
598 * function chaining. (See User Guide for details.)
|
jpayne@69
|
599 * @return first
|
jpayne@69
|
600 * @stable ICU 4.4
|
jpayne@69
|
601 */
|
jpayne@69
|
602 virtual UnicodeString &
|
jpayne@69
|
603 append(UnicodeString &first,
|
jpayne@69
|
604 const UnicodeString &second,
|
jpayne@69
|
605 UErrorCode &errorCode) const U_OVERRIDE;
|
jpayne@69
|
606
|
jpayne@69
|
607 /**
|
jpayne@69
|
608 * Gets the decomposition mapping of c.
|
jpayne@69
|
609 * For details see the base class documentation.
|
jpayne@69
|
610 *
|
jpayne@69
|
611 * This function is independent of the mode of the Normalizer2.
|
jpayne@69
|
612 * @param c code point
|
jpayne@69
|
613 * @param decomposition String object which will be set to c's
|
jpayne@69
|
614 * decomposition mapping, if there is one.
|
jpayne@69
|
615 * @return TRUE if c has a decomposition, otherwise FALSE
|
jpayne@69
|
616 * @stable ICU 4.6
|
jpayne@69
|
617 */
|
jpayne@69
|
618 virtual UBool
|
jpayne@69
|
619 getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;
|
jpayne@69
|
620
|
jpayne@69
|
621 /**
|
jpayne@69
|
622 * Gets the raw decomposition mapping of c.
|
jpayne@69
|
623 * For details see the base class documentation.
|
jpayne@69
|
624 *
|
jpayne@69
|
625 * This function is independent of the mode of the Normalizer2.
|
jpayne@69
|
626 * @param c code point
|
jpayne@69
|
627 * @param decomposition String object which will be set to c's
|
jpayne@69
|
628 * raw decomposition mapping, if there is one.
|
jpayne@69
|
629 * @return TRUE if c has a decomposition, otherwise FALSE
|
jpayne@69
|
630 * @stable ICU 49
|
jpayne@69
|
631 */
|
jpayne@69
|
632 virtual UBool
|
jpayne@69
|
633 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;
|
jpayne@69
|
634
|
jpayne@69
|
635 /**
|
jpayne@69
|
636 * Performs pairwise composition of a & b and returns the composite if there is one.
|
jpayne@69
|
637 * For details see the base class documentation.
|
jpayne@69
|
638 *
|
jpayne@69
|
639 * This function is independent of the mode of the Normalizer2.
|
jpayne@69
|
640 * @param a A (normalization starter) code point.
|
jpayne@69
|
641 * @param b Another code point.
|
jpayne@69
|
642 * @return The non-negative composite code point if there is one; otherwise a negative value.
|
jpayne@69
|
643 * @stable ICU 49
|
jpayne@69
|
644 */
|
jpayne@69
|
645 virtual UChar32
|
jpayne@69
|
646 composePair(UChar32 a, UChar32 b) const U_OVERRIDE;
|
jpayne@69
|
647
|
jpayne@69
|
648 /**
|
jpayne@69
|
649 * Gets the combining class of c.
|
jpayne@69
|
650 * The default implementation returns 0
|
jpayne@69
|
651 * but all standard implementations return the Unicode Canonical_Combining_Class value.
|
jpayne@69
|
652 * @param c code point
|
jpayne@69
|
653 * @return c's combining class
|
jpayne@69
|
654 * @stable ICU 49
|
jpayne@69
|
655 */
|
jpayne@69
|
656 virtual uint8_t
|
jpayne@69
|
657 getCombiningClass(UChar32 c) const U_OVERRIDE;
|
jpayne@69
|
658
|
jpayne@69
|
659 /**
|
jpayne@69
|
660 * Tests if the string is normalized.
|
jpayne@69
|
661 * For details see the Normalizer2 base class documentation.
|
jpayne@69
|
662 * @param s input string
|
jpayne@69
|
663 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
664 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
665 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
666 * function chaining. (See User Guide for details.)
|
jpayne@69
|
667 * @return TRUE if s is normalized
|
jpayne@69
|
668 * @stable ICU 4.4
|
jpayne@69
|
669 */
|
jpayne@69
|
670 virtual UBool
|
jpayne@69
|
671 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
|
jpayne@69
|
672 /**
|
jpayne@69
|
673 * Tests if the UTF-8 string is normalized.
|
jpayne@69
|
674 * Internally, in cases where the quickCheck() method would return "maybe"
|
jpayne@69
|
675 * (which is only possible for the two COMPOSE modes) this method
|
jpayne@69
|
676 * resolves to "yes" or "no" to provide a definitive result,
|
jpayne@69
|
677 * at the cost of doing more work in those cases.
|
jpayne@69
|
678 *
|
jpayne@69
|
679 * This works for all normalization modes,
|
jpayne@69
|
680 * but it is currently optimized for UTF-8 only for "compose" modes,
|
jpayne@69
|
681 * such as for NFC, NFKC, and NFKC_Casefold
|
jpayne@69
|
682 * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
|
jpayne@69
|
683 * For other modes it currently converts to UTF-16 and calls isNormalized().
|
jpayne@69
|
684 *
|
jpayne@69
|
685 * @param s UTF-8 input string
|
jpayne@69
|
686 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
687 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
688 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
689 * function chaining. (See User Guide for details.)
|
jpayne@69
|
690 * @return TRUE if s is normalized
|
jpayne@69
|
691 * @stable ICU 60
|
jpayne@69
|
692 */
|
jpayne@69
|
693 virtual UBool
|
jpayne@69
|
694 isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const U_OVERRIDE;
|
jpayne@69
|
695 /**
|
jpayne@69
|
696 * Tests if the string is normalized; the result can be "maybe" (only for the two COMPOSE modes).
|
jpayne@69
|
697 * For details see the Normalizer2 base class documentation.
|
jpayne@69
|
698 * @param s input string
|
jpayne@69
|
699 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
700 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
701 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
702 * function chaining. (See User Guide for details.)
|
jpayne@69
|
703 * @return UNormalizationCheckResult
|
jpayne@69
|
704 * @stable ICU 4.4
|
jpayne@69
|
705 */
|
jpayne@69
|
706 virtual UNormalizationCheckResult
|
jpayne@69
|
707 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
|
jpayne@69
|
708 /**
|
jpayne@69
|
709 * Returns the end of the normalized substring of the input string.
|
jpayne@69
|
710 * For details see the Normalizer2 base class documentation.
|
jpayne@69
|
711 * @param s input string
|
jpayne@69
|
712 * @param errorCode Standard ICU error code. Its input value must
|
jpayne@69
|
713 * pass the U_SUCCESS() test, or else the function returns
|
jpayne@69
|
714 * immediately. Check for U_FAILURE() on output or use with
|
jpayne@69
|
715 * function chaining. (See User Guide for details.)
|
jpayne@69
|
716 * @return "yes" span end index
|
jpayne@69
|
717 * @stable ICU 4.4
|
jpayne@69
|
718 */
|
jpayne@69
|
719 virtual int32_t
|
jpayne@69
|
720 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
|
jpayne@69
|
721
|
jpayne@69
|
722 /**
|
jpayne@69
|
723 * Tests if the character always has a normalization boundary before it,
|
jpayne@69
|
724 * regardless of context.
|
jpayne@69
|
725 * For details see the Normalizer2 base class documentation.
|
jpayne@69
|
726 * @param c character to test
|
jpayne@69
|
727 * @return TRUE if c has a normalization boundary before it
|
jpayne@69
|
728 * @stable ICU 4.4
|
jpayne@69
|
729 */
|
jpayne@69
|
730 virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE;
|
jpayne@69
|
731
|
jpayne@69
|
732 /**
|
jpayne@69
|
733 * Tests if the character always has a normalization boundary after it,
|
jpayne@69
|
734 * regardless of context.
|
jpayne@69
|
735 * For details see the Normalizer2 base class documentation.
|
jpayne@69
|
736 * @param c character to test
|
jpayne@69
|
737 * @return TRUE if c has a normalization boundary after it
|
jpayne@69
|
738 * @stable ICU 4.4
|
jpayne@69
|
739 */
|
jpayne@69
|
740 virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE;
|
jpayne@69
|
741
|
jpayne@69
|
742 /**
|
jpayne@69
|
743 * Tests if the character is normalization-inert.
|
jpayne@69
|
744 * For details see the Normalizer2 base class documentation.
|
jpayne@69
|
745 * @param c character to test
|
jpayne@69
|
746 * @return TRUE if c is normalization-inert
|
jpayne@69
|
747 * @stable ICU 4.4
|
jpayne@69
|
748 */
|
jpayne@69
|
749 virtual UBool isInert(UChar32 c) const U_OVERRIDE;
|
jpayne@69
|
750 private:
|
jpayne@69
|
// Private overload of normalize(): same src/dest/errorCode parameters as the
// public method, plus a USetSpanCondition (spanCondition). It is declared
// without virtual/U_OVERRIDE, so it is not one of the overridden API methods.
// NOTE(review): presumably the shared worker behind the public normalize(),
// with spanCondition selecting how the member set spans src -- confirm in the
// implementation file.
751 UnicodeString &
|
jpayne@69
|
752 normalize(const UnicodeString &src,
|
jpayne@69
|
753 UnicodeString &dest,
|
jpayne@69
|
754 USetSpanCondition spanCondition,
|
jpayne@69
|
755 UErrorCode &errorCode) const;
|
jpayne@69
|
756
|
jpayne@69
|
// Private overload of normalizeUTF8(): takes the UTF-8 source as a
// pointer (src) plus length instead of a StringPiece, and additionally takes a
// USetSpanCondition (spanCondition). Declared without virtual/U_OVERRIDE.
// NOTE(review): presumably the shared worker behind the public
// normalizeUTF8(); the exact meaning of spanCondition here should be confirmed
// in the implementation file.
757 void
|
jpayne@69
|
758 normalizeUTF8(uint32_t options, const char *src, int32_t length,
|
jpayne@69
|
759 ByteSink &sink, Edits *edits,
|
jpayne@69
|
760 USetSpanCondition spanCondition,
|
jpayne@69
|
761 UErrorCode &errorCode) const;
|
jpayne@69
|
762
|
jpayne@69
|
// Private overload of normalizeSecondAndAppend(): same first/second/errorCode
// parameters as the public method, plus a UBool doNormalize flag. Declared
// without virtual/U_OVERRIDE.
// NOTE(review): presumably the common implementation for the public
// normalizeSecondAndAppend() and append(), with doNormalize choosing between
// them -- confirm in the implementation file.
763 UnicodeString &
|
jpayne@69
|
764 normalizeSecondAndAppend(UnicodeString &first,
|
jpayne@69
|
765 const UnicodeString &second,
|
jpayne@69
|
766 UBool doNormalize,
|
jpayne@69
|
767 UErrorCode &errorCode) const;
|
jpayne@69
|
768
|
jpayne@69
|
// Both members are const references, so this object does not hold copies of
// the Normalizer2 or the UnicodeSet it refers to.
// NOTE(review): presumably norm2 is the underlying normalizer and set is the
// filter, and both must outlive this object -- confirm against the constructor.
769 const Normalizer2 &norm2;
|
jpayne@69
|
770 const UnicodeSet &set;
|
jpayne@69
|
771 };
|
jpayne@69
|
772
|
jpayne@69
|
773 U_NAMESPACE_END
|
jpayne@69
|
774
|
jpayne@69
|
775 #endif // !UCONFIG_NO_NORMALIZATION
|
jpayne@69
|
776
|
jpayne@69
|
777 #endif /* U_SHOW_CPLUSPLUS_API */
|
jpayne@69
|
778
|
jpayne@69
|
779 #endif // __NORMALIZER2_H__
|