Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/unicode/tblcoll.h @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 // © 2016 and later: Unicode, Inc. and others. | |
2 // License & terms of use: http://www.unicode.org/copyright.html | |
3 /* | |
4 ****************************************************************************** | |
5 * Copyright (C) 1996-2016, International Business Machines Corporation and | |
6 * others. All Rights Reserved. | |
7 ****************************************************************************** | |
8 */ | |
9 | |
10 /** | |
11 * \file | |
12 * \brief C++ API: The RuleBasedCollator class implements the Collator abstract base class. | |
13 */ | |
14 | |
15 /** | |
16 * File tblcoll.h | |
17 * | |
18 * Created by: Helena Shih | |
19 * | |
20 * Modification History: | |
21 * | |
22 * Date Name Description | |
23 * 2/5/97 aliu Added streamIn and streamOut methods. Added | |
24 * constructor which reads RuleBasedCollator object from | |
25 * a binary file. Added writeToFile method which streams | |
26 * RuleBasedCollator out to a binary file. The streamIn | |
27 * and streamOut methods use istream and ostream objects | |
28 * in binary mode. | |
29 * 2/12/97 aliu Modified to use TableCollationData sub-object to | |
30 * hold invariant data. | |
31 * 2/13/97 aliu Moved several methods into this class from Collation. | |
32 * Added a private RuleBasedCollator(Locale&) constructor, | |
33 * to be used by Collator::createDefault(). General | |
34 * clean up. | |
35 * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy | |
36 * constructor and getDynamicClassID. | |
37 * 3/5/97 aliu Modified constructFromFile() to add parameter | |
38 * specifying whether or not binary loading is to be | |
39 * attempted. This is required for dynamic rule loading. | |
40 * 05/07/97 helena Added memory allocation error detection. | |
41 * 6/17/97 helena Added IDENTICAL strength for compare, changed getRules to | |
42 * use MergeCollation::getPattern. | |
43 * 6/20/97 helena Java class name change. | |
44 * 8/18/97 helena Added internal API documentation. | |
45 * 09/03/97 helena Added createCollationKeyValues(). | |
46 * 02/10/98 damiba Added compare with "length" parameter | |
47 * 08/05/98 erm Synched with 1.2 version of RuleBasedCollator.java | |
48 * 04/23/99 stephen Removed EDecompositionMode, merged with | |
49 * Normalizer::EMode | |
50 * 06/14/99 stephen Removed kResourceBundleSuffix | |
51 * 11/02/99 helena Collator performance enhancements. Eliminates the | |
52 * UnicodeString construction and special case for NO_OP. | |
53 * 11/23/99 srl More performance enhancements. Updates to NormalizerIterator | |
54 * internal state management. | |
55 * 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator | |
56 * to implementation file. | |
57 * 01/29/01 synwee Modified into a C++ wrapper which calls C API | |
58 * (ucol.h) | |
59 * 2012-2014 markus Rewritten in C++ again. | |
60 */ | |
61 | |
62 #ifndef TBLCOLL_H | |
63 #define TBLCOLL_H | |
64 | |
65 #include "unicode/utypes.h" | |
66 | |
67 #if U_SHOW_CPLUSPLUS_API | |
68 | |
69 #if !UCONFIG_NO_COLLATION | |
70 | |
71 #include "unicode/coll.h" | |
72 #include "unicode/locid.h" | |
73 #include "unicode/uiter.h" | |
74 #include "unicode/ucol.h" | |
75 | |
76 U_NAMESPACE_BEGIN | |
77 | |
78 struct CollationCacheEntry; | |
79 struct CollationData; | |
80 struct CollationSettings; | |
81 struct CollationTailoring; | |
82 /** | |
83 * @stable ICU 2.0 | |
84 */ | |
85 class StringSearch; | |
86 /** | |
87 * @stable ICU 2.0 | |
88 */ | |
89 class CollationElementIterator; | |
90 class CollationKey; | |
91 class SortKeyByteSink; | |
92 class UnicodeSet; | |
93 class UnicodeString; | |
94 class UVector64; | |
95 | |
96 /** | |
97 * The RuleBasedCollator class provides the implementation of | |
98 * Collator, using data-driven tables. The user can create a customized | |
99 * table-based collation. | |
100 * <p> | |
101 * For more information about the collation service see | |
102 * <a href="http://userguide.icu-project.org/collation">the User Guide</a>. | |
103 * <p> | |
104 * Collation service provides correct sorting orders for most locales supported in ICU. | |
105 * If specific data for a locale is not available, the order eventually falls back | |
106 * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>. | |
107 * <p> | |
108 * Sort ordering may be customized by providing your own set of rules. For more on | |
109 * this subject see the <a href="http://userguide.icu-project.org/collation/customization"> | |
110 * Collation Customization</a> section of the User Guide. | |
111 * <p> | |
112 * Note, RuleBasedCollator is not to be subclassed. | |
113 * @see Collator | |
114 */ | |
115 class U_I18N_API RuleBasedCollator : public Collator { | |
116 public: | |
117 /** | |
118 * RuleBasedCollator constructor. This takes the table rules and builds a | |
119 * collation table out of them. Please see RuleBasedCollator class | |
120 * description for more details on the collation rule syntax. | |
121 * @param rules the collation rules to build the collation table from. | |
122 * @param status reporting a success or an error. | |
123 * @stable ICU 2.0 | |
124 */ | |
125 RuleBasedCollator(const UnicodeString& rules, UErrorCode& status); | |
126 | |
127 /** | |
128 * RuleBasedCollator constructor. This takes the table rules and builds a | |
129 * collation table out of them. Please see RuleBasedCollator class | |
130 * description for more details on the collation rule syntax. | |
131 * @param rules the collation rules to build the collation table from. | |
132 * @param collationStrength strength for comparison | |
133 * @param status reporting a success or an error. | |
134 * @stable ICU 2.0 | |
135 */ | |
136 RuleBasedCollator(const UnicodeString& rules, | |
137 ECollationStrength collationStrength, | |
138 UErrorCode& status); | |
139 | |
140 /** | |
141 * RuleBasedCollator constructor. This takes the table rules and builds a | |
142 * collation table out of them. Please see RuleBasedCollator class | |
143 * description for more details on the collation rule syntax. | |
144 * @param rules the collation rules to build the collation table from. | |
145 * @param decompositionMode the normalisation mode | |
146 * @param status reporting a success or an error. | |
147 * @stable ICU 2.0 | |
148 */ | |
149 RuleBasedCollator(const UnicodeString& rules, | |
150 UColAttributeValue decompositionMode, | |
151 UErrorCode& status); | |
152 | |
153 /** | |
154 * RuleBasedCollator constructor. This takes the table rules and builds a | |
155 * collation table out of them. Please see RuleBasedCollator class | |
156 * description for more details on the collation rule syntax. | |
157 * @param rules the collation rules to build the collation table from. | |
158 * @param collationStrength strength for comparison | |
159 * @param decompositionMode the normalisation mode | |
160 * @param status reporting a success or an error. | |
161 * @stable ICU 2.0 | |
162 */ | |
163 RuleBasedCollator(const UnicodeString& rules, | |
164 ECollationStrength collationStrength, | |
165 UColAttributeValue decompositionMode, | |
166 UErrorCode& status); | |
167 | |
168 #ifndef U_HIDE_INTERNAL_API | |
169 /** | |
170 * TODO: document & propose as public API | |
171 * @internal | |
172 */ | |
173 RuleBasedCollator(const UnicodeString &rules, | |
174 UParseError &parseError, UnicodeString &reason, | |
175 UErrorCode &errorCode); | |
176 #endif /* U_HIDE_INTERNAL_API */ | |
177 | |
178 /** | |
179 * Copy constructor. | |
180 * @param other the RuleBasedCollator object to be copied | |
181 * @stable ICU 2.0 | |
182 */ | |
183 RuleBasedCollator(const RuleBasedCollator& other); | |
184 | |
185 | |
186 /** Opens a collator from a collator binary image created using | |
187 * cloneBinary. Binary image used in instantiation of the | |
188 * collator remains owned by the user and should stay around for | |
189 * the lifetime of the collator. The API also takes a base collator | |
190 * which must be the root collator. | |
191 * @param bin binary image owned by the user and required through the | |
192 * lifetime of the collator | |
193 * @param length size of the image. If negative, the API will try to | |
194 * figure out the length of the image | |
195 * @param base Base collator, for lookup of untailored characters. | |
196 * Must be the root collator, must not be NULL. | |
197 * The base is required to be present through the lifetime of the collator. | |
198 * @param status for catching errors | |
199 * @return newly created collator | |
200 * @see cloneBinary | |
201 * @stable ICU 3.4 | |
202 */ | |
203 RuleBasedCollator(const uint8_t *bin, int32_t length, | |
204 const RuleBasedCollator *base, | |
205 UErrorCode &status); | |
206 | |
207 /** | |
208 * Destructor. | |
209 * @stable ICU 2.0 | |
210 */ | |
211 virtual ~RuleBasedCollator(); | |
212 | |
213 /** | |
214 * Assignment operator. | |
215 * @param other other RuleBasedCollator object to copy from. | |
216 * @stable ICU 2.0 | |
217 */ | |
218 RuleBasedCollator& operator=(const RuleBasedCollator& other); | |
219 | |
220 /** | |
221 * Returns true if argument is the same as this object. | |
222 * @param other Collator object to be compared. | |
223 * @return true if argument is the same as this object. | |
224 * @stable ICU 2.0 | |
225 */ | |
226 virtual UBool operator==(const Collator& other) const; | |
227 | |
228 /** | |
229 * Makes a copy of this object. | |
230 * @return a copy of this object, owned by the caller | |
231 * @stable ICU 2.0 | |
232 */ | |
233 virtual RuleBasedCollator* clone() const; | |
234 | |
235 /** | |
236 * Creates a collation element iterator for the source string. The caller of | |
237 * this method is responsible for the memory management of the return | |
238 * pointer. | |
239 * @param source the string over which the CollationElementIterator will | |
240 * iterate. | |
241 * @return the collation element iterator of the source string using this as | |
242 * the based Collator. | |
243 * @stable ICU 2.2 | |
244 */ | |
245 virtual CollationElementIterator* createCollationElementIterator( | |
246 const UnicodeString& source) const; | |
247 | |
248 /** | |
249 * Creates a collation element iterator for the source. The caller of this | |
250 * method is responsible for the memory management of the returned pointer. | |
251 * @param source the CharacterIterator which produces the characters over | |
252 * which the CollationElementIterator will iterate. | |
253 * @return the collation element iterator of the source using this as the | |
254 * based Collator. | |
255 * @stable ICU 2.2 | |
256 */ | |
257 virtual CollationElementIterator* createCollationElementIterator( | |
258 const CharacterIterator& source) const; | |
259 | |
260 // Make deprecated versions of Collator::compare() visible. | |
261 using Collator::compare; | |
262 | |
263 /** | |
264 * The comparison function compares the character data stored in two | |
265 * different strings. Returns information about whether a string is less | |
266 * than, greater than or equal to another string. | |
267 * @param source the source string to be compared with. | |
268 * @param target the string that is to be compared with the source string. | |
269 * @param status possible error code | |
270 * @return Returns an enum value. UCOL_GREATER if source is greater | |
271 * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less | |
272 * than target | |
273 * @stable ICU 2.6 | |
274 **/ | |
275 virtual UCollationResult compare(const UnicodeString& source, | |
276 const UnicodeString& target, | |
277 UErrorCode &status) const; | |
278 | |
279 /** | |
280 * Does the same thing as compare but limits the comparison to a specified | |
281 * length | |
282 * @param source the source string to be compared with. | |
283 * @param target the string that is to be compared with the source string. | |
284 * @param length the length the comparison is limited to | |
285 * @param status possible error code | |
286 * @return Returns an enum value. UCOL_GREATER if source (up to the specified | |
287 * length) is greater than target; UCOL_EQUAL if source (up to specified | |
288 * length) is equal to target; UCOL_LESS if source (up to the specified | |
289 * length) is less than target. | |
290 * @stable ICU 2.6 | |
291 */ | |
292 virtual UCollationResult compare(const UnicodeString& source, | |
293 const UnicodeString& target, | |
294 int32_t length, | |
295 UErrorCode &status) const; | |
296 | |
297 /** | |
298 * The comparison function compares the character data stored in two | |
299 * different string arrays. Returns information about whether a string array | |
300 * is less than, greater than or equal to another string array. | |
301 * @param source the source string array to be compared with. | |
302 * @param sourceLength the length of the source string array. If this value | |
303 * is equal to -1, the string array is null-terminated. | |
304 * @param target the string that is to be compared with the source string. | |
305 * @param targetLength the length of the target string array. If this value | |
306 * is equal to -1, the string array is null-terminated. | |
307 * @param status possible error code | |
308 * @return Returns an enum value. UCOL_GREATER if source is greater | |
309 * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less | |
310 * than target | |
311 * @stable ICU 2.6 | |
312 */ | |
313 virtual UCollationResult compare(const char16_t* source, int32_t sourceLength, | |
314 const char16_t* target, int32_t targetLength, | |
315 UErrorCode &status) const; | |
316 | |
317 /** | |
318 * Compares two strings using the Collator. | |
319 * Returns whether the first one compares less than/equal to/greater than | |
320 * the second one. | |
321 * This version takes UCharIterator input. | |
322 * @param sIter the first ("source") string iterator | |
323 * @param tIter the second ("target") string iterator | |
324 * @param status ICU status | |
325 * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER | |
326 * @stable ICU 4.2 | |
327 */ | |
328 virtual UCollationResult compare(UCharIterator &sIter, | |
329 UCharIterator &tIter, | |
330 UErrorCode &status) const; | |
331 | |
332 /** | |
333 * Compares two UTF-8 strings using the Collator. | |
334 * Returns whether the first one compares less than/equal to/greater than | |
335 * the second one. | |
336 * This version takes UTF-8 input. | |
337 * Note that a StringPiece can be implicitly constructed | |
338 * from a std::string or a NUL-terminated const char * string. | |
339 * @param source the first UTF-8 string | |
340 * @param target the second UTF-8 string | |
341 * @param status ICU status | |
342 * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER | |
343 * @stable ICU 51 | |
344 */ | |
345 virtual UCollationResult compareUTF8(const StringPiece &source, | |
346 const StringPiece &target, | |
347 UErrorCode &status) const; | |
348 | |
349 /** | |
350 * Transforms the string into a series of characters | |
351 * that can be compared with CollationKey.compare(). | |
352 * | |
353 * Note that sort keys are often less efficient than simply doing comparison. | |
354 * For more details, see the ICU User Guide. | |
355 * | |
356 * @param source the source string. | |
357 * @param key the transformed key of the source string. | |
358 * @param status the error code status. | |
359 * @return the transformed key. | |
360 * @see CollationKey | |
361 * @stable ICU 2.0 | |
362 */ | |
363 virtual CollationKey& getCollationKey(const UnicodeString& source, | |
364 CollationKey& key, | |
365 UErrorCode& status) const; | |
366 | |
367 /** | |
368 * Transforms a specified region of the string into a series of characters | |
369 * that can be compared with CollationKey.compare. | |
370 * | |
371 * Note that sort keys are often less efficient than simply doing comparison. | |
372 * For more details, see the ICU User Guide. | |
373 * | |
374 * @param source the source string. | |
375 * @param sourceLength the length of the source string. | |
376 * @param key the transformed key of the source string. | |
377 * @param status the error code status. | |
378 * @return the transformed key. | |
379 * @see CollationKey | |
380 * @stable ICU 2.0 | |
381 */ | |
382 virtual CollationKey& getCollationKey(const char16_t *source, | |
383 int32_t sourceLength, | |
384 CollationKey& key, | |
385 UErrorCode& status) const; | |
386 | |
387 /** | |
388 * Generates the hash code for the rule-based collation object. | |
389 * @return the hash code. | |
390 * @stable ICU 2.0 | |
391 */ | |
392 virtual int32_t hashCode() const; | |
393 | |
394 #ifndef U_FORCE_HIDE_DEPRECATED_API | |
395 /** | |
396 * Gets the locale of the Collator | |
397 * @param type can be either requested, valid or actual locale. For more | |
398 * information see the definition of ULocDataLocaleType in | |
399 * uloc.h | |
400 * @param status the error code status. | |
401 * @return locale where the collation data lives. If the collator | |
402 * was instantiated from rules, locale is empty. | |
403 * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback | |
404 */ | |
405 virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; | |
406 #endif // U_FORCE_HIDE_DEPRECATED_API | |
407 | |
408 /** | |
409 * Gets the tailoring rules for this collator. | |
410 * @return the collation tailoring from which this collator was created | |
411 * @stable ICU 2.0 | |
412 */ | |
413 const UnicodeString& getRules() const; | |
414 | |
415 /** | |
416 * Gets the version information for a Collator. | |
417 * @param info the version # information, the result will be filled in | |
418 * @stable ICU 2.0 | |
419 */ | |
420 virtual void getVersion(UVersionInfo info) const; | |
421 | |
422 #ifndef U_HIDE_DEPRECATED_API | |
423 /** | |
424 * Returns the maximum length of any expansion sequences that end with the | |
425 * specified comparison order. | |
426 * | |
427 * This is specific to the kind of collation element values and sequences | |
428 * returned by the CollationElementIterator. | |
429 * Call CollationElementIterator::getMaxExpansion() instead. | |
430 * | |
431 * @param order a collation order returned by CollationElementIterator::previous | |
432 * or CollationElementIterator::next. | |
433 * @return maximum size of the expansion sequences ending with the collation | |
434 * element, or 1 if the collation element does not occur at the end of | |
435 * any expansion sequence | |
436 * @see CollationElementIterator#getMaxExpansion | |
437 * @deprecated ICU 51 Use CollationElementIterator::getMaxExpansion() instead. | |
438 */ | |
439 int32_t getMaxExpansion(int32_t order) const; | |
440 #endif /* U_HIDE_DEPRECATED_API */ | |
441 | |
442 /** | |
443 * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This | |
444 * method is to implement a simple version of RTTI, since not all C++ | |
445 * compilers support genuine RTTI. Polymorphic operator==() and clone() | |
446 * methods call this method. | |
447 * @return The class ID for this object. All objects of a given class have | |
448 * the same class ID. Objects of other classes have different class | |
449 * IDs. | |
450 * @stable ICU 2.0 | |
451 */ | |
452 virtual UClassID getDynamicClassID(void) const; | |
453 | |
454 /** | |
455 * Returns the class ID for this class. This is useful only for comparing to | |
456 * a return value from getDynamicClassID(). For example: | |
457 * <pre> | |
458 * Base* polymorphic_pointer = createPolymorphicObject(); | |
459 * if (polymorphic_pointer->getDynamicClassID() == | |
460 * Derived::getStaticClassID()) ... | |
461 * </pre> | |
462 * @return The class ID for all objects of this class. | |
463 * @stable ICU 2.0 | |
464 */ | |
465 static UClassID U_EXPORT2 getStaticClassID(void); | |
466 | |
467 #ifndef U_HIDE_DEPRECATED_API | |
468 /** | |
469 * Do not use this method: The caller and the ICU library might use different heaps. | |
470 * Use cloneBinary() instead which writes to caller-provided memory. | |
471 * | |
472 * Returns a binary format of this collator. | |
473 * @param length Returns the length of the data, in bytes | |
474 * @param status the error code status. | |
475 * @return memory, owned by the caller, of size 'length' bytes. | |
476 * @deprecated ICU 52. Use cloneBinary() instead. | |
477 */ | |
478 uint8_t *cloneRuleData(int32_t &length, UErrorCode &status) const; | |
479 #endif /* U_HIDE_DEPRECATED_API */ | |
480 | |
481 /** Creates a binary image of a collator. This binary image can be stored and | |
482 * later used to instantiate a collator using ucol_openBinary. | |
483 * This API supports preflighting. | |
484 * @param buffer a fill-in buffer to receive the binary image | |
485 * @param capacity capacity of the destination buffer | |
486 * @param status for catching errors | |
487 * @return size of the image | |
488 * @see ucol_openBinary | |
489 * @stable ICU 3.4 | |
490 */ | |
491 int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) const; | |
492 | |
493 /** | |
494 * Returns current rules. Delta defines whether full rules are returned or | |
495 * just the tailoring. | |
496 * | |
497 * getRules(void) should normally be used instead. | |
498 * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales | |
499 * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. | |
500 * @param buffer UnicodeString to store the result rules | |
501 * @stable ICU 2.2 | |
502 * @see UCOL_FULL_RULES | |
503 */ | |
504 void getRules(UColRuleOption delta, UnicodeString &buffer) const; | |
505 | |
506 /** | |
507 * Universal attribute setter | |
508 * @param attr attribute type | |
509 * @param value attribute value | |
510 * @param status to indicate whether the operation went on smoothly or there were errors | |
511 * @stable ICU 2.2 | |
512 */ | |
513 virtual void setAttribute(UColAttribute attr, UColAttributeValue value, | |
514 UErrorCode &status); | |
515 | |
516 /** | |
517 * Universal attribute getter. | |
518 * @param attr attribute type | |
519 * @param status to indicate whether the operation went on smoothly or there were errors | |
520 * @return attribute value | |
521 * @stable ICU 2.2 | |
522 */ | |
523 virtual UColAttributeValue getAttribute(UColAttribute attr, | |
524 UErrorCode &status) const; | |
525 | |
526 /** | |
527 * Sets the variable top to the top of the specified reordering group. | |
528 * The variable top determines the highest-sorting character | |
529 * which is affected by UCOL_ALTERNATE_HANDLING. | |
530 * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect. | |
531 * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, | |
532 * UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY; | |
533 * or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group | |
534 * @param errorCode Standard ICU error code. Its input value must | |
535 * pass the U_SUCCESS() test, or else the function returns | |
536 * immediately. Check for U_FAILURE() on output or use with | |
537 * function chaining. (See User Guide for details.) | |
538 * @return *this | |
539 * @see getMaxVariable | |
540 * @stable ICU 53 | |
541 */ | |
542 virtual Collator &setMaxVariable(UColReorderCode group, UErrorCode &errorCode); | |
543 | |
544 /** | |
545 * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING. | |
546 * @return the maximum variable reordering group. | |
547 * @see setMaxVariable | |
548 * @stable ICU 53 | |
549 */ | |
550 virtual UColReorderCode getMaxVariable() const; | |
551 | |
552 #ifndef U_FORCE_HIDE_DEPRECATED_API | |
553 /** | |
554 * Sets the variable top to the primary weight of the specified string. | |
555 * | |
556 * Beginning with ICU 53, the variable top is pinned to | |
557 * the top of one of the supported reordering groups, | |
558 * and it must not be beyond the last of those groups. | |
559 * See setMaxVariable(). | |
560 * @param varTop one or more (if contraction) char16_ts to which the variable top should be set | |
561 * @param len length of variable top string. If -1 it is considered to be zero terminated. | |
562 * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br> | |
563 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br> | |
564 * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond | |
565 * the last reordering group supported by setMaxVariable() | |
566 * @return variable top primary weight | |
567 * @deprecated ICU 53 Call setMaxVariable() instead. | |
568 */ | |
569 virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status); | |
570 | |
571 /** | |
572 * Sets the variable top to the primary weight of the specified string. | |
573 * | |
574 * Beginning with ICU 53, the variable top is pinned to | |
575 * the top of one of the supported reordering groups, | |
576 * and it must not be beyond the last of those groups. | |
577 * See setMaxVariable(). | |
578 * @param varTop a UnicodeString size 1 or more (if contraction) of char16_ts to which the variable top should be set | |
579 * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br> | |
580 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br> | |
581 * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond | |
582 * the last reordering group supported by setMaxVariable() | |
583 * @return variable top primary weight | |
584 * @deprecated ICU 53 Call setMaxVariable() instead. | |
585 */ | |
586 virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status); | |
587 | |
588 /** | |
589 * Sets the variable top to the specified primary weight. | |
590 * | |
591 * Beginning with ICU 53, the variable top is pinned to | |
592 * the top of one of the supported reordering groups, | |
593 * and it must not be beyond the last of those groups. | |
594 * See setMaxVariable(). | |
595 * @param varTop primary weight, as returned by setVariableTop or ucol_getVariableTop | |
596 * @param status error code | |
597 * @deprecated ICU 53 Call setMaxVariable() instead. | |
598 */ | |
599 virtual void setVariableTop(uint32_t varTop, UErrorCode &status); | |
600 #endif // U_FORCE_HIDE_DEPRECATED_API | |
601 | |
602 /** | |
603 * Gets the variable top value of a Collator. | |
604 * @param status error code (not changed by function). If error code is set, the return value is undefined. | |
605 * @return the variable top primary weight | |
606 * @see getMaxVariable | |
607 * @stable ICU 2.0 | |
608 */ | |
609 virtual uint32_t getVariableTop(UErrorCode &status) const; | |
610 | |
611 /** | |
612 * Get a UnicodeSet that contains all the characters and sequences tailored in | |
613 * this collator. | |
614 * @param status error code of the operation | |
615 * @return a pointer to a UnicodeSet object containing all the | |
616 * code points and sequences that may sort differently than | |
617 * in the root collator. The object must be disposed of by using delete | |
618 * @stable ICU 2.4 | |
619 */ | |
620 virtual UnicodeSet *getTailoredSet(UErrorCode &status) const; | |
621 | |
622 /** | |
623 * Get the sort key as an array of bytes from a UnicodeString. | |
624 * | |
625 * Note that sort keys are often less efficient than simply doing comparison. | |
626 * For more details, see the ICU User Guide. | |
627 * | |
628 * @param source string to be processed. | |
629 * @param result buffer to store result in. If NULL, number of bytes needed | |
630 * will be returned. | |
631 * @param resultLength length of the result buffer. If it is not large enough, the | |
632 * buffer will be filled to capacity. | |
633 * @return Number of bytes needed for storing the sort key | |
634 * @stable ICU 2.0 | |
635 */ | |
636 virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result, | |
637 int32_t resultLength) const; | |
638 | |
639 /** | |
640 * Get the sort key as an array of bytes from a char16_t buffer. | |
641 * | |
642 * Note that sort keys are often less efficient than simply doing comparison. | |
643 * For more details, see the ICU User Guide. | |
644 * | |
645 * @param source string to be processed. | |
646 * @param sourceLength length of string to be processed. If -1, the string | |
647 * is 0 terminated and length will be decided by the function. | |
648 * @param result buffer to store result in. If NULL, number of bytes needed | |
649 * will be returned. | |
650 * @param resultLength length of the result buffer. If it is not large enough, the | |
651 * buffer will be filled to capacity. | |
652 * @return Number of bytes needed for storing the sort key | |
653 * @stable ICU 2.2 | |
654 */ | |
655 virtual int32_t getSortKey(const char16_t *source, int32_t sourceLength, | |
656 uint8_t *result, int32_t resultLength) const; | |
657 | |
658 /** | |
659 * Retrieves the reordering codes for this collator. | |
660 * @param dest The array to fill with the script ordering. | |
661 * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function | |
662 * will only return the length of the result without writing any codes (pre-flighting). | |
663 * @param status A reference to an error code value, which must not indicate | |
664 * a failure before the function call. | |
665 * @return The length of the script ordering array. | |
666 * @see ucol_setReorderCodes | |
667 * @see Collator#getEquivalentReorderCodes | |
668 * @see Collator#setReorderCodes | |
669 * @stable ICU 4.8 | |
670 */ | |
671 virtual int32_t getReorderCodes(int32_t *dest, | |
672 int32_t destCapacity, | |
673 UErrorCode& status) const; | |
674 | |
675 /** | |
676 * Sets the ordering of scripts for this collator. | |
677 * @param reorderCodes An array of script codes in the new order. This can be NULL if the | |
678 * length is also set to 0. An empty array will clear any reordering codes on the collator. | |
679 * @param reorderCodesLength The length of reorderCodes. | |
680 * @param status error code | |
681 * @see ucol_setReorderCodes | |
682 * @see Collator#getReorderCodes | |
683 * @see Collator#getEquivalentReorderCodes | |
684 * @stable ICU 4.8 | |
685 */ | |
686 virtual void setReorderCodes(const int32_t* reorderCodes, | |
687 int32_t reorderCodesLength, | |
688 UErrorCode& status) ; | |
689 | |
690 /** | |
691 * Implements ucol_strcollUTF8(). | |
692 * @internal | |
693 */ | |
694 virtual UCollationResult internalCompareUTF8( | |
695 const char *left, int32_t leftLength, | |
696 const char *right, int32_t rightLength, | |
697 UErrorCode &errorCode) const; | |
698 | |
699 /** Get the short definition string for a collator. This internal API harvests the collator's | |
700 * locale and the attribute set and produces a string that can be used for opening | |
701 * a collator with the same attributes using the ucol_openFromShortString API. | |
702 * This string will be normalized. | |
703 * The structure and the syntax of the string are defined in the "Naming collators" | |
704 * section of the User Guide: | |
705 * http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme | |
706 * This function supports preflighting. | |
707 * | |
708 * This is internal, and intended to be used with delegate converters. | |
709 * | |
710 * @param locale a locale that will appear as a collator's locale in the resulting | |
711 * short string definition. If NULL, the locale will be harvested | |
712 * from the collator. | |
713 * @param buffer space to hold the resulting string | |
714 * @param capacity capacity of the buffer | |
715 * @param status for returning errors. All the preflighting errors are featured | |
716 * @return length of the resulting string | |
717 * @see ucol_openFromShortString | |
718 * @see ucol_normalizeShortDefinitionString | |
719 * @see ucol_getShortDefinitionString | |
720 * @internal | |
721 */ | |
722 virtual int32_t internalGetShortDefinitionString(const char *locale, | |
723 char *buffer, | |
724 int32_t capacity, | |
725 UErrorCode &status) const; | |
726 | |
727 /** | |
728 * Implements ucol_nextSortKeyPart(); see that function for the argument and result semantics. | |
729 * @internal | |
730 */ | |
731 virtual int32_t internalNextSortKeyPart( | |
732 UCharIterator *iter, uint32_t state[2], | |
733 uint8_t *dest, int32_t count, UErrorCode &errorCode) const; | |
734 | |
735 // Do not enclose the default constructor with #ifndef U_HIDE_INTERNAL_API | |
736 /** | |
737 * Only for use in ucol_openRules(). | |
738 * @internal | |
739 */ | |
740 RuleBasedCollator(); | |
741 | |
742 #ifndef U_HIDE_INTERNAL_API | |
743 /** | |
744 * Implements ucol_getLocaleByType(). | |
745 * Needed because the lifetime of the locale ID string must match that of the collator. | |
746 * getLocale() returns a copy of a Locale, with minimal lifetime in a C wrapper. | |
747 * @internal | |
748 */ | |
749 const char *internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const; | |
750 | |
751 /** | |
752 * Implements ucol_getContractionsAndExpansions(). | |
753 * Gets this collator's sets of contraction strings and/or | |
754 * characters and strings that map to multiple collation elements (expansions). | |
755 * If addPrefixes is TRUE, then contractions that are expressed as | |
756 * prefix/pre-context rules are included. | |
757 * @param contractions if not NULL, the set to hold the contractions | |
758 * @param expansions if not NULL, the set to hold the expansions | |
759 * @param addPrefixes include prefix contextual mappings | |
760 * @param errorCode in/out ICU error code | |
761 * @internal | |
762 */ | |
763 void internalGetContractionsAndExpansions( | |
764 UnicodeSet *contractions, UnicodeSet *expansions, | |
765 UBool addPrefixes, UErrorCode &errorCode) const; | |
766 | |
767 /** | |
768 * Adds the contractions that start with character c to the set. | |
769 * Ignores prefixes. Used by AlphabeticIndex. | |
770 * @internal | |
771 */ | |
772 void internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const; | |
773 | |
774 /** | |
775 * Implements from-rule constructors, and ucol_openRules(). | |
776 * @internal | |
777 */ | |
778 void internalBuildTailoring( | |
779 const UnicodeString &rules, | |
780 int32_t strength, | |
781 UColAttributeValue decompositionMode, | |
782 UParseError *outParseError, UnicodeString *outReason, | |
783 UErrorCode &errorCode); | |
784 | |
785 /** Passes uc through fromUCollator() and dynamic_casts the result to RuleBasedCollator; yields a null pointer if that cast fails. @internal */ | |
786 static inline RuleBasedCollator *rbcFromUCollator(UCollator *uc) { | |
787 return dynamic_cast<RuleBasedCollator *>(fromUCollator(uc)); | |
788 } | |
789 /** Const overload: passes uc through fromUCollator() and dynamic_casts the result to const RuleBasedCollator; yields a null pointer if that cast fails. @internal */ | |
790 static inline const RuleBasedCollator *rbcFromUCollator(const UCollator *uc) { | |
791 return dynamic_cast<const RuleBasedCollator *>(fromUCollator(uc)); | |
792 } | |
793 | |
794 /** | |
795 * Appends the CEs for the string to the vector. | |
796 * @internal for tests & tools | |
797 */ | |
798 void internalGetCEs(const UnicodeString &str, UVector64 &ces, UErrorCode &errorCode) const; | |
799 #endif // U_HIDE_INTERNAL_API | |
800 | |
801 protected: | |
802 /** | |
803 * Used internally by registration to define the requested, valid and actual locales. | |
804 * @param requestedLocale the requested locale | |
805 * @param validLocale the valid locale | |
806 * @param actualLocale the actual locale | |
807 * @internal | |
808 */ | |
809 virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale); | |
810 | |
811 private: | |
812 friend class CollationElementIterator; | |
813 friend class Collator; | |
814 | |
815 RuleBasedCollator(const CollationCacheEntry *entry); | |
816 | |
817 /** | |
818 * Enumeration of attributes that are relevant for short definition strings | |
819 * (e.g., ucol_getShortDefinitionString()). | |
820 * Effectively extends UColAttribute. | |
821 */ | |
822 enum Attributes { | |
823 ATTR_VARIABLE_TOP = UCOL_ATTRIBUTE_COUNT, | |
824 ATTR_LIMIT | |
825 }; | |
826 | |
827 void adoptTailoring(CollationTailoring *t, UErrorCode &errorCode); | |
828 | |
829 // Both lengths must be <0 or else both must be >=0. | |
830 UCollationResult doCompare(const char16_t *left, int32_t leftLength, | |
831 const char16_t *right, int32_t rightLength, | |
832 UErrorCode &errorCode) const; | |
833 UCollationResult doCompare(const uint8_t *left, int32_t leftLength, | |
834 const uint8_t *right, int32_t rightLength, | |
835 UErrorCode &errorCode) const; | |
836 | |
837 void writeSortKey(const char16_t *s, int32_t length, | |
838 SortKeyByteSink &sink, UErrorCode &errorCode) const; | |
839 | |
840 void writeIdenticalLevel(const char16_t *s, const char16_t *limit, | |
841 SortKeyByteSink &sink, UErrorCode &errorCode) const; | |
842 | |
843 const CollationSettings &getDefaultSettings() const; | |
844 | |
// Clears bit number `attribute` in explicitlySetAttributes; the shift is not range-checked here. | |
845 void setAttributeDefault(int32_t attribute) { | |
846 explicitlySetAttributes &= ~((uint32_t)1 << attribute); | |
847 } | |
// Sets bit number `attribute` in explicitlySetAttributes; the shift is not range-checked here. | |
848 void setAttributeExplicitly(int32_t attribute) { | |
849 explicitlySetAttributes |= (uint32_t)1 << attribute; | |
850 } | |
851 UBool attributeHasBeenSetExplicitly(int32_t attribute) const { | |
852 // Tests bit number `attribute` in explicitlySetAttributes. Expected but unchecked: 0 <= attribute && attribute < ATTR_LIMIT. | |
853 return (UBool)((explicitlySetAttributes & ((uint32_t)1 << attribute)) != 0); | |
854 } | |
855 | |
856 /** | |
857 * Tests whether a character is "unsafe" for use as a collation starting point. | |
858 * | |
859 * @param c code point or code unit | |
860 * @return TRUE if c is unsafe | |
861 * @see CollationElementIterator#setOffset(int) | |
862 */ | |
863 UBool isUnsafe(UChar32 c) const; | |
864 | |
865 static void U_CALLCONV computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode); | |
866 UBool initMaxExpansions(UErrorCode &errorCode) const; | |
867 | |
868 void setFastLatinOptions(CollationSettings &ownedSettings) const; | |
869 | |
870 const CollationData *data; | |
871 const CollationSettings *settings; // reference-counted | |
872 const CollationTailoring *tailoring; // alias of cacheEntry->tailoring | |
873 const CollationCacheEntry *cacheEntry; // reference-counted | |
874 Locale validLocale; | |
875 uint32_t explicitlySetAttributes; | |
876 | |
877 UBool actualLocaleIsSameAsValid; | |
878 }; | |
879 | |
880 U_NAMESPACE_END | |
881 | |
882 #endif // !UCONFIG_NO_COLLATION | |
883 | |
884 #endif /* U_SHOW_CPLUSPLUS_API */ | |
885 | |
886 #endif // TBLCOLL_H |