annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/unicode/coleitr.h @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 // © 2016 and later: Unicode, Inc. and others.
jpayne@69 2 // License & terms of use: http://www.unicode.org/copyright.html
jpayne@69 3 /*
jpayne@69 4 ******************************************************************************
jpayne@69 5 * Copyright (C) 1997-2014, International Business Machines
jpayne@69 6 * Corporation and others. All Rights Reserved.
jpayne@69 7 ******************************************************************************
jpayne@69 8 */
jpayne@69 9
jpayne@69 10 /**
jpayne@69 11 * \file
jpayne@69 12 * \brief C++ API: Collation Element Iterator.
jpayne@69 13 */
jpayne@69 14
jpayne@69 15 /**
jpayne@69 16 * File coleitr.h
jpayne@69 17 *
jpayne@69 18 * Created by: Helena Shih
jpayne@69 19 *
jpayne@69 20 * Modification History:
jpayne@69 21 *
jpayne@69 22 * Date Name Description
jpayne@69 23 *
jpayne@69 24 * 8/18/97 helena Added internal API documentation.
jpayne@69 25 * 08/03/98 erm Synched with 1.2 version CollationElementIterator.java
jpayne@69 26 * 12/10/99 aliu Ported Thai collation support from Java.
jpayne@69 27 * 01/25/01 swquek Modified into a C++ wrapper calling C APIs (ucoliter.h)
jpayne@69 28 * 02/19/01 swquek Removed CollationElementsIterator() since it is
jpayne@69 29 * private constructor and no calls are made to it
jpayne@69 30 * 2012-2014 markus Rewritten in C++ again.
jpayne@69 31 */
jpayne@69 32
jpayne@69 33 #ifndef COLEITR_H
jpayne@69 34 #define COLEITR_H
jpayne@69 35
jpayne@69 36 #include "unicode/utypes.h"
jpayne@69 37
jpayne@69 38 #if U_SHOW_CPLUSPLUS_API
jpayne@69 39
jpayne@69 40 #if !UCONFIG_NO_COLLATION
jpayne@69 41
jpayne@69 42 #include "unicode/unistr.h"
jpayne@69 43 #include "unicode/uobject.h"
jpayne@69 44
jpayne@69 45 struct UCollationElements;
jpayne@69 46 struct UHashtable;
jpayne@69 47
jpayne@69 48 U_NAMESPACE_BEGIN
jpayne@69 49
jpayne@69 50 struct CollationData;
jpayne@69 51
jpayne@69 52 class CharacterIterator;
jpayne@69 53 class CollationIterator;
jpayne@69 54 class RuleBasedCollator;
jpayne@69 55 class UCollationPCE;
jpayne@69 56 class UVector32;
jpayne@69 57
jpayne@69 58 /**
jpayne@69 59 * The CollationElementIterator class is used as an iterator to walk through
jpayne@69 60 * each character of an international string. Use the iterator to return the
jpayne@69 61 * ordering priority of the positioned character. The ordering priority of a
jpayne@69 62 * character, which we refer to as a key, defines how a character is collated in
jpayne@69 63 * the given collation object.
jpayne@69 64 * For example, consider the following in Slovak and in traditional Spanish collation:
jpayne@69 65 * <pre>
jpayne@69 66 * "ca" -> the first key is key('c') and second key is key('a').
jpayne@69 67 * "cha" -> the first key is key('ch') and second key is key('a').</pre>
jpayne@69 68 * And in German phonebook collation,
jpayne@69 69 * <pre> \htmlonly "&#x00E6;b"-> the first key is key('a'), the second key is key('e'), and
jpayne@69 70 * the third key is key('b'). \endhtmlonly </pre>
jpayne@69 71 * The key of a character is an integer composed of primary order(short),
jpayne@69 72 * secondary order(char), and tertiary order(char). Java strictly defines the
jpayne@69 73 * size and signedness of its primitive data types. Therefore, the static
jpayne@69 74 * functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return
jpayne@69 75 * int32_t to ensure the correctness of the key value.
jpayne@69 76 * <p>Example of the iterator usage: (without error checking)
jpayne@69 77 * <pre>
jpayne@69 78 * \code
jpayne@69 79 * void CollationElementIterator_Example()
jpayne@69 80 * {
jpayne@69 81 * UnicodeString str = "This is a test";
jpayne@69 82 * UErrorCode success = U_ZERO_ERROR;
jpayne@69 83 * RuleBasedCollator* rbc =
jpayne@69 84 * (RuleBasedCollator*) RuleBasedCollator::createInstance(success);
jpayne@69 85 * CollationElementIterator* c =
jpayne@69 86 * rbc->createCollationElementIterator( str );
jpayne@69 87 * int32_t order = c->next(success);
jpayne@69 88 * c->reset();
jpayne@69 89 * order = c->previous(success);
jpayne@69 90 * delete c;
jpayne@69 91 * delete rbc;
jpayne@69 92 * }
jpayne@69 93 * \endcode
jpayne@69 94 * </pre>
jpayne@69 95 * <p>
jpayne@69 96 * The method next() returns the collation order of the next character based on
jpayne@69 97 * the comparison level of the collator. The method previous() returns the
jpayne@69 98 * collation order of the previous character based on the comparison level of
jpayne@69 99 * the collator. The Collation Element Iterator moves only in one direction
jpayne@69 100 * between calls to reset(), setOffset(), or setText(). That is, next()
jpayne@69 101 * and previous() can not be inter-used. Whenever previous() is to be called after
jpayne@69 102 * next() or vice versa, reset(), setOffset() or setText() has to be called first
jpayne@69 103 * to reset the status, shifting pointers to either the end or the start of
jpayne@69 104 * the string (reset() or setText()), or the specified position (setOffset()).
jpayne@69 105 * Hence at the next call of next() or previous(), the first or last collation order,
jpayne@69 106 * or collation order at the specified position will be returned. If a change of
jpayne@69 107 * direction is done without one of these calls, the result is undefined.
jpayne@69 108 * <p>
jpayne@69 109 * The result of a forward iterate (next()) and reversed result of the backward
jpayne@69 110 * iterate (previous()) on the same string are equivalent, if collation orders
jpayne@69 111 * with the value 0 are ignored.
jpayne@69 112 * Collation orders are based on the comparison level of the collator. A collation order
jpayne@69 113 * consists of primary order, secondary order and tertiary order. The data
jpayne@69 114 * type of the collation order is <strong>int32_t</strong>.
jpayne@69 115 *
jpayne@69 116 * Note, CollationElementIterator should not be subclassed.
jpayne@69 117 * @see Collator
jpayne@69 118 * @see RuleBasedCollator
jpayne@69 119 * @version 1.8 Jan 16 2001
jpayne@69 120 */
jpayne@69 121 class U_I18N_API CollationElementIterator U_FINAL : public UObject {
jpayne@69 122 public:
jpayne@69 123
jpayne@69 124 // CollationElementIterator public data member ------------------------------
jpayne@69 125
jpayne@69 126 enum {
jpayne@69 127 /**
jpayne@69 128 * NULLORDER indicates that an error has occurred while processing
jpayne@69 129 * @stable ICU 2.0
jpayne@69 130 */
jpayne@69 131 NULLORDER = (int32_t)0xffffffff
jpayne@69 132 };
jpayne@69 133
jpayne@69 134 // CollationElementIterator public constructor/destructor -------------------
jpayne@69 135
jpayne@69 136 /**
jpayne@69 137 * Copy constructor.
jpayne@69 138 *
jpayne@69 139 * @param other the object to be copied from
jpayne@69 140 * @stable ICU 2.0
jpayne@69 141 */
jpayne@69 142 CollationElementIterator(const CollationElementIterator& other);
jpayne@69 143
jpayne@69 144 /**
jpayne@69 145 * Destructor
jpayne@69 146 * @stable ICU 2.0
jpayne@69 147 */
jpayne@69 148 virtual ~CollationElementIterator();
jpayne@69 149
jpayne@69 150 // CollationElementIterator public methods ----------------------------------
jpayne@69 151
jpayne@69 152 /**
jpayne@69 153 * Returns true if "other" is the same as "this"
jpayne@69 154 *
jpayne@69 155 * @param other the object to be compared
jpayne@69 156 * @return true if "other" is the same as "this"
jpayne@69 157 * @stable ICU 2.0
jpayne@69 158 */
jpayne@69 159 UBool operator==(const CollationElementIterator& other) const;
jpayne@69 160
jpayne@69 161 /**
jpayne@69 162 * Returns true if "other" is not the same as "this".
jpayne@69 163 *
jpayne@69 164 * @param other the object to be compared
jpayne@69 165 * @return true if "other" is not the same as "this"
jpayne@69 166 * @stable ICU 2.0
jpayne@69 167 */
jpayne@69 168 UBool operator!=(const CollationElementIterator& other) const;
jpayne@69 169
jpayne@69 170 /**
jpayne@69 171 * Resets the cursor to the beginning of the string.
jpayne@69 172 * @stable ICU 2.0
jpayne@69 173 */
jpayne@69 174 void reset(void);
jpayne@69 175
jpayne@69 176 /**
jpayne@69 177 * Gets the ordering priority of the next character in the string.
jpayne@69 178 * @param status the error code status.
jpayne@69 179 * @return the next character's ordering; otherwise returns NULLORDER if an
jpayne@69 180 * error has occurred or if the end of string has been reached
jpayne@69 181 * @stable ICU 2.0
jpayne@69 182 */
jpayne@69 183 int32_t next(UErrorCode& status);
jpayne@69 184
jpayne@69 185 /**
jpayne@69 186 * Get the ordering priority of the previous collation element in the string.
jpayne@69 187 * @param status the error code status.
jpayne@69 188 * @return the previous element's ordering; otherwise returns NULLORDER if an
jpayne@69 189 * error has occurred or if the start of string has been reached
jpayne@69 190 * @stable ICU 2.0
jpayne@69 191 */
jpayne@69 192 int32_t previous(UErrorCode& status);
jpayne@69 193
jpayne@69 194 /**
jpayne@69 195 * Gets the primary order of a collation order.
jpayne@69 196 * @param order the collation order
jpayne@69 197 * @return the primary order of a collation order.
jpayne@69 198 * @stable ICU 2.0
jpayne@69 199 */
jpayne@69 200 static inline int32_t primaryOrder(int32_t order);
jpayne@69 201
jpayne@69 202 /**
jpayne@69 203 * Gets the secondary order of a collation order.
jpayne@69 204 * @param order the collation order
jpayne@69 205 * @return the secondary order of a collation order.
jpayne@69 206 * @stable ICU 2.0
jpayne@69 207 */
jpayne@69 208 static inline int32_t secondaryOrder(int32_t order);
jpayne@69 209
jpayne@69 210 /**
jpayne@69 211 * Gets the tertiary order of a collation order.
jpayne@69 212 * @param order the collation order
jpayne@69 213 * @return the tertiary order of a collation order.
jpayne@69 214 * @stable ICU 2.0
jpayne@69 215 */
jpayne@69 216 static inline int32_t tertiaryOrder(int32_t order);
jpayne@69 217
jpayne@69 218 /**
jpayne@69 219 * Return the maximum length of any expansion sequences that end with the
jpayne@69 220 * specified comparison order.
jpayne@69 221 * @param order a collation order returned by previous or next.
jpayne@69 222 * @return maximum size of the expansion sequences ending with the collation
jpayne@69 223 * element or 1 if collation element does not occur at the end of any
jpayne@69 224 * expansion sequence
jpayne@69 225 * @stable ICU 2.0
jpayne@69 226 */
jpayne@69 227 int32_t getMaxExpansion(int32_t order) const;
jpayne@69 228
jpayne@69 229 /**
jpayne@69 230 * Gets the comparison order in the desired strength. Ignore the other
jpayne@69 231 * differences.
jpayne@69 232 * @param order The order value
jpayne@69 233 * @stable ICU 2.0
jpayne@69 234 */
jpayne@69 235 int32_t strengthOrder(int32_t order) const;
jpayne@69 236
jpayne@69 237 /**
jpayne@69 238 * Sets the source string.
jpayne@69 239 * @param str the source string.
jpayne@69 240 * @param status the error code status.
jpayne@69 241 * @stable ICU 2.0
jpayne@69 242 */
jpayne@69 243 void setText(const UnicodeString& str, UErrorCode& status);
jpayne@69 244
jpayne@69 245 /**
jpayne@69 246 * Sets the source string.
jpayne@69 247 * @param str the source character iterator.
jpayne@69 248 * @param status the error code status.
jpayne@69 249 * @stable ICU 2.0
jpayne@69 250 */
jpayne@69 251 void setText(CharacterIterator& str, UErrorCode& status);
jpayne@69 252
jpayne@69 253 /**
jpayne@69 254 * Checks if a comparison order is ignorable.
jpayne@69 255 * @param order the collation order.
jpayne@69 256 * @return TRUE if a character is ignorable, FALSE otherwise.
jpayne@69 257 * @stable ICU 2.0
jpayne@69 258 */
jpayne@69 259 static inline UBool isIgnorable(int32_t order);
jpayne@69 260
jpayne@69 261 /**
jpayne@69 262 * Gets the offset of the currently processed character in the source string.
jpayne@69 263 * @return the offset of the character.
jpayne@69 264 * @stable ICU 2.0
jpayne@69 265 */
jpayne@69 266 int32_t getOffset(void) const;
jpayne@69 267
jpayne@69 268 /**
jpayne@69 269 * Sets the offset of the currently processed character in the source string.
jpayne@69 270 * @param newOffset the new offset.
jpayne@69 271 * @param status the error code status.
jpayne@69 272 * @note This method has no return value.
jpayne@69 273 * @stable ICU 2.0
jpayne@69 274 */
jpayne@69 275 void setOffset(int32_t newOffset, UErrorCode& status);
jpayne@69 276
jpayne@69 277 /**
jpayne@69 278 * ICU "poor man's RTTI", returns a UClassID for the actual class.
jpayne@69 279 *
jpayne@69 280 * @stable ICU 2.2
jpayne@69 281 */
jpayne@69 282 virtual UClassID getDynamicClassID() const;
jpayne@69 283
jpayne@69 284 /**
jpayne@69 285 * ICU "poor man's RTTI", returns a UClassID for this class.
jpayne@69 286 *
jpayne@69 287 * @stable ICU 2.2
jpayne@69 288 */
jpayne@69 289 static UClassID U_EXPORT2 getStaticClassID();
jpayne@69 290
jpayne@69 291 #ifndef U_HIDE_INTERNAL_API
jpayne@69 292 /** @internal */
jpayne@69 293 static inline CollationElementIterator *fromUCollationElements(UCollationElements *uc) {
jpayne@69 294 return reinterpret_cast<CollationElementIterator *>(uc);
jpayne@69 295 }
jpayne@69 296 /** @internal */
jpayne@69 297 static inline const CollationElementIterator *fromUCollationElements(const UCollationElements *uc) {
jpayne@69 298 return reinterpret_cast<const CollationElementIterator *>(uc);
jpayne@69 299 }
jpayne@69 300 /** @internal */
jpayne@69 301 inline UCollationElements *toUCollationElements() {
jpayne@69 302 return reinterpret_cast<UCollationElements *>(this);
jpayne@69 303 }
jpayne@69 304 /** @internal */
jpayne@69 305 inline const UCollationElements *toUCollationElements() const {
jpayne@69 306 return reinterpret_cast<const UCollationElements *>(this);
jpayne@69 307 }
jpayne@69 308 #endif // U_HIDE_INTERNAL_API
jpayne@69 309
jpayne@69 310 private:
jpayne@69 311 friend class RuleBasedCollator;
jpayne@69 312 friend class UCollationPCE;
jpayne@69 313
jpayne@69 314 /**
jpayne@69 315 * CollationElementIterator constructor. This takes the source string and the
jpayne@69 316 * collation object. The cursor will walk through the source string based on the
jpayne@69 317 * predefined collation rules. If the source string is empty, NULLORDER will
jpayne@69 318 * be returned on the calls to next().
jpayne@69 319 * @param sourceText the source string.
jpayne@69 320 * @param order the collation object.
jpayne@69 321 * @param status the error code status.
jpayne@69 322 */
jpayne@69 323 CollationElementIterator(const UnicodeString& sourceText,
jpayne@69 324 const RuleBasedCollator* order, UErrorCode& status);
jpayne@69 325 // Note: The constructors should take settings & tailoring, not a collator,
jpayne@69 326 // to avoid circular dependencies.
jpayne@69 327 // However, for operator==() we would need to be able to compare tailoring data for equality
jpayne@69 328 // without making CollationData or CollationTailoring depend on TailoredSet.
jpayne@69 329 // (See the implementation of RuleBasedCollator::operator==().)
jpayne@69 330 // That might require creating an intermediate class that would be used
jpayne@69 331 // by both CollationElementIterator and RuleBasedCollator
jpayne@69 332 // but only contain the part of RBC== related to data and rules.
jpayne@69 333
jpayne@69 334 /**
jpayne@69 335 * CollationElementIterator constructor. This takes the source string and the
jpayne@69 336 * collation object. The cursor will walk through the source string based on the
jpayne@69 337 * predefined collation rules. If the source string is empty, NULLORDER will
jpayne@69 338 * be returned on the calls to next().
jpayne@69 339 * @param sourceText the source string.
jpayne@69 340 * @param order the collation object.
jpayne@69 341 * @param status the error code status.
jpayne@69 342 */
jpayne@69 343 CollationElementIterator(const CharacterIterator& sourceText,
jpayne@69 344 const RuleBasedCollator* order, UErrorCode& status);
jpayne@69 345
jpayne@69 346 /**
jpayne@69 347 * Assignment operator
jpayne@69 348 *
jpayne@69 349 * @param other the object to be copied
jpayne@69 350 */
jpayne@69 351 const CollationElementIterator&
jpayne@69 352 operator=(const CollationElementIterator& other);
jpayne@69 353
jpayne@69 354 CollationElementIterator(); // default constructor not implemented
jpayne@69 355
jpayne@69 356 /** Normalizes dir_=1 (just after setOffset()) to dir_=0 (just after reset()). */
jpayne@69 357 inline int8_t normalizeDir() const { return dir_ == 1 ? 0 : dir_; }
jpayne@69 358
jpayne@69 359 static UHashtable *computeMaxExpansions(const CollationData *data, UErrorCode &errorCode);
jpayne@69 360
jpayne@69 361 static int32_t getMaxExpansion(const UHashtable *maxExpansions, int32_t order);
jpayne@69 362
jpayne@69 363 // CollationElementIterator private data members ----------------------------
jpayne@69 364
jpayne@69 365 CollationIterator *iter_; // owned
jpayne@69 366 const RuleBasedCollator *rbc_; // aliased
jpayne@69 367 uint32_t otherHalf_;
jpayne@69 368 /**
jpayne@69 369 * <0: backwards; 0: just after reset() (previous() begins from end);
jpayne@69 370 * 1: just after setOffset(); >1: forward
jpayne@69 371 */
jpayne@69 372 int8_t dir_;
jpayne@69 373 /**
jpayne@69 374 * Stores offsets from expansions and from unsafe-backwards iteration,
jpayne@69 375 * so that getOffset() returns intermediate offsets for the CEs
jpayne@69 376 * that are consistent with forward iteration.
jpayne@69 377 */
jpayne@69 378 UVector32 *offsets_;
jpayne@69 379
jpayne@69 380 UnicodeString string_;
jpayne@69 381 };
jpayne@69 382
jpayne@69 383 // CollationElementIterator inline method definitions --------------------------
jpayne@69 384
jpayne@69 385 inline int32_t CollationElementIterator::primaryOrder(int32_t order)
jpayne@69 386 { // The primary order occupies bits 16..31 of the collation order.
jpayne@69 387 return static_cast<int32_t>(static_cast<uint32_t>(order) >> 16);
jpayne@69 388 }
jpayne@69 389
jpayne@69 390 inline int32_t CollationElementIterator::secondaryOrder(int32_t order)
jpayne@69 391 { // The secondary order occupies bits 8..15 of the collation order.
jpayne@69 392 return static_cast<int32_t>((static_cast<uint32_t>(order) >> 8) & 0xffu);
jpayne@69 393 }
jpayne@69 394
jpayne@69 395 inline int32_t CollationElementIterator::tertiaryOrder(int32_t order)
jpayne@69 396 { // The tertiary order occupies the low 8 bits of the collation order.
jpayne@69 397 return static_cast<int32_t>(static_cast<uint32_t>(order) & 0xffu);
jpayne@69 398 }
jpayne@69 399
jpayne@69 400 inline UBool CollationElementIterator::isIgnorable(int32_t order)
jpayne@69 401 { // Ignorable means the upper 16 bits of the collation order are all zero.
jpayne@69 402 return (static_cast<uint32_t>(order) >> 16) == 0;
jpayne@69 403 }
jpayne@69 404
jpayne@69 405 U_NAMESPACE_END
jpayne@69 406
jpayne@69 407 #endif /* #if !UCONFIG_NO_COLLATION */
jpayne@69 408
jpayne@69 409 #endif /* U_SHOW_CPLUSPLUS_API */
jpayne@69 410
jpayne@69 411 #endif