jpayne@69
|
1 // © 2016 and later: Unicode, Inc. and others.
|
jpayne@69
|
2 // License & terms of use: http://www.unicode.org/copyright.html
|
jpayne@69
|
3 /*
|
jpayne@69
|
4 ******************************************************************************
|
jpayne@69
|
5 * Copyright (C) 1997-2014, International Business Machines
|
jpayne@69
|
6 * Corporation and others. All Rights Reserved.
|
jpayne@69
|
7 ******************************************************************************
|
jpayne@69
|
8 */
|
jpayne@69
|
9
|
jpayne@69
|
10 /**
|
jpayne@69
|
11 * \file
|
jpayne@69
|
12 * \brief C++ API: Collation Element Iterator.
|
jpayne@69
|
13 */
|
jpayne@69
|
14
|
jpayne@69
|
15 /**
|
jpayne@69
|
16 * File coleitr.h
|
jpayne@69
|
17 *
|
jpayne@69
|
18 * Created by: Helena Shih
|
jpayne@69
|
19 *
|
jpayne@69
|
20 * Modification History:
|
jpayne@69
|
21 *
|
jpayne@69
|
22 * Date Name Description
|
jpayne@69
|
23 *
|
jpayne@69
|
24 * 8/18/97 helena Added internal API documentation.
|
jpayne@69
|
25 * 08/03/98 erm Synched with 1.2 version CollationElementIterator.java
|
jpayne@69
|
26 * 12/10/99 aliu Ported Thai collation support from Java.
|
jpayne@69
|
27 * 01/25/01 swquek Modified into a C++ wrapper calling C APIs (ucoliter.h)
|
jpayne@69
|
28 * 02/19/01 swquek Removed CollationElementsIterator() since it is
|
jpayne@69
|
29 * private constructor and no calls are made to it
|
jpayne@69
|
30 * 2012-2014 markus Rewritten in C++ again.
|
jpayne@69
|
31 */
|
jpayne@69
|
32
|
jpayne@69
|
33 #ifndef COLEITR_H
|
jpayne@69
|
34 #define COLEITR_H
|
jpayne@69
|
35
|
jpayne@69
|
36 #include "unicode/utypes.h"
|
jpayne@69
|
37
|
jpayne@69
|
38 #if U_SHOW_CPLUSPLUS_API
|
jpayne@69
|
39
|
jpayne@69
|
40 #if !UCONFIG_NO_COLLATION
|
jpayne@69
|
41
|
jpayne@69
|
42 #include "unicode/unistr.h"
|
jpayne@69
|
43 #include "unicode/uobject.h"
|
jpayne@69
|
44
|
jpayne@69
|
45 struct UCollationElements;
|
jpayne@69
|
46 struct UHashtable;
|
jpayne@69
|
47
|
jpayne@69
|
48 U_NAMESPACE_BEGIN
|
jpayne@69
|
49
|
jpayne@69
|
50 struct CollationData;
|
jpayne@69
|
51
|
jpayne@69
|
52 class CharacterIterator;
|
jpayne@69
|
53 class CollationIterator;
|
jpayne@69
|
54 class RuleBasedCollator;
|
jpayne@69
|
55 class UCollationPCE;
|
jpayne@69
|
56 class UVector32;
|
jpayne@69
|
57
|
jpayne@69
|
58 /**
|
jpayne@69
|
59 * The CollationElementIterator class is used as an iterator to walk through
|
jpayne@69
|
60 * each character of an international string. Use the iterator to return the
|
jpayne@69
|
61 * ordering priority of the positioned character. The ordering priority of a
|
jpayne@69
|
62 * character, which we refer to as a key, defines how a character is collated in
|
jpayne@69
|
63 * the given collation object.
|
jpayne@69
|
64 * For example, consider the following in Slovak and in traditional Spanish collation:
|
jpayne@69
|
65 * <pre>
|
jpayne@69
|
66 * "ca" -> the first key is key('c') and second key is key('a').
|
jpayne@69
|
67 * "cha" -> the first key is key('ch') and second key is key('a').</pre>
|
jpayne@69
|
68 * And in German phonebook collation,
|
jpayne@69
|
69 * <pre> \htmlonly "æb"-> the first key is key('a'), the second key is key('e'), and
|
jpayne@69
|
70 * the third key is key('b'). \endhtmlonly </pre>
|
jpayne@69
|
71 * The key of a character is an integer composed of primary order(short),
|
jpayne@69
|
72 * secondary order(char), and tertiary order(char). Java strictly defines the
|
jpayne@69
|
73 * size and signedness of its primitive data types. Therefore, the static
|
jpayne@69
|
74 * functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return
|
jpayne@69
|
75 * int32_t to ensure the correctness of the key value.
|
jpayne@69
|
76 * <p>Example of the iterator usage: (without error checking)
|
jpayne@69
|
77 * <pre>
|
jpayne@69
|
78 * \code
|
jpayne@69
|
79 * void CollationElementIterator_Example()
|
jpayne@69
|
80 * {
|
jpayne@69
|
81 * UnicodeString str = "This is a test";
|
jpayne@69
|
82 * UErrorCode success = U_ZERO_ERROR;
|
jpayne@69
|
83 * RuleBasedCollator* rbc =
|
jpayne@69
|
84 * (RuleBasedCollator*) RuleBasedCollator::createInstance(success);
|
jpayne@69
|
85 * CollationElementIterator* c =
|
jpayne@69
|
86 * rbc->createCollationElementIterator( str );
|
jpayne@69
|
87 * int32_t order = c->next(success);
|
jpayne@69
|
88 * c->reset();
|
jpayne@69
|
89 * order = c->previous(success);
|
jpayne@69
|
90 * delete c;
|
jpayne@69
|
91 * delete rbc;
|
jpayne@69
|
92 * }
|
jpayne@69
|
93 * \endcode
|
jpayne@69
|
94 * </pre>
|
jpayne@69
|
95 * <p>
|
jpayne@69
|
96 * The method next() returns the collation order of the next character based on
|
jpayne@69
|
97 * the comparison level of the collator. The method previous() returns the
|
jpayne@69
|
98 * collation order of the previous character based on the comparison level of
|
jpayne@69
|
99 * the collator. The Collation Element Iterator moves only in one direction
|
jpayne@69
|
100 * between calls to reset(), setOffset(), or setText(). That is, next()
|
jpayne@69
|
101 * and previous() cannot be intermixed. Whenever previous() is to be called after
|
jpayne@69
|
102 * next() or vice versa, reset(), setOffset() or setText() has to be called first
|
jpayne@69
|
103 * to reset the status, shifting pointers to either the end or the start of
|
jpayne@69
|
104 * the string (reset() or setText()), or the specified position (setOffset()).
|
jpayne@69
|
105 * Hence at the next call of next() or previous(), the first or last collation order,
|
jpayne@69
|
106 * or collation order at the specified position will be returned. If a change of
|
jpayne@69
|
107 * direction is done without one of these calls, the result is undefined.
|
jpayne@69
|
108 * <p>
|
jpayne@69
|
109 * The result of a forward iterate (next()) and reversed result of the backward
|
jpayne@69
|
110 * iterate (previous()) on the same string are equivalent, if collation orders
|
jpayne@69
|
111 * with the value 0 are ignored.
|
jpayne@69
|
112 * Character based on the comparison level of the collator. A collation order
|
jpayne@69
|
113 * consists of primary order, secondary order and tertiary order. The data
|
jpayne@69
|
114 * type of the collation order is <strong>int32_t</strong>.
|
jpayne@69
|
115 *
|
jpayne@69
|
116 * Note, CollationElementIterator should not be subclassed.
|
jpayne@69
|
117 * @see Collator
|
jpayne@69
|
118 * @see RuleBasedCollator
|
jpayne@69
|
119 * @version 1.8 Jan 16 2001
|
jpayne@69
|
120 */
|
jpayne@69
|
121 class U_I18N_API CollationElementIterator U_FINAL : public UObject {
|
jpayne@69
|
122 public:
|
jpayne@69
|
123
|
jpayne@69
|
124 // CollationElementIterator public data member ------------------------------
|
jpayne@69
|
125
|
jpayne@69
|
126 enum {
|
jpayne@69
|
127 /**
|
jpayne@69
|
128 * NULLORDER indicates that an error has occured while processing
|
jpayne@69
|
129 * @stable ICU 2.0
|
jpayne@69
|
130 */
|
jpayne@69
|
131 NULLORDER = (int32_t)0xffffffff
|
jpayne@69
|
132 };
|
jpayne@69
|
133
|
jpayne@69
|
134 // CollationElementIterator public constructor/destructor -------------------
|
jpayne@69
|
135
|
jpayne@69
|
136 /**
|
jpayne@69
|
137 * Copy constructor.
|
jpayne@69
|
138 *
|
jpayne@69
|
139 * @param other the object to be copied from
|
jpayne@69
|
140 * @stable ICU 2.0
|
jpayne@69
|
141 */
|
jpayne@69
|
142 CollationElementIterator(const CollationElementIterator& other);
|
jpayne@69
|
143
|
jpayne@69
|
144 /**
|
jpayne@69
|
145 * Destructor
|
jpayne@69
|
146 * @stable ICU 2.0
|
jpayne@69
|
147 */
|
jpayne@69
|
148 virtual ~CollationElementIterator();
|
jpayne@69
|
149
|
jpayne@69
|
150 // CollationElementIterator public methods ----------------------------------
|
jpayne@69
|
151
|
jpayne@69
|
152 /**
|
jpayne@69
|
153 * Returns true if "other" is the same as "this"
|
jpayne@69
|
154 *
|
jpayne@69
|
155 * @param other the object to be compared
|
jpayne@69
|
156 * @return true if "other" is the same as "this"
|
jpayne@69
|
157 * @stable ICU 2.0
|
jpayne@69
|
158 */
|
jpayne@69
|
159 UBool operator==(const CollationElementIterator& other) const;
|
jpayne@69
|
160
|
jpayne@69
|
161 /**
|
jpayne@69
|
162 * Returns true if "other" is not the same as "this".
|
jpayne@69
|
163 *
|
jpayne@69
|
164 * @param other the object to be compared
|
jpayne@69
|
165 * @return true if "other" is not the same as "this"
|
jpayne@69
|
166 * @stable ICU 2.0
|
jpayne@69
|
167 */
|
jpayne@69
|
168 UBool operator!=(const CollationElementIterator& other) const;
|
jpayne@69
|
169
|
jpayne@69
|
170 /**
|
jpayne@69
|
171 * Resets the cursor to the beginning of the string.
|
jpayne@69
|
172 * @stable ICU 2.0
|
jpayne@69
|
173 */
|
jpayne@69
|
174 void reset(void);
|
jpayne@69
|
175
|
jpayne@69
|
176 /**
|
jpayne@69
|
177 * Gets the ordering priority of the next character in the string.
|
jpayne@69
|
178 * @param status the error code status.
|
jpayne@69
|
179 * @return the next character's ordering. otherwise returns NULLORDER if an
|
jpayne@69
|
180 * error has occured or if the end of string has been reached
|
jpayne@69
|
181 * @stable ICU 2.0
|
jpayne@69
|
182 */
|
jpayne@69
|
183 int32_t next(UErrorCode& status);
|
jpayne@69
|
184
|
jpayne@69
|
185 /**
|
jpayne@69
|
186 * Get the ordering priority of the previous collation element in the string.
|
jpayne@69
|
187 * @param status the error code status.
|
jpayne@69
|
188 * @return the previous element's ordering. otherwise returns NULLORDER if an
|
jpayne@69
|
189 * error has occured or if the start of string has been reached
|
jpayne@69
|
190 * @stable ICU 2.0
|
jpayne@69
|
191 */
|
jpayne@69
|
192 int32_t previous(UErrorCode& status);
|
jpayne@69
|
193
|
jpayne@69
|
194 /**
|
jpayne@69
|
195 * Gets the primary order of a collation order.
|
jpayne@69
|
196 * @param order the collation order
|
jpayne@69
|
197 * @return the primary order of a collation order.
|
jpayne@69
|
198 * @stable ICU 2.0
|
jpayne@69
|
199 */
|
jpayne@69
|
200 static inline int32_t primaryOrder(int32_t order);
|
jpayne@69
|
201
|
jpayne@69
|
202 /**
|
jpayne@69
|
203 * Gets the secondary order of a collation order.
|
jpayne@69
|
204 * @param order the collation order
|
jpayne@69
|
205 * @return the secondary order of a collation order.
|
jpayne@69
|
206 * @stable ICU 2.0
|
jpayne@69
|
207 */
|
jpayne@69
|
208 static inline int32_t secondaryOrder(int32_t order);
|
jpayne@69
|
209
|
jpayne@69
|
210 /**
|
jpayne@69
|
211 * Gets the tertiary order of a collation order.
|
jpayne@69
|
212 * @param order the collation order
|
jpayne@69
|
213 * @return the tertiary order of a collation order.
|
jpayne@69
|
214 * @stable ICU 2.0
|
jpayne@69
|
215 */
|
jpayne@69
|
216 static inline int32_t tertiaryOrder(int32_t order);
|
jpayne@69
|
217
|
jpayne@69
|
218 /**
|
jpayne@69
|
219 * Return the maximum length of any expansion sequences that end with the
|
jpayne@69
|
220 * specified comparison order.
|
jpayne@69
|
221 * @param order a collation order returned by previous or next.
|
jpayne@69
|
222 * @return maximum size of the expansion sequences ending with the collation
|
jpayne@69
|
223 * element or 1 if collation element does not occur at the end of any
|
jpayne@69
|
224 * expansion sequence
|
jpayne@69
|
225 * @stable ICU 2.0
|
jpayne@69
|
226 */
|
jpayne@69
|
227 int32_t getMaxExpansion(int32_t order) const;
|
jpayne@69
|
228
|
jpayne@69
|
229 /**
|
jpayne@69
|
230 * Gets the comparison order in the desired strength. Ignore the other
|
jpayne@69
|
231 * differences.
|
jpayne@69
|
232 * @param order The order value
|
jpayne@69
|
233 * @stable ICU 2.0
|
jpayne@69
|
234 */
|
jpayne@69
|
235 int32_t strengthOrder(int32_t order) const;
|
jpayne@69
|
236
|
jpayne@69
|
237 /**
|
jpayne@69
|
238 * Sets the source string.
|
jpayne@69
|
239 * @param str the source string.
|
jpayne@69
|
240 * @param status the error code status.
|
jpayne@69
|
241 * @stable ICU 2.0
|
jpayne@69
|
242 */
|
jpayne@69
|
243 void setText(const UnicodeString& str, UErrorCode& status);
|
jpayne@69
|
244
|
jpayne@69
|
245 /**
|
jpayne@69
|
246 * Sets the source string.
|
jpayne@69
|
247 * @param str the source character iterator.
|
jpayne@69
|
248 * @param status the error code status.
|
jpayne@69
|
249 * @stable ICU 2.0
|
jpayne@69
|
250 */
|
jpayne@69
|
251 void setText(CharacterIterator& str, UErrorCode& status);
|
jpayne@69
|
252
|
jpayne@69
|
253 /**
|
jpayne@69
|
254 * Checks if a comparison order is ignorable.
|
jpayne@69
|
255 * @param order the collation order.
|
jpayne@69
|
256 * @return TRUE if a character is ignorable, FALSE otherwise.
|
jpayne@69
|
257 * @stable ICU 2.0
|
jpayne@69
|
258 */
|
jpayne@69
|
259 static inline UBool isIgnorable(int32_t order);
|
jpayne@69
|
260
|
jpayne@69
|
261 /**
|
jpayne@69
|
262 * Gets the offset of the currently processed character in the source string.
|
jpayne@69
|
263 * @return the offset of the character.
|
jpayne@69
|
264 * @stable ICU 2.0
|
jpayne@69
|
265 */
|
jpayne@69
|
266 int32_t getOffset(void) const;
|
jpayne@69
|
267
|
jpayne@69
|
268 /**
|
jpayne@69
|
269 * Sets the offset of the currently processed character in the source string.
|
jpayne@69
|
270 * @param newOffset the new offset.
|
jpayne@69
|
271 * @param status the error code status.
|
jpayne@69
|
272 * @return the offset of the character.
|
jpayne@69
|
273 * @stable ICU 2.0
|
jpayne@69
|
274 */
|
jpayne@69
|
275 void setOffset(int32_t newOffset, UErrorCode& status);
|
jpayne@69
|
276
|
jpayne@69
|
277 /**
|
jpayne@69
|
278 * ICU "poor man's RTTI", returns a UClassID for the actual class.
|
jpayne@69
|
279 *
|
jpayne@69
|
280 * @stable ICU 2.2
|
jpayne@69
|
281 */
|
jpayne@69
|
282 virtual UClassID getDynamicClassID() const;
|
jpayne@69
|
283
|
jpayne@69
|
284 /**
|
jpayne@69
|
285 * ICU "poor man's RTTI", returns a UClassID for this class.
|
jpayne@69
|
286 *
|
jpayne@69
|
287 * @stable ICU 2.2
|
jpayne@69
|
288 */
|
jpayne@69
|
289 static UClassID U_EXPORT2 getStaticClassID();
|
jpayne@69
|
290
|
jpayne@69
|
291 #ifndef U_HIDE_INTERNAL_API
|
jpayne@69
|
292 /** @internal */
|
jpayne@69
|
293 static inline CollationElementIterator *fromUCollationElements(UCollationElements *uc) {
|
jpayne@69
|
294 return reinterpret_cast<CollationElementIterator *>(uc);
|
jpayne@69
|
295 }
|
jpayne@69
|
296 /** @internal */
|
jpayne@69
|
297 static inline const CollationElementIterator *fromUCollationElements(const UCollationElements *uc) {
|
jpayne@69
|
298 return reinterpret_cast<const CollationElementIterator *>(uc);
|
jpayne@69
|
299 }
|
jpayne@69
|
300 /** @internal */
|
jpayne@69
|
301 inline UCollationElements *toUCollationElements() {
|
jpayne@69
|
302 return reinterpret_cast<UCollationElements *>(this);
|
jpayne@69
|
303 }
|
jpayne@69
|
304 /** @internal */
|
jpayne@69
|
305 inline const UCollationElements *toUCollationElements() const {
|
jpayne@69
|
306 return reinterpret_cast<const UCollationElements *>(this);
|
jpayne@69
|
307 }
|
jpayne@69
|
308 #endif // U_HIDE_INTERNAL_API
|
jpayne@69
|
309
|
jpayne@69
|
310 private:
|
jpayne@69
|
311 friend class RuleBasedCollator;
|
jpayne@69
|
312 friend class UCollationPCE;
|
jpayne@69
|
313
|
jpayne@69
|
314 /**
|
jpayne@69
|
315 * CollationElementIterator constructor. This takes the source string and the
|
jpayne@69
|
316 * collation object. The cursor will walk thru the source string based on the
|
jpayne@69
|
317 * predefined collation rules. If the source string is empty, NULLORDER will
|
jpayne@69
|
318 * be returned on the calls to next().
|
jpayne@69
|
319 * @param sourceText the source string.
|
jpayne@69
|
320 * @param order the collation object.
|
jpayne@69
|
321 * @param status the error code status.
|
jpayne@69
|
322 */
|
jpayne@69
|
323 CollationElementIterator(const UnicodeString& sourceText,
|
jpayne@69
|
324 const RuleBasedCollator* order, UErrorCode& status);
|
jpayne@69
|
325 // Note: The constructors should take settings & tailoring, not a collator,
|
jpayne@69
|
326 // to avoid circular dependencies.
|
jpayne@69
|
327 // However, for operator==() we would need to be able to compare tailoring data for equality
|
jpayne@69
|
328 // without making CollationData or CollationTailoring depend on TailoredSet.
|
jpayne@69
|
329 // (See the implementation of RuleBasedCollator::operator==().)
|
jpayne@69
|
330 // That might require creating an intermediate class that would be used
|
jpayne@69
|
331 // by both CollationElementIterator and RuleBasedCollator
|
jpayne@69
|
332 // but only contain the part of RBC== related to data and rules.
|
jpayne@69
|
333
|
jpayne@69
|
334 /**
|
jpayne@69
|
335 * CollationElementIterator constructor. This takes the source string and the
|
jpayne@69
|
336 * collation object. The cursor will walk thru the source string based on the
|
jpayne@69
|
337 * predefined collation rules. If the source string is empty, NULLORDER will
|
jpayne@69
|
338 * be returned on the calls to next().
|
jpayne@69
|
339 * @param sourceText the source string.
|
jpayne@69
|
340 * @param order the collation object.
|
jpayne@69
|
341 * @param status the error code status.
|
jpayne@69
|
342 */
|
jpayne@69
|
343 CollationElementIterator(const CharacterIterator& sourceText,
|
jpayne@69
|
344 const RuleBasedCollator* order, UErrorCode& status);
|
jpayne@69
|
345
|
jpayne@69
|
346 /**
|
jpayne@69
|
347 * Assignment operator
|
jpayne@69
|
348 *
|
jpayne@69
|
349 * @param other the object to be copied
|
jpayne@69
|
350 */
|
jpayne@69
|
351 const CollationElementIterator&
|
jpayne@69
|
352 operator=(const CollationElementIterator& other);
|
jpayne@69
|
353
|
jpayne@69
|
354 CollationElementIterator(); // default constructor not implemented
|
jpayne@69
|
355
|
jpayne@69
|
356 /** Normalizes dir_=1 (just after setOffset()) to dir_=0 (just after reset()). */
|
jpayne@69
|
357 inline int8_t normalizeDir() const { return dir_ == 1 ? 0 : dir_; }
|
jpayne@69
|
358
|
jpayne@69
|
359 static UHashtable *computeMaxExpansions(const CollationData *data, UErrorCode &errorCode);
|
jpayne@69
|
360
|
jpayne@69
|
361 static int32_t getMaxExpansion(const UHashtable *maxExpansions, int32_t order);
|
jpayne@69
|
362
|
jpayne@69
|
363 // CollationElementIterator private data members ----------------------------
|
jpayne@69
|
364
|
jpayne@69
|
365 CollationIterator *iter_; // owned
|
jpayne@69
|
366 const RuleBasedCollator *rbc_; // aliased
|
jpayne@69
|
367 uint32_t otherHalf_;
|
jpayne@69
|
368 /**
|
jpayne@69
|
369 * <0: backwards; 0: just after reset() (previous() begins from end);
|
jpayne@69
|
370 * 1: just after setOffset(); >1: forward
|
jpayne@69
|
371 */
|
jpayne@69
|
372 int8_t dir_;
|
jpayne@69
|
373 /**
|
jpayne@69
|
374 * Stores offsets from expansions and from unsafe-backwards iteration,
|
jpayne@69
|
375 * so that getOffset() returns intermediate offsets for the CEs
|
jpayne@69
|
376 * that are consistent with forward iteration.
|
jpayne@69
|
377 */
|
jpayne@69
|
378 UVector32 *offsets_;
|
jpayne@69
|
379
|
jpayne@69
|
380 UnicodeString string_;
|
jpayne@69
|
381 };
|
jpayne@69
|
382
|
jpayne@69
|
383 // CollationElementIterator inline method definitions --------------------------
|
jpayne@69
|
384
|
jpayne@69
|
385 inline int32_t CollationElementIterator::primaryOrder(int32_t order)
|
jpayne@69
|
386 {
|
jpayne@69
|
387 return (order >> 16) & 0xffff;
|
jpayne@69
|
388 }
|
jpayne@69
|
389
|
jpayne@69
|
390 inline int32_t CollationElementIterator::secondaryOrder(int32_t order)
|
jpayne@69
|
391 {
|
jpayne@69
|
392 return (order >> 8) & 0xff;
|
jpayne@69
|
393 }
|
jpayne@69
|
394
|
jpayne@69
|
395 inline int32_t CollationElementIterator::tertiaryOrder(int32_t order)
|
jpayne@69
|
396 {
|
jpayne@69
|
397 return order & 0xff;
|
jpayne@69
|
398 }
|
jpayne@69
|
399
|
jpayne@69
|
400 inline UBool CollationElementIterator::isIgnorable(int32_t order)
|
jpayne@69
|
401 {
|
jpayne@69
|
402 return (order & 0xffff0000) == 0;
|
jpayne@69
|
403 }
|
jpayne@69
|
404
|
jpayne@69
|
405 U_NAMESPACE_END
|
jpayne@69
|
406
|
jpayne@69
|
407 #endif /* #if !UCONFIG_NO_COLLATION */
|
jpayne@69
|
408
|
jpayne@69
|
409 #endif /* U_SHOW_CPLUSPLUS_API */
|
jpayne@69
|
410
|
jpayne@69
|
411 #endif
|