jpayne@69
|
1 // © 2016 and later: Unicode, Inc. and others.
|
jpayne@69
|
2 // License & terms of use: http://www.unicode.org/copyright.html
|
jpayne@69
|
3 /*
|
jpayne@69
|
4 ********************************************************************
|
jpayne@69
|
5 *
|
jpayne@69
|
6 * Copyright (C) 1997-2011, International Business Machines
|
jpayne@69
|
7 * Corporation and others. All Rights Reserved.
|
jpayne@69
|
8 *
|
jpayne@69
|
9 ********************************************************************
|
jpayne@69
|
10 */
|
jpayne@69
|
11
|
jpayne@69
|
12 #ifndef CHARITER_H
|
jpayne@69
|
13 #define CHARITER_H
|
jpayne@69
|
14
|
jpayne@69
|
15 #include "unicode/utypes.h"
|
jpayne@69
|
16
|
jpayne@69
|
17 #if U_SHOW_CPLUSPLUS_API
|
jpayne@69
|
18
|
jpayne@69
|
19 #include "unicode/uobject.h"
|
jpayne@69
|
20 #include "unicode/unistr.h"
|
jpayne@69
|
21 /**
|
jpayne@69
|
22 * \file
|
jpayne@69
|
23 * \brief C++ API: Character Iterator
|
jpayne@69
|
24 */
|
jpayne@69
|
25
|
jpayne@69
|
26 U_NAMESPACE_BEGIN
|
jpayne@69
|
27 /**
|
jpayne@69
|
28 * Abstract class that defines an API for forward-only iteration
|
jpayne@69
|
29 * on text objects.
|
jpayne@69
|
30 * This is a minimal interface for iteration without random access
|
jpayne@69
|
31 * or backwards iteration. It is especially useful for wrapping
|
jpayne@69
|
32 * streams with converters into an object for collation or
|
jpayne@69
|
33 * normalization.
|
jpayne@69
|
34 *
|
jpayne@69
|
35 * <p>Characters can be accessed in two ways: as code units or as
|
jpayne@69
|
36 * code points.
|
jpayne@69
|
37 * Unicode code points are 21-bit integers and are the scalar values
|
jpayne@69
|
38 * of Unicode characters. ICU uses the type UChar32 for them.
|
jpayne@69
|
39 * Unicode code units are the storage units of a given
|
jpayne@69
|
40 * Unicode/UCS Transformation Format (a character encoding scheme).
|
jpayne@69
|
41 * With UTF-16, all code points can be represented with either one
|
jpayne@69
|
42 * or two code units ("surrogates").
|
jpayne@69
|
43 * String storage is typically based on code units, while properties
|
jpayne@69
|
44 * of characters are typically determined using code point values.
|
jpayne@69
|
45 * Some processes may be designed to work with sequences of code units,
|
jpayne@69
|
46 * or it may be known that all characters that are important to an
|
jpayne@69
|
47 * algorithm can be represented with single code units.
|
jpayne@69
|
48 * Other processes will need to use the code point access functions.</p>
|
jpayne@69
|
49 *
|
jpayne@69
|
50 * <p>ForwardCharacterIterator provides nextPostInc() to access
|
jpayne@69
|
51 * a code unit and advance an internal position into the text object,
|
jpayne@69
|
52 * similar to a <code>return text[position++]</code>.<br>
|
jpayne@69
|
53 * It provides next32PostInc() to access a code point and advance an internal
|
jpayne@69
|
54 * position.</p>
|
jpayne@69
|
55 *
|
jpayne@69
|
56 * <p>next32PostInc() assumes that the current position is that of
|
jpayne@69
|
57 * the beginning of a code point, i.e., of its first code unit.
|
jpayne@69
|
58 * After next32PostInc(), this will be true again.
|
jpayne@69
|
59 * In general, access to code units and code points in the same
|
jpayne@69
|
60 * iteration loop should not be mixed. In UTF-16, if the current position
|
jpayne@69
|
61 * is on a second code unit (Low Surrogate), then only that code unit
|
jpayne@69
|
62 * is returned even by next32PostInc().</p>
|
jpayne@69
|
63 *
|
jpayne@69
|
64 * <p>For iteration with either function, there are two ways to
|
jpayne@69
|
65 * check for the end of the iteration. When there are no more
|
jpayne@69
|
66 * characters in the text object:
|
jpayne@69
|
67 * <ul>
|
jpayne@69
|
68 * <li>The hasNext() function returns FALSE.</li>
|
jpayne@69
|
69 * <li>nextPostInc() and next32PostInc() return DONE
|
jpayne@69
|
70 * when one attempts to read beyond the end of the text object.</li>
|
jpayne@69
|
71 * </ul>
|
jpayne@69
|
72 *
|
jpayne@69
|
73 * Example:
|
jpayne@69
|
74 * \code
|
jpayne@69
|
75 * void function1(ForwardCharacterIterator &it) {
|
jpayne@69
|
76 * UChar32 c;
|
jpayne@69
|
77 * while(it.hasNext()) {
|
jpayne@69
|
78 * c=it.next32PostInc();
|
jpayne@69
|
79 * // use c
|
jpayne@69
|
80 * }
|
jpayne@69
|
81 * }
|
jpayne@69
|
82 *
|
jpayne@69
|
83 * void function2(ForwardCharacterIterator &it) {
|
jpayne@69
|
84 * char16_t c;
|
jpayne@69
|
85 * while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) {
|
jpayne@69
|
86 * // use c
|
jpayne@69
|
87 * }
|
jpayne@69
|
88 * }
|
jpayne@69
|
89 * \endcode
|
jpayne@69
|
90 * </p>
|
jpayne@69
|
91 *
|
jpayne@69
|
92 * @stable ICU 2.0
|
jpayne@69
|
93 */
|
jpayne@69
|
94 class U_COMMON_API ForwardCharacterIterator : public UObject {
|
jpayne@69
|
95 public:
|
jpayne@69
|
96 /**
|
jpayne@69
|
97 * Value returned by most of ForwardCharacterIterator's functions
|
jpayne@69
|
98 * when the iterator has reached the limits of its iteration.
|
jpayne@69
|
99 * @stable ICU 2.0
|
jpayne@69
|
100 */
|
jpayne@69
|
101 enum { DONE = 0xffff };
|
jpayne@69
|
102
|
jpayne@69
|
103 /**
|
jpayne@69
|
104 * Destructor.
|
jpayne@69
|
105 * @stable ICU 2.0
|
jpayne@69
|
106 */
|
jpayne@69
|
107 virtual ~ForwardCharacterIterator();
|
jpayne@69
|
108
|
jpayne@69
|
109 /**
|
jpayne@69
|
110 * Returns true when both iterators refer to the same
|
jpayne@69
|
111 * character in the same character-storage object.
|
jpayne@69
|
112 * @param that The ForwardCharacterIterator to be compared for equality
|
jpayne@69
|
113 * @return true when both iterators refer to the same
|
jpayne@69
|
114 * character in the same character-storage object
|
jpayne@69
|
115 * @stable ICU 2.0
|
jpayne@69
|
116 */
|
jpayne@69
|
117 virtual UBool operator==(const ForwardCharacterIterator& that) const = 0;
|
jpayne@69
|
118
|
jpayne@69
|
119 /**
|
jpayne@69
|
120 * Returns true when the iterators refer to different
|
jpayne@69
|
121 * text-storage objects, or to different characters in the
|
jpayne@69
|
122 * same text-storage object.
|
jpayne@69
|
123 * @param that The ForwardCharacterIterator to be compared for inequality
|
jpayne@69
|
124 * @return true when the iterators refer to different
|
jpayne@69
|
125 * text-storage objects, or to different characters in the
|
jpayne@69
|
126 * same text-storage object
|
jpayne@69
|
127 * @stable ICU 2.0
|
jpayne@69
|
128 */
|
jpayne@69
|
129 inline UBool operator!=(const ForwardCharacterIterator& that) const;
|
jpayne@69
|
130
|
jpayne@69
|
131 /**
|
jpayne@69
|
132 * Generates a hash code for this iterator.
|
jpayne@69
|
133 * @return the hash code.
|
jpayne@69
|
134 * @stable ICU 2.0
|
jpayne@69
|
135 */
|
jpayne@69
|
136 virtual int32_t hashCode(void) const = 0;
|
jpayne@69
|
137
|
jpayne@69
|
138 /**
|
jpayne@69
|
139 * Returns a UClassID for this ForwardCharacterIterator ("poor man's
|
jpayne@69
|
140 * RTTI").<P> Despite the fact that this function is public,
|
jpayne@69
|
141 * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API!
|
jpayne@69
|
142 * @return a UClassID for this ForwardCharacterIterator
|
jpayne@69
|
143 * @stable ICU 2.0
|
jpayne@69
|
144 */
|
jpayne@69
|
145 virtual UClassID getDynamicClassID(void) const = 0;
|
jpayne@69
|
146
|
jpayne@69
|
147 /**
|
jpayne@69
|
148 * Gets the current code unit for returning and advances to the next code unit
|
jpayne@69
|
149 * in the iteration range
|
jpayne@69
|
150 * (toward endIndex()). If there are
|
jpayne@69
|
151 * no more code units to return, returns DONE.
|
jpayne@69
|
152 * @return the current code unit.
|
jpayne@69
|
153 * @stable ICU 2.0
|
jpayne@69
|
154 */
|
jpayne@69
|
155 virtual char16_t nextPostInc(void) = 0;
|
jpayne@69
|
156
|
jpayne@69
|
157 /**
|
jpayne@69
|
158 * Gets the current code point for returning and advances to the next code point
|
jpayne@69
|
159 * in the iteration range
|
jpayne@69
|
160 * (toward endIndex()). If there are
|
jpayne@69
|
161 * no more code points to return, returns DONE.
|
jpayne@69
|
162 * @return the current code point.
|
jpayne@69
|
163 * @stable ICU 2.0
|
jpayne@69
|
164 */
|
jpayne@69
|
165 virtual UChar32 next32PostInc(void) = 0;
|
jpayne@69
|
166
|
jpayne@69
|
167 /**
|
jpayne@69
|
168 * Returns FALSE if there are no more code units or code points
|
jpayne@69
|
169 * at or after the current position in the iteration range.
|
jpayne@69
|
170 * This is used with nextPostInc() or next32PostInc() in forward
|
jpayne@69
|
171 * iteration.
|
jpayne@69
|
172 * @returns FALSE if there are no more code units or code points
|
jpayne@69
|
173 * at or after the current position in the iteration range.
|
jpayne@69
|
174 * @stable ICU 2.0
|
jpayne@69
|
175 */
|
jpayne@69
|
176 virtual UBool hasNext() = 0;
|
jpayne@69
|
177
|
jpayne@69
|
178 protected:
|
jpayne@69
|
179 /** Default constructor to be overridden in the implementing class. @stable ICU 2.0*/
|
jpayne@69
|
180 ForwardCharacterIterator();
|
jpayne@69
|
181
|
jpayne@69
|
182 /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0*/
|
jpayne@69
|
183 ForwardCharacterIterator(const ForwardCharacterIterator &other);
|
jpayne@69
|
184
|
jpayne@69
|
185 /**
|
jpayne@69
|
186 * Assignment operator to be overridden in the implementing class.
|
jpayne@69
|
187 * @stable ICU 2.0
|
jpayne@69
|
188 */
|
jpayne@69
|
189 ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) { return *this; }
|
jpayne@69
|
190 };
|
jpayne@69
|
191
|
jpayne@69
|
192 /**
|
jpayne@69
|
193 * Abstract class that defines an API for iteration
|
jpayne@69
|
194 * on text objects.
|
jpayne@69
|
195 * This is an interface for forward and backward iteration
|
jpayne@69
|
196 * and random access into a text object.
|
jpayne@69
|
197 *
|
jpayne@69
|
198 * <p>The API provides backward compatibility to the Java and older ICU
|
jpayne@69
|
199 * CharacterIterator classes but extends them significantly:
|
jpayne@69
|
200 * <ol>
|
jpayne@69
|
201 * <li>CharacterIterator is now a subclass of ForwardCharacterIterator.</li>
|
jpayne@69
|
202 * <li>While the old API functions provided forward iteration with
|
jpayne@69
|
203 * "pre-increment" semantics, the new one also provides functions
|
jpayne@69
|
204 * with "post-increment" semantics. They are more efficient and should
|
jpayne@69
|
205 * be the preferred iterator functions for new implementations.
|
jpayne@69
|
206 * The backward iteration always had "pre-decrement" semantics, which
|
jpayne@69
|
207 * are efficient.</li>
|
jpayne@69
|
208 * <li>Just like ForwardCharacterIterator, it provides access to
|
jpayne@69
|
209 * both code units and code points. Code point access versions are available
|
jpayne@69
|
210 * for the old and the new iteration semantics.</li>
|
jpayne@69
|
211 * <li>There are new functions for setting and moving the current position
|
jpayne@69
|
212 * without returning a character, for efficiency.</li>
|
jpayne@69
|
213 * </ol>
|
jpayne@69
|
214 *
|
jpayne@69
|
215 * See ForwardCharacterIterator for examples for using the new forward iteration
|
jpayne@69
|
216 * functions. For backward iteration, there is also a hasPrevious() function
|
jpayne@69
|
217 * that can be used analogously to hasNext().
|
jpayne@69
|
218 * The old functions work as before and are shown below.</p>
|
jpayne@69
|
219 *
|
jpayne@69
|
220 * <p>Examples for some of the new functions:</p>
|
jpayne@69
|
221 *
|
jpayne@69
|
222 * Forward iteration with hasNext():
|
jpayne@69
|
223 * \code
|
jpayne@69
|
224 * void forward1(CharacterIterator &it) {
|
jpayne@69
|
225 * UChar32 c;
|
jpayne@69
|
226 * for(it.setToStart(); it.hasNext();) {
|
jpayne@69
|
227 * c=it.next32PostInc();
|
jpayne@69
|
228 * // use c
|
jpayne@69
|
229 * }
|
jpayne@69
|
230 * }
|
jpayne@69
|
231 * \endcode
|
jpayne@69
|
232 * Forward iteration more similar to loops with the old forward iteration,
|
jpayne@69
|
233 * showing a way to convert simple for() loops:
|
jpayne@69
|
234 * \code
|
jpayne@69
|
235 * void forward2(CharacterIterator &it) {
|
jpayne@69
|
236 * char16_t c;
|
jpayne@69
|
237 * for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) {
|
jpayne@69
|
238 * // use c
|
jpayne@69
|
239 * }
|
jpayne@69
|
240 * }
|
jpayne@69
|
241 * \endcode
|
jpayne@69
|
242 * Backward iteration with setToEnd() and hasPrevious():
|
jpayne@69
|
243 * \code
|
jpayne@69
|
244 * void backward1(CharacterIterator &it) {
|
jpayne@69
|
245 * UChar32 c;
|
jpayne@69
|
246 * for(it.setToEnd(); it.hasPrevious();) {
|
jpayne@69
|
247 * c=it.previous32();
|
jpayne@69
|
248 * // use c
|
jpayne@69
|
249 * }
|
jpayne@69
|
250 * }
|
jpayne@69
|
251 * \endcode
|
jpayne@69
|
252 * Backward iteration with a more traditional for() loop:
|
jpayne@69
|
253 * \code
|
jpayne@69
|
254 * void backward2(CharacterIterator &it) {
|
jpayne@69
|
255 * char16_t c;
|
jpayne@69
|
256 * for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) {
|
jpayne@69
|
257 * // use c
|
jpayne@69
|
258 * }
|
jpayne@69
|
259 * }
|
jpayne@69
|
260 * \endcode
|
jpayne@69
|
261 *
|
jpayne@69
|
262 * Example for random access:
|
jpayne@69
|
263 * \code
|
jpayne@69
|
264 * void random(CharacterIterator &it) {
|
jpayne@69
|
265 * // set to the third code point from the beginning
|
jpayne@69
|
266 * it.move32(3, CharacterIterator::kStart);
|
jpayne@69
|
267 * // get a code point from here without moving the position
|
jpayne@69
|
268 * UChar32 c=it.current32();
|
jpayne@69
|
269 * // get the position
|
jpayne@69
|
270 * int32_t pos=it.getIndex();
|
jpayne@69
|
271 * // get the previous code unit
|
jpayne@69
|
272 * char16_t u=it.previous();
|
jpayne@69
|
273 * // move back one more code unit
|
jpayne@69
|
274 * it.move(-1, CharacterIterator::kCurrent);
|
jpayne@69
|
275 * // set the position back to where it was
|
jpayne@69
|
276 * // and read the same code point c and move beyond it
|
jpayne@69
|
277 * it.setIndex(pos);
|
jpayne@69
|
278 * if(c!=it.next32PostInc()) {
|
jpayne@69
|
279 * exit(1); // CharacterIterator inconsistent
|
jpayne@69
|
280 * }
|
jpayne@69
|
281 * }
|
jpayne@69
|
282 * \endcode
|
jpayne@69
|
283 *
|
jpayne@69
|
284 * <p>Examples, especially for the old API:</p>
|
jpayne@69
|
285 *
|
jpayne@69
|
286 * Function processing characters, in this example simple output
|
jpayne@69
|
287 * <pre>
|
jpayne@69
|
288 * \code
|
jpayne@69
|
289 * void processChar( char16_t c )
|
jpayne@69
|
290 * {
|
jpayne@69
|
291 * cout << " " << c;
|
jpayne@69
|
292 * }
|
jpayne@69
|
293 * \endcode
|
jpayne@69
|
294 * </pre>
|
jpayne@69
|
295 * Traverse the text from start to finish
|
jpayne@69
|
296 * <pre>
|
jpayne@69
|
297 * \code
|
jpayne@69
|
298 * void traverseForward(CharacterIterator& iter)
|
jpayne@69
|
299 * {
|
jpayne@69
|
300 * for(char16_t c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
|
jpayne@69
|
301 * processChar(c);
|
jpayne@69
|
302 * }
|
jpayne@69
|
303 * }
|
jpayne@69
|
304 * \endcode
|
jpayne@69
|
305 * </pre>
|
jpayne@69
|
306 * Traverse the text backwards, from end to start
|
jpayne@69
|
307 * <pre>
|
jpayne@69
|
308 * \code
|
jpayne@69
|
309 * void traverseBackward(CharacterIterator& iter)
|
jpayne@69
|
310 * {
|
jpayne@69
|
311 * for(char16_t c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
|
jpayne@69
|
312 * processChar(c);
|
jpayne@69
|
313 * }
|
jpayne@69
|
314 * }
|
jpayne@69
|
315 * \endcode
|
jpayne@69
|
316 * </pre>
|
jpayne@69
|
317 * Traverse both forward and backward from a given position in the text.
|
jpayne@69
|
318 * The calls to Unicode::isLetter() and Unicode::isDigit() in this example represent some additional stopping criteria.
|
jpayne@69
|
319 * <pre>
|
jpayne@69
|
320 * \code
|
jpayne@69
|
321 * void traverseOut(CharacterIterator& iter, int32_t pos)
|
jpayne@69
|
322 * {
|
jpayne@69
|
323 * char16_t c;
|
jpayne@69
|
324 * for (c = iter.setIndex(pos);
|
jpayne@69
|
325 * c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
|
jpayne@69
|
326 * c = iter.next()) {}
|
jpayne@69
|
327 * int32_t end = iter.getIndex();
|
jpayne@69
|
328 * for (c = iter.setIndex(pos);
|
jpayne@69
|
329 * c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
|
jpayne@69
|
330 * c = iter.previous()) {}
|
jpayne@69
|
331 * int32_t start = iter.getIndex() + 1;
|
jpayne@69
|
332 *
|
jpayne@69
|
333 * cout << "start: " << start << " end: " << end << endl;
|
jpayne@69
|
334 * for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
|
jpayne@69
|
335 * processChar(c);
|
jpayne@69
|
336 * }
|
jpayne@69
|
337 * }
|
jpayne@69
|
338 * \endcode
|
jpayne@69
|
339 * </pre>
|
jpayne@69
|
340 * Creating a StringCharacterIterator and calling the test functions
|
jpayne@69
|
341 * <pre>
|
jpayne@69
|
342 * \code
|
jpayne@69
|
343 * void CharacterIterator_Example( void )
|
jpayne@69
|
344 * {
|
jpayne@69
|
345 * cout << endl << "===== CharacterIterator_Example: =====" << endl;
|
jpayne@69
|
346 * UnicodeString text("Ein kleiner Satz.");
|
jpayne@69
|
347 * StringCharacterIterator iterator(text);
|
jpayne@69
|
348 * cout << "----- traverseForward: -----------" << endl;
|
jpayne@69
|
349 * traverseForward( iterator );
|
jpayne@69
|
350 * cout << endl << endl << "----- traverseBackward: ----------" << endl;
|
jpayne@69
|
351 * traverseBackward( iterator );
|
jpayne@69
|
352 * cout << endl << endl << "----- traverseOut: ---------------" << endl;
|
jpayne@69
|
353 * traverseOut( iterator, 7 );
|
jpayne@69
|
354 * cout << endl << endl << "-----" << endl;
|
jpayne@69
|
355 * }
|
jpayne@69
|
356 * \endcode
|
jpayne@69
|
357 * </pre>
|
jpayne@69
|
358 *
|
jpayne@69
|
359 * @stable ICU 2.0
|
jpayne@69
|
360 */
|
jpayne@69
|
361 class U_COMMON_API CharacterIterator : public ForwardCharacterIterator {
|
jpayne@69
|
362 public:
|
jpayne@69
|
363 /**
|
jpayne@69
|
364 * Origin enumeration for the move() and move32() functions.
|
jpayne@69
|
365 * @stable ICU 2.0
|
jpayne@69
|
366 */
|
jpayne@69
|
367 enum EOrigin { kStart, kCurrent, kEnd };
|
jpayne@69
|
368
|
jpayne@69
|
369 /**
|
jpayne@69
|
370 * Destructor.
|
jpayne@69
|
371 * @stable ICU 2.0
|
jpayne@69
|
372 */
|
jpayne@69
|
373 virtual ~CharacterIterator();
|
jpayne@69
|
374
|
jpayne@69
|
375 /**
|
jpayne@69
|
376 * Returns a pointer to a new CharacterIterator of the same
|
jpayne@69
|
377 * concrete class as this one, and referring to the same
|
jpayne@69
|
378 * character in the same text-storage object as this one. The
|
jpayne@69
|
379 * caller is responsible for deleting the new clone.
|
jpayne@69
|
380 * @return a pointer to a new CharacterIterator
|
jpayne@69
|
381 * @stable ICU 2.0
|
jpayne@69
|
382 */
|
jpayne@69
|
383 virtual CharacterIterator* clone() const = 0;
|
jpayne@69
|
384
|
jpayne@69
|
385 /**
|
jpayne@69
|
386 * Sets the iterator to refer to the first code unit in its
|
jpayne@69
|
387 * iteration range, and returns that code unit.
|
jpayne@69
|
388 * This can be used to begin an iteration with next().
|
jpayne@69
|
389 * @return the first code unit in its iteration range.
|
jpayne@69
|
390 * @stable ICU 2.0
|
jpayne@69
|
391 */
|
jpayne@69
|
392 virtual char16_t first(void) = 0;
|
jpayne@69
|
393
|
jpayne@69
|
394 /**
|
jpayne@69
|
395 * Sets the iterator to refer to the first code unit in its
|
jpayne@69
|
396 * iteration range, returns that code unit, and moves the position
|
jpayne@69
|
397 * to the second code unit. This is an alternative to setToStart()
|
jpayne@69
|
398 * for forward iteration with nextPostInc().
|
jpayne@69
|
399 * @return the first code unit in its iteration range.
|
jpayne@69
|
400 * @stable ICU 2.0
|
jpayne@69
|
401 */
|
jpayne@69
|
402 virtual char16_t firstPostInc(void);
|
jpayne@69
|
403
|
jpayne@69
|
404 /**
|
jpayne@69
|
405 * Sets the iterator to refer to the first code point in its
|
jpayne@69
|
406 * iteration range, and returns that code unit,
|
jpayne@69
|
407 * This can be used to begin an iteration with next32().
|
jpayne@69
|
408 * Note that an iteration with next32PostInc(), beginning with,
|
jpayne@69
|
409 * e.g., setToStart() or firstPostInc(), is more efficient.
|
jpayne@69
|
410 * @return the first code point in its iteration range.
|
jpayne@69
|
411 * @stable ICU 2.0
|
jpayne@69
|
412 */
|
jpayne@69
|
413 virtual UChar32 first32(void) = 0;
|
jpayne@69
|
414
|
jpayne@69
|
415 /**
|
jpayne@69
|
416 * Sets the iterator to refer to the first code point in its
|
jpayne@69
|
417 * iteration range, returns that code point, and moves the position
|
jpayne@69
|
418 * to the second code point. This is an alternative to setToStart()
|
jpayne@69
|
419 * for forward iteration with next32PostInc().
|
jpayne@69
|
420 * @return the first code point in its iteration range.
|
jpayne@69
|
421 * @stable ICU 2.0
|
jpayne@69
|
422 */
|
jpayne@69
|
423 virtual UChar32 first32PostInc(void);
|
jpayne@69
|
424
|
jpayne@69
|
425 /**
|
jpayne@69
|
426 * Sets the iterator to refer to the first code unit or code point in its
|
jpayne@69
|
427 * iteration range. This can be used to begin a forward
|
jpayne@69
|
428 * iteration with nextPostInc() or next32PostInc().
|
jpayne@69
|
429 * @return the start position of the iteration range
|
jpayne@69
|
430 * @stable ICU 2.0
|
jpayne@69
|
431 */
|
jpayne@69
|
432 inline int32_t setToStart();
|
jpayne@69
|
433
|
jpayne@69
|
434 /**
|
jpayne@69
|
435 * Sets the iterator to refer to the last code unit in its
|
jpayne@69
|
436 * iteration range, and returns that code unit.
|
jpayne@69
|
437 * This can be used to begin an iteration with previous().
|
jpayne@69
|
438 * @return the last code unit.
|
jpayne@69
|
439 * @stable ICU 2.0
|
jpayne@69
|
440 */
|
jpayne@69
|
441 virtual char16_t last(void) = 0;
|
jpayne@69
|
442
|
jpayne@69
|
443 /**
|
jpayne@69
|
444 * Sets the iterator to refer to the last code point in its
|
jpayne@69
|
445 * iteration range, and returns that code unit.
|
jpayne@69
|
446 * This can be used to begin an iteration with previous32().
|
jpayne@69
|
447 * @return the last code point.
|
jpayne@69
|
448 * @stable ICU 2.0
|
jpayne@69
|
449 */
|
jpayne@69
|
450 virtual UChar32 last32(void) = 0;
|
jpayne@69
|
451
|
jpayne@69
|
452 /**
|
jpayne@69
|
453 * Sets the iterator to the end of its iteration range, just behind
|
jpayne@69
|
454 * the last code unit or code point. This can be used to begin a backward
|
jpayne@69
|
455 * iteration with previous() or previous32().
|
jpayne@69
|
456 * @return the end position of the iteration range
|
jpayne@69
|
457 * @stable ICU 2.0
|
jpayne@69
|
458 */
|
jpayne@69
|
459 inline int32_t setToEnd();
|
jpayne@69
|
460
|
jpayne@69
|
461 /**
|
jpayne@69
|
462 * Sets the iterator to refer to the "position"-th code unit
|
jpayne@69
|
463 * in the text-storage object the iterator refers to, and
|
jpayne@69
|
464 * returns that code unit.
|
jpayne@69
|
465 * @param position the "position"-th code unit in the text-storage object
|
jpayne@69
|
466 * @return the "position"-th code unit.
|
jpayne@69
|
467 * @stable ICU 2.0
|
jpayne@69
|
468 */
|
jpayne@69
|
469 virtual char16_t setIndex(int32_t position) = 0;
|
jpayne@69
|
470
|
jpayne@69
|
471 /**
|
jpayne@69
|
472 * Sets the iterator to refer to the beginning of the code point
|
jpayne@69
|
473 * that contains the "position"-th code unit
|
jpayne@69
|
474 * in the text-storage object the iterator refers to, and
|
jpayne@69
|
475 * returns that code point.
|
jpayne@69
|
476 * The current position is adjusted to the beginning of the code point
|
jpayne@69
|
477 * (its first code unit).
|
jpayne@69
|
478 * @param position the "position"-th code unit in the text-storage object
|
jpayne@69
|
479 * @return the "position"-th code point.
|
jpayne@69
|
480 * @stable ICU 2.0
|
jpayne@69
|
481 */
|
jpayne@69
|
482 virtual UChar32 setIndex32(int32_t position) = 0;
|
jpayne@69
|
483
|
jpayne@69
|
484 /**
|
jpayne@69
|
485 * Returns the code unit the iterator currently refers to.
|
jpayne@69
|
486 * @return the current code unit.
|
jpayne@69
|
487 * @stable ICU 2.0
|
jpayne@69
|
488 */
|
jpayne@69
|
489 virtual char16_t current(void) const = 0;
|
jpayne@69
|
490
|
jpayne@69
|
491 /**
|
jpayne@69
|
492 * Returns the code point the iterator currently refers to.
|
jpayne@69
|
493 * @return the current code point.
|
jpayne@69
|
494 * @stable ICU 2.0
|
jpayne@69
|
495 */
|
jpayne@69
|
496 virtual UChar32 current32(void) const = 0;
|
jpayne@69
|
497
|
jpayne@69
|
498 /**
|
jpayne@69
|
499 * Advances to the next code unit in the iteration range
|
jpayne@69
|
500 * (toward endIndex()), and returns that code unit. If there are
|
jpayne@69
|
501 * no more code units to return, returns DONE.
|
jpayne@69
|
502 * @return the next code unit.
|
jpayne@69
|
503 * @stable ICU 2.0
|
jpayne@69
|
504 */
|
jpayne@69
|
505 virtual char16_t next(void) = 0;
|
jpayne@69
|
506
|
jpayne@69
|
507 /**
|
jpayne@69
|
508 * Advances to the next code point in the iteration range
|
jpayne@69
|
509 * (toward endIndex()), and returns that code point. If there are
|
jpayne@69
|
510 * no more code points to return, returns DONE.
|
jpayne@69
|
511 * Note that iteration with "pre-increment" semantics is less
|
jpayne@69
|
512 * efficient than iteration with "post-increment" semantics
|
jpayne@69
|
513 * that is provided by next32PostInc().
|
jpayne@69
|
514 * @return the next code point.
|
jpayne@69
|
515 * @stable ICU 2.0
|
jpayne@69
|
516 */
|
jpayne@69
|
517 virtual UChar32 next32(void) = 0;
|
jpayne@69
|
518
|
jpayne@69
|
519 /**
|
jpayne@69
|
520 * Advances to the previous code unit in the iteration range
|
jpayne@69
|
521 * (toward startIndex()), and returns that code unit. If there are
|
jpayne@69
|
522 * no more code units to return, returns DONE.
|
jpayne@69
|
523 * @return the previous code unit.
|
jpayne@69
|
524 * @stable ICU 2.0
|
jpayne@69
|
525 */
|
jpayne@69
|
526 virtual char16_t previous(void) = 0;
|
jpayne@69
|
527
|
jpayne@69
|
528 /**
|
jpayne@69
|
529 * Advances to the previous code point in the iteration range
|
jpayne@69
|
530 * (toward startIndex()), and returns that code point. If there are
|
jpayne@69
|
531 * no more code points to return, returns DONE.
|
jpayne@69
|
532 * @return the previous code point.
|
jpayne@69
|
533 * @stable ICU 2.0
|
jpayne@69
|
534 */
|
jpayne@69
|
535 virtual UChar32 previous32(void) = 0;
|
jpayne@69
|
536
|
jpayne@69
|
537 /**
|
jpayne@69
|
538 * Returns FALSE if there are no more code units or code points
|
jpayne@69
|
539 * before the current position in the iteration range.
|
jpayne@69
|
540 * This is used with previous() or previous32() in backward
|
jpayne@69
|
541 * iteration.
|
jpayne@69
|
542 * @return FALSE if there are no more code units or code points
|
jpayne@69
|
543 * before the current position in the iteration range, return TRUE otherwise.
|
jpayne@69
|
544 * @stable ICU 2.0
|
jpayne@69
|
545 */
|
jpayne@69
|
546 virtual UBool hasPrevious() = 0;
|
jpayne@69
|
547
|
jpayne@69
|
548 /**
|
jpayne@69
|
549 * Returns the numeric index in the underlying text-storage
|
jpayne@69
|
550 * object of the character returned by first(). Since it's
|
jpayne@69
|
551 * possible to create an iterator that iterates across only
|
jpayne@69
|
552 * part of a text-storage object, this number isn't
|
jpayne@69
|
553 * necessarily 0.
|
jpayne@69
|
554 * @returns the numeric index in the underlying text-storage
|
jpayne@69
|
555 * object of the character returned by first().
|
jpayne@69
|
556 * @stable ICU 2.0
|
jpayne@69
|
557 */
|
jpayne@69
|
558 inline int32_t startIndex(void) const;
|
jpayne@69
|
559
|
jpayne@69
|
560 /**
|
jpayne@69
|
561 * Returns the numeric index in the underlying text-storage
|
jpayne@69
|
562 * object of the position immediately BEYOND the character
|
jpayne@69
|
563 * returned by last().
|
jpayne@69
|
564 * @return the numeric index in the underlying text-storage
|
jpayne@69
|
565 * object of the position immediately BEYOND the character
|
jpayne@69
|
566 * returned by last().
|
jpayne@69
|
567 * @stable ICU 2.0
|
jpayne@69
|
568 */
|
jpayne@69
|
569 inline int32_t endIndex(void) const;
|
jpayne@69
|
570
|
jpayne@69
|
571 /**
|
jpayne@69
|
572 * Returns the numeric index in the underlying text-storage
|
jpayne@69
|
573 * object of the character the iterator currently refers to
|
jpayne@69
|
574 * (i.e., the character returned by current()).
|
jpayne@69
|
575 * @return the numeric index in the text-storage object of
|
jpayne@69
|
576 * the character the iterator currently refers to
|
jpayne@69
|
577 * @stable ICU 2.0
|
jpayne@69
|
578 */
|
jpayne@69
|
579 inline int32_t getIndex(void) const;
|
jpayne@69
|
580
|
jpayne@69
|
581 /**
|
jpayne@69
|
582 * Returns the length of the entire text in the underlying
|
jpayne@69
|
583 * text-storage object.
|
jpayne@69
|
584 * @return the length of the entire text in the text-storage object
|
jpayne@69
|
585 * @stable ICU 2.0
|
jpayne@69
|
586 */
|
jpayne@69
|
587 inline int32_t getLength() const;
|
jpayne@69
|
588
|
jpayne@69
|
589 /**
|
jpayne@69
|
590 * Moves the current position relative to the start or end of the
|
jpayne@69
|
591 * iteration range, or relative to the current position itself.
|
jpayne@69
|
592 * The movement is expressed in numbers of code units forward
|
jpayne@69
|
593 * or backward by specifying a positive or negative delta.
|
jpayne@69
|
594 * @param delta the position relative to origin. A positive delta means forward;
|
jpayne@69
|
595 * a negative delta means backward.
|
jpayne@69
|
596 * @param origin Origin enumeration {kStart, kCurrent, kEnd}
|
jpayne@69
|
597 * @return the new position
|
jpayne@69
|
598 * @stable ICU 2.0
|
jpayne@69
|
599 */
|
jpayne@69
|
600 virtual int32_t move(int32_t delta, EOrigin origin) = 0;
|
jpayne@69
|
601
|
jpayne@69
|
602 /**
|
jpayne@69
|
603 * Moves the current position relative to the start or end of the
|
jpayne@69
|
604 * iteration range, or relative to the current position itself.
|
jpayne@69
|
605 * The movement is expressed in numbers of code points forward
|
jpayne@69
|
606 * or backward by specifying a positive or negative delta.
|
jpayne@69
|
607 * @param delta the position relative to origin. A positive delta means forward;
|
jpayne@69
|
608 * a negative delta means backward.
|
jpayne@69
|
609 * @param origin Origin enumeration {kStart, kCurrent, kEnd}
|
jpayne@69
|
610 * @return the new position
|
jpayne@69
|
611 * @stable ICU 2.0
|
jpayne@69
|
612 */
|
jpayne@69
|
613 #ifdef move32
|
jpayne@69
|
614 // One of the system headers right now is sometimes defining a conflicting macro we don't use
|
jpayne@69
|
615 #undef move32
|
jpayne@69
|
616 #endif
|
jpayne@69
|
617 virtual int32_t move32(int32_t delta, EOrigin origin) = 0;
|
jpayne@69
|
618
|
jpayne@69
|
619 /**
|
jpayne@69
|
620 * Copies the text under iteration into the UnicodeString
|
jpayne@69
|
621 * referred to by "result".
|
jpayne@69
|
622 * @param result Receives a copy of the text under iteration.
|
jpayne@69
|
623 * @stable ICU 2.0
|
jpayne@69
|
624 */
|
jpayne@69
|
625 virtual void getText(UnicodeString& result) = 0;
|
jpayne@69
|
626
|
jpayne@69
|
627 protected:
|
jpayne@69
|
628 /**
|
jpayne@69
|
629 * Empty constructor.
|
jpayne@69
|
630 * @stable ICU 2.0
|
jpayne@69
|
631 */
|
jpayne@69
|
632 CharacterIterator();
|
jpayne@69
|
633
|
jpayne@69
|
634 /**
|
jpayne@69
|
635 * Constructor, just setting the length field in this base class.
|
jpayne@69
|
636 * @stable ICU 2.0
|
jpayne@69
|
637 */
|
jpayne@69
|
638 CharacterIterator(int32_t length);
|
jpayne@69
|
639
|
jpayne@69
|
640 /**
|
jpayne@69
|
641 * Constructor, just setting the length and position fields in this base class.
|
jpayne@69
|
642 * @stable ICU 2.0
|
jpayne@69
|
643 */
|
jpayne@69
|
644 CharacterIterator(int32_t length, int32_t position);
|
jpayne@69
|
645
|
jpayne@69
|
646 /**
|
jpayne@69
|
647 * Constructor, just setting the length, start, end, and position fields in this base class.
|
jpayne@69
|
648 * @stable ICU 2.0
|
jpayne@69
|
649 */
|
jpayne@69
|
650 CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position);
|
jpayne@69
|
651
|
jpayne@69
|
652 /**
|
jpayne@69
|
653 * Copy constructor.
|
jpayne@69
|
654 *
|
jpayne@69
|
655 * @param that The CharacterIterator to be copied
|
jpayne@69
|
656 * @stable ICU 2.0
|
jpayne@69
|
657 */
|
jpayne@69
|
658 CharacterIterator(const CharacterIterator &that);
|
jpayne@69
|
659
|
jpayne@69
|
660 /**
|
jpayne@69
|
661 * Assignment operator. Sets this CharacterIterator to have the same behavior,
|
jpayne@69
|
662 * as the one passed in.
|
jpayne@69
|
663 * @param that The CharacterIterator passed in.
|
jpayne@69
|
664 * @return the newly set CharacterIterator.
|
jpayne@69
|
665 * @stable ICU 2.0
|
jpayne@69
|
666 */
|
jpayne@69
|
667 CharacterIterator &operator=(const CharacterIterator &that);
|
jpayne@69
|
668
|
jpayne@69
|
669 /**
|
jpayne@69
|
670 * Base class text length field.
|
jpayne@69
|
671 * Necessary this for correct getText() and hashCode().
|
jpayne@69
|
672 * @stable ICU 2.0
|
jpayne@69
|
673 */
|
jpayne@69
|
674 int32_t textLength;
|
jpayne@69
|
675
|
jpayne@69
|
676 /**
|
jpayne@69
|
677 * Base class field for the current position.
|
jpayne@69
|
678 * @stable ICU 2.0
|
jpayne@69
|
679 */
|
jpayne@69
|
680 int32_t pos;
|
jpayne@69
|
681
|
jpayne@69
|
682 /**
|
jpayne@69
|
683 * Base class field for the start of the iteration range.
|
jpayne@69
|
684 * @stable ICU 2.0
|
jpayne@69
|
685 */
|
jpayne@69
|
686 int32_t begin;
|
jpayne@69
|
687
|
jpayne@69
|
688 /**
|
jpayne@69
|
689 * Base class field for the end of the iteration range.
|
jpayne@69
|
690 * @stable ICU 2.0
|
jpayne@69
|
691 */
|
jpayne@69
|
692 int32_t end;
|
jpayne@69
|
693 };
|
jpayne@69
|
694
|
jpayne@69
|
695 inline UBool
|
jpayne@69
|
696 ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const {
|
jpayne@69
|
697 return !operator==(that);
|
jpayne@69
|
698 }
|
jpayne@69
|
699
|
jpayne@69
|
700 inline int32_t
|
jpayne@69
|
701 CharacterIterator::setToStart() {
|
jpayne@69
|
702 return move(0, kStart);
|
jpayne@69
|
703 }
|
jpayne@69
|
704
|
jpayne@69
|
705 inline int32_t
|
jpayne@69
|
706 CharacterIterator::setToEnd() {
|
jpayne@69
|
707 return move(0, kEnd);
|
jpayne@69
|
708 }
|
jpayne@69
|
709
|
jpayne@69
|
710 inline int32_t
|
jpayne@69
|
711 CharacterIterator::startIndex(void) const {
|
jpayne@69
|
712 return begin;
|
jpayne@69
|
713 }
|
jpayne@69
|
714
|
jpayne@69
|
715 inline int32_t
|
jpayne@69
|
716 CharacterIterator::endIndex(void) const {
|
jpayne@69
|
717 return end;
|
jpayne@69
|
718 }
|
jpayne@69
|
719
|
jpayne@69
|
720 inline int32_t
|
jpayne@69
|
721 CharacterIterator::getIndex(void) const {
|
jpayne@69
|
722 return pos;
|
jpayne@69
|
723 }
|
jpayne@69
|
724
|
jpayne@69
|
725 inline int32_t
|
jpayne@69
|
726 CharacterIterator::getLength(void) const {
|
jpayne@69
|
727 return textLength;
|
jpayne@69
|
728 }
|
jpayne@69
|
729
|
jpayne@69
|
730 U_NAMESPACE_END
|
jpayne@69
|
731
|
jpayne@69
|
732 #endif /* U_SHOW_CPLUSPLUS_API */
|
jpayne@69
|
733
|
jpayne@69
|
734 #endif
|