jpayne@69
|
1 // © 2016 and later: Unicode, Inc. and others.
|
jpayne@69
|
2 // License & terms of use: http://www.unicode.org/copyright.html
|
jpayne@69
|
3 /*
|
jpayne@69
|
4 **********************************************************************
|
jpayne@69
|
5 * Copyright (c) 2002-2014, International Business Machines
|
jpayne@69
|
6 * Corporation and others. All Rights Reserved.
|
jpayne@69
|
7 **********************************************************************
|
jpayne@69
|
8 */
|
jpayne@69
|
9 #ifndef USETITER_H
|
jpayne@69
|
10 #define USETITER_H
|
jpayne@69
|
11
|
jpayne@69
|
12 #include "unicode/utypes.h"
|
jpayne@69
|
13
|
jpayne@69
|
14 #if U_SHOW_CPLUSPLUS_API
|
jpayne@69
|
15
|
jpayne@69
|
16 #include "unicode/uobject.h"
|
jpayne@69
|
17 #include "unicode/unistr.h"
|
jpayne@69
|
18
|
jpayne@69
|
19 /**
|
jpayne@69
|
20 * \file
|
jpayne@69
|
21 * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
|
jpayne@69
|
22 */
|
jpayne@69
|
23
|
jpayne@69
|
24 U_NAMESPACE_BEGIN
|
jpayne@69
|
25
|
jpayne@69
|
26 class UnicodeSet;
|
jpayne@69
|
27 class UnicodeString;
|
jpayne@69
|
28
|
jpayne@69
|
29 /**
|
jpayne@69
|
30 *
|
jpayne@69
|
31 * UnicodeSetIterator iterates over the contents of a UnicodeSet. It
|
jpayne@69
|
32 * iterates over either code points or code point ranges. After all
|
jpayne@69
|
33 * code points or ranges have been returned, it returns the
|
jpayne@69
|
34 * multicharacter strings of the UnicodeSet, if any.
|
jpayne@69
|
35 *
|
jpayne@69
|
36 * This class is not intended to be subclassed. Consider any fields
|
jpayne@69
|
37 * or methods declared as "protected" to be private. The use of
|
jpayne@69
|
38 * protected in this class is an artifact of history.
|
jpayne@69
|
39 *
|
jpayne@69
|
40 * <p>To iterate over code points and strings, use a loop like this:
|
jpayne@69
|
41 * <pre>
|
jpayne@69
|
42 * UnicodeSetIterator it(set);
|
jpayne@69
|
43 * while (it.next()) {
|
jpayne@69
|
44 * processItem(it.getString());
|
jpayne@69
|
45 * }
|
jpayne@69
|
46 * </pre>
|
jpayne@69
|
47 * <p>Each item in the set is accessed as a string. Set elements
|
jpayne@69
|
48 * consisting of single code points are returned as strings containing
|
jpayne@69
|
49 * just the one code point.
|
jpayne@69
|
50 *
|
jpayne@69
|
51 * <p>To iterate over code point ranges, instead of individual code points,
|
jpayne@69
|
52 * use a loop like this:
|
jpayne@69
|
53 * <pre>
|
jpayne@69
|
54 * UnicodeSetIterator it(set);
|
jpayne@69
|
55 * while (it.nextRange()) {
|
jpayne@69
|
56 * if (it.isString()) {
|
jpayne@69
|
57 * processString(it.getString());
|
jpayne@69
|
58 * } else {
|
jpayne@69
|
59 * processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
|
jpayne@69
|
60 * }
|
jpayne@69
|
61 * }
|
jpayne@69
|
62 * </pre>
|
jpayne@69
|
63 * @author M. Davis
|
jpayne@69
|
64 * @stable ICU 2.4
|
jpayne@69
|
65 */
|
jpayne@69
|
66 class U_COMMON_API UnicodeSetIterator : public UObject {
|
jpayne@69
|
67
|
jpayne@69
|
68 protected:
|
jpayne@69
|
69
|
jpayne@69
|
70 /**
|
jpayne@69
|
71 * Value of <tt>codepoint</tt> if the iterator points to a string.
|
jpayne@69
|
72 * If <tt>codepoint == IS_STRING</tt>, then examine
|
jpayne@69
|
73 * <tt>string</tt> for the current iteration result.
|
jpayne@69
|
74 * @stable ICU 2.4
|
jpayne@69
|
75 */
|
jpayne@69
|
76 enum { IS_STRING = -1 };
|
jpayne@69
|
77
|
jpayne@69
|
78 /**
|
jpayne@69
|
79 * Current code point, or the special value <tt>IS_STRING</tt>, if
|
jpayne@69
|
80 * the iterator points to a string.
|
jpayne@69
|
81 * @stable ICU 2.4
|
jpayne@69
|
82 */
|
jpayne@69
|
83 UChar32 codepoint;
|
jpayne@69
|
84
|
jpayne@69
|
85 /**
|
jpayne@69
|
86 * When iterating over ranges using <tt>nextRange()</tt>,
|
jpayne@69
|
87 * <tt>codepointEnd</tt> contains the inclusive end of the
|
jpayne@69
|
88 * iteration range, if <tt>codepoint != IS_STRING</tt>. If
|
jpayne@69
|
89 * iterating over code points using <tt>next()</tt>, or if
|
jpayne@69
|
90 * <tt>codepoint == IS_STRING</tt>, then the value of
|
jpayne@69
|
91 * <tt>codepointEnd</tt> is undefined.
|
jpayne@69
|
92 * @stable ICU 2.4
|
jpayne@69
|
93 */
|
jpayne@69
|
94 UChar32 codepointEnd;
|
jpayne@69
|
95
|
jpayne@69
|
96 /**
|
jpayne@69
|
97 * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
|
jpayne@69
|
98 * to the current string. If <tt>codepoint != IS_STRING</tt>, the
|
jpayne@69
|
99 * value of <tt>string</tt> is undefined.
|
jpayne@69
|
100 * @stable ICU 2.4
|
jpayne@69
|
101 */
|
jpayne@69
|
102 const UnicodeString* string;
|
jpayne@69
|
103
|
jpayne@69
|
104 public:
|
jpayne@69
|
105
|
jpayne@69
|
106 /**
|
jpayne@69
|
107 * Create an iterator over the given set. The iterator is valid
|
jpayne@69
|
108 * only so long as <tt>set</tt> is valid.
|
jpayne@69
|
109 * @param set set to iterate over
|
jpayne@69
|
110 * @stable ICU 2.4
|
jpayne@69
|
111 */
|
jpayne@69
|
112 UnicodeSetIterator(const UnicodeSet& set);
|
jpayne@69
|
113
|
jpayne@69
|
114 /**
|
jpayne@69
|
115 * Create an iterator over nothing. <tt>next()</tt> and
|
jpayne@69
|
116 * <tt>nextRange()</tt> return false. This is a convenience
|
jpayne@69
|
117 * constructor allowing the target to be set later.
|
jpayne@69
|
118 * @stable ICU 2.4
|
jpayne@69
|
119 */
|
jpayne@69
|
120 UnicodeSetIterator();
|
jpayne@69
|
121
|
jpayne@69
|
122 /**
|
jpayne@69
|
123 * Destructor.
|
jpayne@69
|
124 * @stable ICU 2.4
|
jpayne@69
|
125 */
|
jpayne@69
|
126 virtual ~UnicodeSetIterator();
|
jpayne@69
|
127
|
jpayne@69
|
128 /**
|
jpayne@69
|
129 * Returns true if the current element is a string. If so, the
|
jpayne@69
|
130 * caller can retrieve it with <tt>getString()</tt>. If this
|
jpayne@69
|
131 * method returns false, the current element is a code point or
|
jpayne@69
|
132 * code point range, depending on whether <tt>next()</tt> or
|
jpayne@69
|
133 * <tt>nextRange()</tt> was called.
|
jpayne@69
|
134 * Elements of types string and codepoint can both be retrieved
|
jpayne@69
|
135 * with the function <tt>getString()</tt>.
|
jpayne@69
|
136 * Elements of type codepoint can also be retrieved with
|
jpayne@69
|
137 * <tt>getCodepoint()</tt>.
|
jpayne@69
|
138 * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
|
jpayne@69
|
139 * of the range, and <tt>getCodepointEnd()</tt> returns the end
|
jpayne@69
|
140 * of the range.
|
jpayne@69
|
141 * @stable ICU 2.4
|
jpayne@69
|
142 */
|
jpayne@69
|
143 inline UBool isString() const;
|
jpayne@69
|
144
|
jpayne@69
|
145 /**
|
jpayne@69
|
146 * Returns the current code point, if <tt>isString()</tt> returned
|
jpayne@69
|
147 * false. Otherwise returns an undefined result.
|
jpayne@69
|
148 * @stable ICU 2.4
|
jpayne@69
|
149 */
|
jpayne@69
|
150 inline UChar32 getCodepoint() const;
|
jpayne@69
|
151
|
jpayne@69
|
152 /**
|
jpayne@69
|
153 * Returns the end of the current code point range, if
|
jpayne@69
|
154 * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
|
jpayne@69
|
155 * called. Otherwise returns an undefined result.
|
jpayne@69
|
156 * @stable ICU 2.4
|
jpayne@69
|
157 */
|
jpayne@69
|
158 inline UChar32 getCodepointEnd() const;
|
jpayne@69
|
159
|
jpayne@69
|
160 /**
|
jpayne@69
|
161 * Returns the current string, if <tt>isString()</tt> returned
|
jpayne@69
|
162 * true. If the current iteration item is a code point, a UnicodeString
|
jpayne@69
|
163 * containing that single code point is returned.
|
jpayne@69
|
164 *
|
jpayne@69
|
165 * Ownership of the returned string remains with the iterator.
|
jpayne@69
|
166 * The string is guaranteed to remain valid only until the iterator is
|
jpayne@69
|
167 * advanced to the next item, or until the iterator is deleted.
|
jpayne@69
|
168 *
|
jpayne@69
|
169 * @stable ICU 2.4
|
jpayne@69
|
170 */
|
jpayne@69
|
171 const UnicodeString& getString();
|
jpayne@69
|
172
|
jpayne@69
|
173 /**
|
jpayne@69
|
174 * Advances the iteration position to the next element in the set,
|
jpayne@69
|
175 * which can be either a single code point or a string.
|
jpayne@69
|
176 * If there are no more elements in the set, return false.
|
jpayne@69
|
177 *
|
jpayne@69
|
178 * <p>
|
jpayne@69
|
179 * If <tt>isString() == TRUE</tt>, the value is a
|
jpayne@69
|
180 * string, otherwise the value is a
|
jpayne@69
|
181 * single code point. Elements of either type can be retrieved
|
jpayne@69
|
182 * with the function <tt>getString()</tt>, while elements
|
jpayne@69
|
183 * consisting of a single code point can be retrieved with
|
jpayne@69
|
184 * <tt>getCodepoint()</tt>.
|
jpayne@69
|
185 *
|
jpayne@69
|
186 * <p>The order of iteration is all code points in sorted order,
|
jpayne@69
|
187 * followed by all strings in sorted order. Do not mix
|
jpayne@69
|
188 * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
|
jpayne@69
|
189 * calling <tt>reset()</tt> between them. The results of doing so
|
jpayne@69
|
190 * are undefined.
|
jpayne@69
|
191 *
|
jpayne@69
|
192 * @return true if there was another element in the set.
|
jpayne@69
|
193 * @stable ICU 2.4
|
jpayne@69
|
194 */
|
jpayne@69
|
195 UBool next();
|
jpayne@69
|
196
|
jpayne@69
|
197 /**
|
jpayne@69
|
198 * Advances to the next element in the set, either a code point range
|
jpayne@69
|
199 * or a string. If there are no more elements in the set, return
|
jpayne@69
|
200 * false. If <tt>isString() == TRUE</tt>, the value is a
|
jpayne@69
|
201 * string and can be accessed with <tt>getString()</tt>. Otherwise the value is a
|
jpayne@69
|
202 * range of one or more code points from <tt>getCodepoint()</tt> to
|
jpayne@69
|
203 * <tt>getCodepointEnd()</tt> inclusive.
|
jpayne@69
|
204 *
|
jpayne@69
|
205 * <p>The order of iteration is all code point ranges in sorted
|
jpayne@69
|
206 * order, followed by all strings in sorted order. Ranges are
|
jpayne@69
|
207 * disjoint and non-contiguous. The value returned from <tt>getString()</tt>
|
jpayne@69
|
208 * is undefined unless <tt>isString() == TRUE</tt>. Do not mix calls to
|
jpayne@69
|
209 * <tt>next()</tt> and <tt>nextRange()</tt> without calling
|
jpayne@69
|
210 * <tt>reset()</tt> between them. The results of doing so are
|
jpayne@69
|
211 * undefined.
|
jpayne@69
|
212 *
|
jpayne@69
|
213 * @return true if there was another element in the set.
|
jpayne@69
|
214 * @stable ICU 2.4
|
jpayne@69
|
215 */
|
jpayne@69
|
216 UBool nextRange();
|
jpayne@69
|
217
|
jpayne@69
|
218 /**
|
jpayne@69
|
219 * Sets this iterator to visit the elements of the given set and
|
jpayne@69
|
220 * resets it to the start of that set. The iterator is valid only
|
jpayne@69
|
221 * so long as <tt>set</tt> is valid.
|
jpayne@69
|
222 * @param set the set to iterate over.
|
jpayne@69
|
223 * @stable ICU 2.4
|
jpayne@69
|
224 */
|
jpayne@69
|
225 void reset(const UnicodeSet& set);
|
jpayne@69
|
226
|
jpayne@69
|
227 /**
|
jpayne@69
|
228 * Resets this iterator to the start of the set.
|
jpayne@69
|
229 * @stable ICU 2.4
|
jpayne@69
|
230 */
|
jpayne@69
|
231 void reset();
|
jpayne@69
|
232
|
jpayne@69
|
233 /**
|
jpayne@69
|
234 * ICU "poor man's RTTI", returns a UClassID for this class.
|
jpayne@69
|
235 *
|
jpayne@69
|
236 * @stable ICU 2.4
|
jpayne@69
|
237 */
|
jpayne@69
|
238 static UClassID U_EXPORT2 getStaticClassID();
|
jpayne@69
|
239
|
jpayne@69
|
240 /**
|
jpayne@69
|
241 * ICU "poor man's RTTI", returns a UClassID for the actual class.
|
jpayne@69
|
242 *
|
jpayne@69
|
243 * @stable ICU 2.4
|
jpayne@69
|
244 */
|
jpayne@69
|
245 virtual UClassID getDynamicClassID() const;
|
jpayne@69
|
246
|
jpayne@69
|
247 // ======================= PRIVATES ===========================
|
jpayne@69
|
248
|
jpayne@69
|
249 protected:
|
jpayne@69
|
250
|
jpayne@69
|
251 // endElement and nextElement are really UChar32's, but we keep
|
jpayne@69
|
252 // them as signed int32_t's so we can do comparisons with
|
jpayne@69
|
253 // endElement set to -1. Leave them as int32_t's.
|
jpayne@69
|
254 /** The set
|
jpayne@69
|
255 * @stable ICU 2.4
|
jpayne@69
|
256 */
|
jpayne@69
|
257 const UnicodeSet* set;
|
jpayne@69
|
258 /** End range
|
jpayne@69
|
259 * @stable ICU 2.4
|
jpayne@69
|
260 */
|
jpayne@69
|
261 int32_t endRange;
|
jpayne@69
|
262 /** Range
|
jpayne@69
|
263 * @stable ICU 2.4
|
jpayne@69
|
264 */
|
jpayne@69
|
265 int32_t range;
|
jpayne@69
|
266 /** End element
|
jpayne@69
|
267 * @stable ICU 2.4
|
jpayne@69
|
268 */
|
jpayne@69
|
269 int32_t endElement;
|
jpayne@69
|
270 /** Next element
|
jpayne@69
|
271 * @stable ICU 2.4
|
jpayne@69
|
272 */
|
jpayne@69
|
273 int32_t nextElement;
|
jpayne@69
|
274 //UBool abbreviated;
|
jpayne@69
|
275 /** Next string
|
jpayne@69
|
276 * @stable ICU 2.4
|
jpayne@69
|
277 */
|
jpayne@69
|
278 int32_t nextString;
|
jpayne@69
|
279 /** String count
|
jpayne@69
|
280 * @stable ICU 2.4
|
jpayne@69
|
281 */
|
jpayne@69
|
282 int32_t stringCount;
|
jpayne@69
|
283
|
jpayne@69
|
284 /**
|
jpayne@69
|
285 * Points to the string to use when the caller asks for a
|
jpayne@69
|
286 * string and the current iteration item is a code point, not a string.
|
jpayne@69
|
287 * @internal
|
jpayne@69
|
288 */
|
jpayne@69
|
289 UnicodeString *cpString;
|
jpayne@69
|
290
|
jpayne@69
|
291 /** Copy constructor. Disallowed.
|
jpayne@69
|
292 * @stable ICU 2.4
|
jpayne@69
|
293 */
|
jpayne@69
|
294 UnicodeSetIterator(const UnicodeSetIterator&); // disallow
|
jpayne@69
|
295
|
jpayne@69
|
296 /** Assignment operator. Disallowed.
|
jpayne@69
|
297 * @stable ICU 2.4
|
jpayne@69
|
298 */
|
jpayne@69
|
299 UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow
|
jpayne@69
|
300
|
jpayne@69
|
301 /** Load range
|
jpayne@69
|
302 * @stable ICU 2.4
|
jpayne@69
|
303 */
|
jpayne@69
|
304 virtual void loadRange(int32_t range);
|
jpayne@69
|
305
|
jpayne@69
|
306 };
|
jpayne@69
|
307
|
jpayne@69
|
308 inline UBool UnicodeSetIterator::isString() const { // reports whether the current element is a string
|
jpayne@69
|
309 return codepoint == (UChar32)IS_STRING; // codepoint holds the IS_STRING sentinel (-1) when the iterator points to a string
|
jpayne@69
|
310 }
|
jpayne@69
|
311
|
jpayne@69
|
312 inline UChar32 UnicodeSetIterator::getCodepoint() const { // plain accessor for the codepoint field
|
jpayne@69
|
313 return codepoint; // equals IS_STRING when the current element is a string, so check isString() first
|
jpayne@69
|
314 }
|
jpayne@69
|
315
|
jpayne@69
|
316 inline UChar32 UnicodeSetIterator::getCodepointEnd() const { // plain accessor for the codepointEnd field
|
jpayne@69
|
317 return codepointEnd; // inclusive end of the range; documented as undefined unless nextRange() yielded a code point range
|
jpayne@69
|
318 }
|
jpayne@69
|
319
|
jpayne@69
|
320
|
jpayne@69
|
321 U_NAMESPACE_END
|
jpayne@69
|
322
|
jpayne@69
|
323 #endif /* U_SHOW_CPLUSPLUS_API */
|
jpayne@69
|
324
|
jpayne@69
|
325 #endif
|