annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/unicode/usetiter.h @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 // © 2016 and later: Unicode, Inc. and others.
jpayne@69 2 // License & terms of use: http://www.unicode.org/copyright.html
jpayne@69 3 /*
jpayne@69 4 **********************************************************************
jpayne@69 5 * Copyright (c) 2002-2014, International Business Machines
jpayne@69 6 * Corporation and others. All Rights Reserved.
jpayne@69 7 **********************************************************************
jpayne@69 8 */
jpayne@69 9 #ifndef USETITER_H
jpayne@69 10 #define USETITER_H
jpayne@69 11
jpayne@69 12 #include "unicode/utypes.h"
jpayne@69 13
jpayne@69 14 #if U_SHOW_CPLUSPLUS_API
jpayne@69 15
jpayne@69 16 #include "unicode/uobject.h"
jpayne@69 17 #include "unicode/unistr.h"
jpayne@69 18
jpayne@69 19 /**
jpayne@69 20 * \file
jpayne@69 21 * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
jpayne@69 22 */
jpayne@69 23
jpayne@69 24 U_NAMESPACE_BEGIN
jpayne@69 25
jpayne@69 26 class UnicodeSet;
jpayne@69 27 class UnicodeString;
jpayne@69 28
jpayne@69 29 /**
jpayne@69 30 *
jpayne@69 31 * UnicodeSetIterator iterates over the contents of a UnicodeSet. It
jpayne@69 32 * iterates over either code points or code point ranges. After all
jpayne@69 33 * code points or ranges have been returned, it returns the
jpayne@69 34 * multicharacter strings of the UnicodeSet, if any.
jpayne@69 35 *
jpayne@69 36 * This class is not intended to be subclassed. Consider any fields
jpayne@69 37 * or methods declared as "protected" to be private. The use of
jpayne@69 38 * protected in this class is an artifact of history.
jpayne@69 39 *
jpayne@69 40 * <p>To iterate over code points and strings, use a loop like this:
jpayne@69 41 * <pre>
jpayne@69 42 * UnicodeSetIterator it(set);
jpayne@69 43 * while (it.next()) {
jpayne@69 44 * processItem(it.getString());
jpayne@69 45 * }
jpayne@69 46 * </pre>
jpayne@69 47 * <p>Each item in the set is accessed as a string. Set elements
jpayne@69 48 * consisting of single code points are returned as strings containing
jpayne@69 49 * just the one code point.
jpayne@69 50 *
jpayne@69 51 * <p>To iterate over code point ranges, instead of individual code points,
jpayne@69 52 * use a loop like this:
jpayne@69 53 * <pre>
jpayne@69 54 * UnicodeSetIterator it(set);
jpayne@69 55 * while (it.nextRange()) {
jpayne@69 56 * if (it.isString()) {
jpayne@69 57 * processString(it.getString());
jpayne@69 58 * } else {
jpayne@69 59 * processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
jpayne@69 60 * }
jpayne@69 61 * }
jpayne@69 62 * </pre>
jpayne@69 63 * @author M. Davis
jpayne@69 64 * @stable ICU 2.4
jpayne@69 65 */
jpayne@69 66 class U_COMMON_API UnicodeSetIterator : public UObject {
jpayne@69 67
jpayne@69 68 protected:
jpayne@69 69
jpayne@69 70 /**
jpayne@69 71 * Value of <tt>codepoint</tt> if the iterator points to a string.
jpayne@69 72 * If <tt>codepoint == IS_STRING</tt>, then examine
jpayne@69 73 * <tt>string</tt> for the current iteration result.
jpayne@69 74 * @stable ICU 2.4
jpayne@69 75 */
jpayne@69 76 enum { IS_STRING = -1 };
jpayne@69 77
jpayne@69 78 /**
jpayne@69 79 * Current code point, or the special value <tt>IS_STRING</tt>, if
jpayne@69 80 * the iterator points to a string.
jpayne@69 81 * @stable ICU 2.4
jpayne@69 82 */
jpayne@69 83 UChar32 codepoint;
jpayne@69 84
jpayne@69 85 /**
jpayne@69 86 * When iterating over ranges using <tt>nextRange()</tt>,
jpayne@69 87 * <tt>codepointEnd</tt> contains the inclusive end of the
jpayne@69 88 * iteration range, if <tt>codepoint != IS_STRING</tt>. If
jpayne@69 89 * iterating over code points using <tt>next()</tt>, or if
jpayne@69 90 * <tt>codepoint == IS_STRING</tt>, then the value of
jpayne@69 91 * <tt>codepointEnd</tt> is undefined.
jpayne@69 92 * @stable ICU 2.4
jpayne@69 93 */
jpayne@69 94 UChar32 codepointEnd;
jpayne@69 95
jpayne@69 96 /**
jpayne@69 97 * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
jpayne@69 98 * to the current string. If <tt>codepoint != IS_STRING</tt>, the
jpayne@69 99 * value of <tt>string</tt> is undefined.
jpayne@69 100 * @stable ICU 2.4
jpayne@69 101 */
jpayne@69 102 const UnicodeString* string;
jpayne@69 103
jpayne@69 104 public:
jpayne@69 105
jpayne@69 106 /**
jpayne@69 107 * Create an iterator over the given set. The iterator is valid
jpayne@69 108 * only so long as <tt>set</tt> is valid.
jpayne@69 109 * @param set set to iterate over
jpayne@69 110 * @stable ICU 2.4
jpayne@69 111 */
jpayne@69 112 UnicodeSetIterator(const UnicodeSet& set);
jpayne@69 113
jpayne@69 114 /**
jpayne@69 115 * Create an iterator over nothing. <tt>next()</tt> and
jpayne@69 116 * <tt>nextRange()</tt> return false. This is a convenience
jpayne@69 117 * constructor allowing the target to be set later.
jpayne@69 118 * @stable ICU 2.4
jpayne@69 119 */
jpayne@69 120 UnicodeSetIterator();
jpayne@69 121
jpayne@69 122 /**
jpayne@69 123 * Destructor.
jpayne@69 124 * @stable ICU 2.4
jpayne@69 125 */
jpayne@69 126 virtual ~UnicodeSetIterator();
jpayne@69 127
jpayne@69 128 /**
jpayne@69 129 * Returns true if the current element is a string. If so, the
jpayne@69 130 * caller can retrieve it with <tt>getString()</tt>. If this
jpayne@69 131 * method returns false, the current element is a code point or
jpayne@69 132 * code point range, depending on whether <tt>next()</tt> or
jpayne@69 133 * <tt>nextRange()</tt> was called.
jpayne@69 134 * Elements of types string and codepoint can both be retrieved
jpayne@69 135 * with the function <tt>getString()</tt>.
jpayne@69 136 * Elements of type codepoint can also be retrieved with
jpayne@69 137 * <tt>getCodepoint()</tt>.
jpayne@69 138 * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
jpayne@69 139 * of the range, and <tt>getCodepointEnd()</tt> returns the end
jpayne@69 140 * of the range.
jpayne@69 141 * @stable ICU 2.4
jpayne@69 142 */
jpayne@69 143 inline UBool isString() const;
jpayne@69 144
jpayne@69 145 /**
jpayne@69 146 * Returns the current code point, if <tt>isString()</tt> returned
jpayne@69 147 * false. Otherwise returns an undefined result.
jpayne@69 148 * @stable ICU 2.4
jpayne@69 149 */
jpayne@69 150 inline UChar32 getCodepoint() const;
jpayne@69 151
jpayne@69 152 /**
jpayne@69 153 * Returns the end of the current code point range, if
jpayne@69 154 * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
jpayne@69 155 * called. Otherwise returns an undefined result.
jpayne@69 156 * @stable ICU 2.4
jpayne@69 157 */
jpayne@69 158 inline UChar32 getCodepointEnd() const;
jpayne@69 159
jpayne@69 160 /**
jpayne@69 161 * Returns the current string, if <tt>isString()</tt> returned
jpayne@69 162 * true. If the current iteration item is a code point, a UnicodeString
jpayne@69 163 * containing that single code point is returned.
jpayne@69 164 *
jpayne@69 165 * Ownership of the returned string remains with the iterator.
jpayne@69 166 * The string is guaranteed to remain valid only until the iterator is
jpayne@69 167 * advanced to the next item, or until the iterator is deleted.
jpayne@69 168 *
jpayne@69 169 * @stable ICU 2.4
jpayne@69 170 */
jpayne@69 171 const UnicodeString& getString();
jpayne@69 172
jpayne@69 173 /**
jpayne@69 174 * Advances the iteration position to the next element in the set,
jpayne@69 175 * which can be either a single code point or a string.
jpayne@69 176 * If there are no more elements in the set, return false.
jpayne@69 177 *
jpayne@69 178 * <p>
jpayne@69 179 * If <tt>isString() == TRUE</tt>, the value is a
jpayne@69 180 * string, otherwise the value is a
jpayne@69 181 * single code point. Elements of either type can be retrieved
jpayne@69 182 * with the function <tt>getString()</tt>, while elements
jpayne@69 183 * consisting of a single code point can be retrieved with
jpayne@69 184 * <tt>getCodepoint()</tt>.
jpayne@69 185 *
jpayne@69 186 * <p>The order of iteration is all code points in sorted order,
jpayne@69 187 * followed by all strings in sorted order. Do not mix
jpayne@69 188 * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
jpayne@69 189 * calling <tt>reset()</tt> between them. The results of doing so
jpayne@69 190 * are undefined.
jpayne@69 191 *
jpayne@69 192 * @return true if there was another element in the set.
jpayne@69 193 * @stable ICU 2.4
jpayne@69 194 */
jpayne@69 195 UBool next();
jpayne@69 196
jpayne@69 197 /**
jpayne@69 198 * Returns the next element in the set, either a code point range
jpayne@69 199 * or a string. If there are no more elements in the set, return
jpayne@69 200 * false. If <tt>isString() == TRUE</tt>, the value is a
jpayne@69 201 * string and can be accessed with <tt>getString()</tt>. Otherwise the value is a
jpayne@69 202 * range of one or more code points from <tt>getCodepoint()</tt> to
jpayne@69 203 * <tt>getCodepointEnd()</tt> inclusive.
jpayne@69 204 *
jpayne@69 205 * <p>The order of iteration is all code point ranges in sorted
jpayne@69 206 * order, followed by all strings in sorted order. Ranges are
jpayne@69 207 * disjoint and non-contiguous. The value returned from <tt>getString()</tt>
jpayne@69 208 * is undefined unless <tt>isString() == TRUE</tt>. Do not mix calls to
jpayne@69 209 * <tt>next()</tt> and <tt>nextRange()</tt> without calling
jpayne@69 210 * <tt>reset()</tt> between them. The results of doing so are
jpayne@69 211 * undefined.
jpayne@69 212 *
jpayne@69 213 * @return true if there was another element in the set.
jpayne@69 214 * @stable ICU 2.4
jpayne@69 215 */
jpayne@69 216 UBool nextRange();
jpayne@69 217
jpayne@69 218 /**
jpayne@69 219 * Sets this iterator to visit the elements of the given set and
jpayne@69 220 * resets it to the start of that set. The iterator is valid only
jpayne@69 221 * so long as <tt>set</tt> is valid.
jpayne@69 222 * @param set the set to iterate over.
jpayne@69 223 * @stable ICU 2.4
jpayne@69 224 */
jpayne@69 225 void reset(const UnicodeSet& set);
jpayne@69 226
jpayne@69 227 /**
jpayne@69 228 * Resets this iterator to the start of the set.
jpayne@69 229 * @stable ICU 2.4
jpayne@69 230 */
jpayne@69 231 void reset();
jpayne@69 232
jpayne@69 233 /**
jpayne@69 234 * ICU "poor man's RTTI", returns a UClassID for this class.
jpayne@69 235 *
jpayne@69 236 * @stable ICU 2.4
jpayne@69 237 */
jpayne@69 238 static UClassID U_EXPORT2 getStaticClassID();
jpayne@69 239
jpayne@69 240 /**
jpayne@69 241 * ICU "poor man's RTTI", returns a UClassID for the actual class.
jpayne@69 242 *
jpayne@69 243 * @stable ICU 2.4
jpayne@69 244 */
jpayne@69 245 virtual UClassID getDynamicClassID() const;
jpayne@69 246
jpayne@69 247 // ======================= PRIVATES ===========================
jpayne@69 248
jpayne@69 249 protected:
jpayne@69 250
jpayne@69 251 // endElement and nextElement are really UChar32's, but we keep
jpayne@69 252 // them as signed int32_t's so we can do comparisons with
jpayne@69 253 // endElement set to -1. Leave them as int32_t's.
jpayne@69 254 /** The set to iterate over; it must stay valid for as long as the iterator is used.
jpayne@69 255 * @stable ICU 2.4
jpayne@69 256 */
jpayne@69 257 const UnicodeSet* set;
jpayne@69 258 /** End range
jpayne@69 259 * @stable ICU 2.4
jpayne@69 260 */
jpayne@69 261 int32_t endRange;
jpayne@69 262 /** Range
jpayne@69 263 * @stable ICU 2.4
jpayne@69 264 */
jpayne@69 265 int32_t range;
jpayne@69 266 /** End element
jpayne@69 267 * @stable ICU 2.4
jpayne@69 268 */
jpayne@69 269 int32_t endElement;
jpayne@69 270 /** Next element
jpayne@69 271 * @stable ICU 2.4
jpayne@69 272 */
jpayne@69 273 int32_t nextElement;
jpayne@69 274 //UBool abbreviated;
jpayne@69 275 /** Next string
jpayne@69 276 * @stable ICU 2.4
jpayne@69 277 */
jpayne@69 278 int32_t nextString;
jpayne@69 279 /** String count
jpayne@69 280 * @stable ICU 2.4
jpayne@69 281 */
jpayne@69 282 int32_t stringCount;
jpayne@69 283
jpayne@69 284 /**
jpayne@69 285 * Points to the string to use when the caller asks for a
jpayne@69 286 * string and the current iteration item is a code point, not a string.
jpayne@69 287 * @internal
jpayne@69 288 */
jpayne@69 289 UnicodeString *cpString;
jpayne@69 290
jpayne@69 291 /** Copy constructor. Disallowed.
jpayne@69 292 * @stable ICU 2.4
jpayne@69 293 */
jpayne@69 294 UnicodeSetIterator(const UnicodeSetIterator&); // disallow
jpayne@69 295
jpayne@69 296 /** Assignment operator. Disallowed.
jpayne@69 297 * @stable ICU 2.4
jpayne@69 298 */
jpayne@69 299 UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow
jpayne@69 300
jpayne@69 301 /** Load range
jpayne@69 302 * @stable ICU 2.4
jpayne@69 303 */
jpayne@69 304 virtual void loadRange(int32_t range);
jpayne@69 305
jpayne@69 306 };
jpayne@69 307
jpayne@69 308 inline UBool UnicodeSetIterator::isString() const {
jpayne@69 309 return static_cast<UChar32>(IS_STRING) == codepoint; // the IS_STRING sentinel in codepoint marks a string element
jpayne@69 310 }
jpayne@69 311
jpayne@69 312 inline UChar32 UnicodeSetIterator::getCodepoint() const {
jpayne@69 313 return this->codepoint; // only meaningful when isString() is false
jpayne@69 314 }
jpayne@69 315
jpayne@69 316 inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
jpayne@69 317 return this->codepointEnd; // only meaningful after nextRange() when isString() is false
jpayne@69 318 }
jpayne@69 319
jpayne@69 320
jpayne@69 321 U_NAMESPACE_END
jpayne@69 322
jpayne@69 323 #endif /* U_SHOW_CPLUSPLUS_API */
jpayne@69 324
jpayne@69 325 #endif