annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/unicode/uiter.h @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 // © 2016 and later: Unicode, Inc. and others.
jpayne@69 2 // License & terms of use: http://www.unicode.org/copyright.html
jpayne@69 3 /*
jpayne@69 4 *******************************************************************************
jpayne@69 5 *
jpayne@69 6 * Copyright (C) 2002-2011 International Business Machines
jpayne@69 7 * Corporation and others. All Rights Reserved.
jpayne@69 8 *
jpayne@69 9 *******************************************************************************
jpayne@69 10 * file name: uiter.h
jpayne@69 11 * encoding: UTF-8
jpayne@69 12 * tab size: 8 (not used)
jpayne@69 13 * indentation:4
jpayne@69 14 *
jpayne@69 15 * created on: 2002jan18
jpayne@69 16 * created by: Markus W. Scherer
jpayne@69 17 */
jpayne@69 18
jpayne@69 19 #ifndef __UITER_H__
jpayne@69 20 #define __UITER_H__
jpayne@69 21
jpayne@69 22 /**
jpayne@69 23 * \file
jpayne@69 24 * \brief C API: Unicode Character Iteration
jpayne@69 25 *
jpayne@69 26 * @see UCharIterator
jpayne@69 27 */
jpayne@69 28
jpayne@69 29 #include "unicode/utypes.h"
jpayne@69 30
jpayne@69 31 #if U_SHOW_CPLUSPLUS_API
jpayne@69 32 U_NAMESPACE_BEGIN
jpayne@69 33
jpayne@69 34 class CharacterIterator;
jpayne@69 35 class Replaceable;
jpayne@69 36
jpayne@69 37 U_NAMESPACE_END
jpayne@69 38 #endif
jpayne@69 39
jpayne@69 40 U_CDECL_BEGIN
jpayne@69 41
jpayne@69 42 struct UCharIterator;
jpayne@69 43 typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
jpayne@69 44
jpayne@69 45 /**
jpayne@69 46 * Origin constants for UCharIterator.getIndex() and UCharIterator.move().
jpayne@69 47 * @see UCharIteratorMove
jpayne@69 48 * @see UCharIterator
jpayne@69 49 * @stable ICU 2.1
jpayne@69 50 */
jpayne@69 51 typedef enum UCharIteratorOrigin {
jpayne@69 52 UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH
jpayne@69 53 } UCharIteratorOrigin;
jpayne@69 54
jpayne@69 55 /** Constants for UCharIterator. @stable ICU 2.6 */
jpayne@69 56 enum {
jpayne@69 57 /**
jpayne@69 58 * Constant value that may be returned by UCharIteratorMove
jpayne@69 59 * indicating that the final UTF-16 index is not known, but that the move succeeded.
jpayne@69 60 * This can occur when moving relative to limit or length, or
jpayne@69 61 * when moving relative to the current index after a setState()
jpayne@69 62 * when the current UTF-16 index is not known.
jpayne@69 63 *
jpayne@69 64 * It would be very inefficient to have to count from the beginning of the text
jpayne@69 65 * just to get the current/limit/length index after moving relative to it.
jpayne@69 66 * The actual index can be determined with getIndex(UITER_CURRENT)
jpayne@69 67 * which will count the UChars if necessary.
jpayne@69 68 *
jpayne@69 69 * @stable ICU 2.6
jpayne@69 70 */
jpayne@69 71 UITER_UNKNOWN_INDEX=-2
jpayne@69 72 };
jpayne@69 73
jpayne@69 74
jpayne@69 75 /**
jpayne@69 76 * Constant for UCharIterator getState() indicating an error or
jpayne@69 77 * an unknown state.
jpayne@69 78 * Returned by uiter_getState()/UCharIteratorGetState
jpayne@69 79 * when an error occurs.
jpayne@69 80 * Also, some UCharIterator implementations may not be able to return
jpayne@69 81 * a valid state for each position. This will be clearly documented
jpayne@69 82 * for each such iterator (none of the public ones here).
jpayne@69 83 *
jpayne@69 84 * @stable ICU 2.6
jpayne@69 85 */
jpayne@69 86 #define UITER_NO_STATE ((uint32_t)0xffffffff)
jpayne@69 87
jpayne@69 88 /**
jpayne@69 89 * Function type declaration for UCharIterator.getIndex().
jpayne@69 90 *
jpayne@69 91 * Gets the current position, or the start or limit of the
jpayne@69 92 * iteration range.
jpayne@69 93 *
jpayne@69 94 * This function may perform slowly for UITER_CURRENT after setState() was called,
jpayne@69 95 * or for UITER_LENGTH, because an iterator implementation may have to count
jpayne@69 96 * UChars if the underlying storage is not UTF-16.
jpayne@69 97 *
jpayne@69 98 * @param iter the UCharIterator structure ("this pointer")
jpayne@69 99 * @param origin get the 0, start, limit, length, or current index
jpayne@69 100 * @return the requested index, or U_SENTINEL in an error condition
jpayne@69 101 *
jpayne@69 102 * @see UCharIteratorOrigin
jpayne@69 103 * @see UCharIterator
jpayne@69 104 * @stable ICU 2.1
jpayne@69 105 */
jpayne@69 106 typedef int32_t U_CALLCONV
jpayne@69 107 UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
jpayne@69 108
jpayne@69 109 /**
jpayne@69 110 * Function type declaration for UCharIterator.move().
jpayne@69 111 *
jpayne@69 112 * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
jpayne@69 113 *
jpayne@69 114 * Moves the current position relative to the start or limit of the
jpayne@69 115 * iteration range, or relative to the current position itself.
jpayne@69 116 * The movement is expressed in numbers of code units forward
jpayne@69 117 * or backward by specifying a positive or negative delta.
jpayne@69 118 * Out of bounds movement will be pinned to the start or limit.
jpayne@69 119 *
jpayne@69 120 * This function may perform slowly for moving relative to UITER_LENGTH
jpayne@69 121 * because an iterator implementation may have to count the rest of the
jpayne@69 122 * UChars if the native storage is not UTF-16.
jpayne@69 123 *
jpayne@69 124 * When moving relative to the limit or length, or
jpayne@69 125 * relative to the current position after setState() was called,
jpayne@69 126 * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
jpayne@69 127 * determination of the actual UTF-16 index.
jpayne@69 128 * The actual index can be determined with getIndex(UITER_CURRENT)
jpayne@69 129 * which will count the UChars if necessary.
jpayne@69 130 * See UITER_UNKNOWN_INDEX for details.
jpayne@69 131 *
jpayne@69 132 * @param iter the UCharIterator structure ("this pointer")
jpayne@69 133 * @param delta can be positive, zero, or negative
jpayne@69 134 * @param origin move relative to the 0, start, limit, length, or current index
jpayne@69 135 * @return the new index, or U_SENTINEL on an error condition,
jpayne@69 136 * or UITER_UNKNOWN_INDEX when the index is not known.
jpayne@69 137 *
jpayne@69 138 * @see UCharIteratorOrigin
jpayne@69 139 * @see UCharIterator
jpayne@69 140 * @see UITER_UNKNOWN_INDEX
jpayne@69 141 * @stable ICU 2.1
jpayne@69 142 */
jpayne@69 143 typedef int32_t U_CALLCONV
jpayne@69 144 UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
jpayne@69 145
jpayne@69 146 /**
jpayne@69 147 * Function type declaration for UCharIterator.hasNext().
jpayne@69 148 *
jpayne@69 149 * Check if current() and next() can still
jpayne@69 150 * return another code unit.
jpayne@69 151 *
jpayne@69 152 * @param iter the UCharIterator structure ("this pointer")
jpayne@69 153 * @return boolean value for whether current() and next() can still return another code unit
jpayne@69 154 *
jpayne@69 155 * @see UCharIterator
jpayne@69 156 * @stable ICU 2.1
jpayne@69 157 */
jpayne@69 158 typedef UBool U_CALLCONV
jpayne@69 159 UCharIteratorHasNext(UCharIterator *iter);
jpayne@69 160
jpayne@69 161 /**
jpayne@69 162 * Function type declaration for UCharIterator.hasPrevious().
jpayne@69 163 *
jpayne@69 164 * Check if previous() can still return another code unit.
jpayne@69 165 *
jpayne@69 166 * @param iter the UCharIterator structure ("this pointer")
jpayne@69 167 * @return boolean value for whether previous() can still return another code unit
jpayne@69 168 *
jpayne@69 169 * @see UCharIterator
jpayne@69 170 * @stable ICU 2.1
jpayne@69 171 */
jpayne@69 172 typedef UBool U_CALLCONV
jpayne@69 173 UCharIteratorHasPrevious(UCharIterator *iter);
jpayne@69 174
jpayne@69 175 /**
jpayne@69 176 * Function type declaration for UCharIterator.current().
jpayne@69 177 *
jpayne@69 178 * Return the code unit at the current position,
jpayne@69 179 * or U_SENTINEL if there is none (index is at the limit).
jpayne@69 180 *
jpayne@69 181 * @param iter the UCharIterator structure ("this pointer")
jpayne@69 182 * @return the current code unit, or U_SENTINEL if the index is at the limit
jpayne@69 183 *
jpayne@69 184 * @see UCharIterator
jpayne@69 185 * @stable ICU 2.1
jpayne@69 186 */
jpayne@69 187 typedef UChar32 U_CALLCONV
jpayne@69 188 UCharIteratorCurrent(UCharIterator *iter);
jpayne@69 189
jpayne@69 190 /**
jpayne@69 191 * Function type declaration for UCharIterator.next().
jpayne@69 192 *
jpayne@69 193 * Return the code unit at the current index and increment
jpayne@69 194 * the index (post-increment, like s[i++]),
jpayne@69 195 * or return U_SENTINEL if there is none (index is at the limit).
jpayne@69 196 *
jpayne@69 197 * @param iter the UCharIterator structure ("this pointer")
jpayne@69 198 * @return the current code unit (and post-increment the current index), or U_SENTINEL if the index is at the limit
jpayne@69 199 *
jpayne@69 200 * @see UCharIterator
jpayne@69 201 * @stable ICU 2.1
jpayne@69 202 */
jpayne@69 203 typedef UChar32 U_CALLCONV
jpayne@69 204 UCharIteratorNext(UCharIterator *iter);
jpayne@69 205
jpayne@69 206 /**
jpayne@69 207 * Function type declaration for UCharIterator.previous().
jpayne@69 208 *
jpayne@69 209 * Decrement the index and return the code unit from there
jpayne@69 210 * (pre-decrement, like s[--i]),
jpayne@69 211 * or return U_SENTINEL if there is none (index is at the start).
jpayne@69 212 *
jpayne@69 213 * @param iter the UCharIterator structure ("this pointer")
jpayne@69 214 * @return the previous code unit (after pre-decrementing the current index), or U_SENTINEL if the index is at the start
jpayne@69 215 *
jpayne@69 216 * @see UCharIterator
jpayne@69 217 * @stable ICU 2.1
jpayne@69 218 */
jpayne@69 219 typedef UChar32 U_CALLCONV
jpayne@69 220 UCharIteratorPrevious(UCharIterator *iter);
jpayne@69 221
jpayne@69 222 /**
jpayne@69 223 * Function type declaration for UCharIterator.reservedFn().
jpayne@69 224 * Reserved for future use.
jpayne@69 225 *
jpayne@69 226 * @param iter the UCharIterator structure ("this pointer")
jpayne@69 227 * @param something some integer argument
jpayne@69 228 * @return some integer
jpayne@69 229 *
jpayne@69 230 * @see UCharIterator
jpayne@69 231 * @stable ICU 2.1
jpayne@69 232 */
jpayne@69 233 typedef int32_t U_CALLCONV
jpayne@69 234 UCharIteratorReserved(UCharIterator *iter, int32_t something);
jpayne@69 235
jpayne@69 236 /**
jpayne@69 237 * Function type declaration for UCharIterator.getState().
jpayne@69 238 *
jpayne@69 239 * Get the "state" of the iterator in the form of a single 32-bit word.
jpayne@69 240 * It is recommended that the state value be calculated to be as small as
jpayne@69 241 * is feasible. For strings with limited lengths, fewer than 32 bits may
jpayne@69 242 * be sufficient.
jpayne@69 243 *
jpayne@69 244 * This is used together with setState()/UCharIteratorSetState
jpayne@69 245 * to save and restore the iterator position more efficiently than with
jpayne@69 246 * getIndex()/move().
jpayne@69 247 *
jpayne@69 248 * The iterator state is defined as a uint32_t value because it is designed
jpayne@69 249 * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
jpayne@69 250 * of the character iterator.
jpayne@69 251 *
jpayne@69 252 * With some UCharIterator implementations (e.g., UTF-8),
jpayne@69 253 * getting and setting the UTF-16 index with existing functions
jpayne@69 254 * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
jpayne@69 255 * relatively slow because the iterator has to "walk" from a known index
jpayne@69 256 * to the requested one.
jpayne@69 257 * This takes more time the farther it needs to go.
jpayne@69 258 *
jpayne@69 259 * An opaque state value allows an iterator implementation to provide
jpayne@69 260 * an internal index (UTF-8: the source byte array index) for
jpayne@69 261 * fast, constant-time restoration.
jpayne@69 262 *
jpayne@69 263 * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
jpayne@69 264 * the UTF-16 index may not be restored as well, but the iterator can deliver
jpayne@69 265 * the correct text contents and move relative to the current position
jpayne@69 266 * without performance degradation.
jpayne@69 267 *
jpayne@69 268 * Some UCharIterator implementations may not be able to return
jpayne@69 269 * a valid state for each position, in which case they return UITER_NO_STATE instead.
jpayne@69 270 * This will be clearly documented for each such iterator (none of the public ones here).
jpayne@69 271 *
jpayne@69 272 * @param iter the UCharIterator structure ("this pointer")
jpayne@69 273 * @return the state word, or UITER_NO_STATE on error or when no valid state can be returned for the position
jpayne@69 274 *
jpayne@69 275 * @see UCharIterator
jpayne@69 276 * @see UCharIteratorSetState
jpayne@69 277 * @see UITER_NO_STATE
jpayne@69 278 * @stable ICU 2.6
jpayne@69 279 */
jpayne@69 280 typedef uint32_t U_CALLCONV
jpayne@69 281 UCharIteratorGetState(const UCharIterator *iter);
jpayne@69 282
jpayne@69 283 /**
jpayne@69 284 * Function type declaration for UCharIterator.setState().
jpayne@69 285 *
jpayne@69 286 * Restore the "state" of the iterator using a state word from a getState() call.
jpayne@69 287 * The iterator object need not be the same one as for which getState() was called,
jpayne@69 288 * but it must be of the same type (set up using the same uiter_setXYZ function)
jpayne@69 289 * and it must iterate over the same string
jpayne@69 290 * (binary identical regardless of memory address).
jpayne@69 291 * For more about the state word see UCharIteratorGetState.
jpayne@69 292 *
jpayne@69 293 * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
jpayne@69 294 * the UTF-16 index may not be restored as well, but the iterator can deliver
jpayne@69 295 * the correct text contents and move relative to the current position
jpayne@69 296 * without performance degradation.
jpayne@69 297 *
jpayne@69 298 * @param iter the UCharIterator structure ("this pointer")
jpayne@69 299 * @param state the state word from a getState() call
jpayne@69 300 * on a same-type, same-string iterator
jpayne@69 301 * @param pErrorCode Must be a valid pointer to an error code value,
jpayne@69 302 * which must not indicate a failure before the function call.
jpayne@69 303 *
jpayne@69 304 * @see UCharIterator
jpayne@69 305 * @see UCharIteratorGetState
jpayne@69 306 * @stable ICU 2.6
jpayne@69 307 */
jpayne@69 308 typedef void U_CALLCONV
jpayne@69 309 UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
jpayne@69 310
jpayne@69 311
jpayne@69 312 /**
jpayne@69 313 * C API for code unit iteration.
jpayne@69 314 * This can be used as a C wrapper around
jpayne@69 315 * CharacterIterator, Replaceable, or implemented using simple strings, etc.
jpayne@69 316 *
jpayne@69 317 * There are two roles for using UCharIterator:
jpayne@69 318 *
jpayne@69 319 * A "provider" sets the necessary function pointers and controls the "protected"
jpayne@69 320 * fields of the UCharIterator structure. A "provider" passes a UCharIterator
jpayne@69 321 * into C APIs that need a UCharIterator as an abstract, flexible string interface.
jpayne@69 322 *
jpayne@69 323 * Implementations of such C APIs are "callers" of UCharIterator functions;
jpayne@69 324 * they only use the "public" function pointers and never access the "protected"
jpayne@69 325 * fields directly.
jpayne@69 326 *
jpayne@69 327 * The current() and next() functions only check the current index against the
jpayne@69 328 * limit, and previous() only checks the current index against the start,
jpayne@69 329 * to see if the iterator already reached the end of the iteration range.
jpayne@69 330 *
jpayne@69 331 * The assumption - in all iterators - is that the index is moved via the API,
jpayne@69 332 * which means it won't go out of bounds, or the index is modified by
jpayne@69 333 * user code that knows enough about the iterator implementation to set valid
jpayne@69 334 * index values.
jpayne@69 335 *
jpayne@69 336 * UCharIterator functions return code unit values 0..0xffff,
jpayne@69 337 * or U_SENTINEL if the iteration bounds are reached.
jpayne@69 338 *
jpayne@69 339 * @stable ICU 2.1
jpayne@69 340 */
jpayne@69 341 struct UCharIterator {
jpayne@69 342 /**
jpayne@69 343 * (protected) Pointer to string or wrapped object or similar.
jpayne@69 344 * Not used by caller.
jpayne@69 345 * @stable ICU 2.1
jpayne@69 346 */
jpayne@69 347 const void *context;
jpayne@69 348
jpayne@69 349 /**
jpayne@69 350 * (protected) Length of string or similar.
jpayne@69 351 * Not used by caller.
jpayne@69 352 * @stable ICU 2.1
jpayne@69 353 */
jpayne@69 354 int32_t length;
jpayne@69 355
jpayne@69 356 /**
jpayne@69 357 * (protected) Start index or similar.
jpayne@69 358 * Not used by caller.
jpayne@69 359 * @stable ICU 2.1
jpayne@69 360 */
jpayne@69 361 int32_t start;
jpayne@69 362
jpayne@69 363 /**
jpayne@69 364 * (protected) Current index or similar.
jpayne@69 365 * Not used by caller.
jpayne@69 366 * @stable ICU 2.1
jpayne@69 367 */
jpayne@69 368 int32_t index;
jpayne@69 369
jpayne@69 370 /**
jpayne@69 371 * (protected) Limit index or similar.
jpayne@69 372 * Not used by caller.
jpayne@69 373 * @stable ICU 2.1
jpayne@69 374 */
jpayne@69 375 int32_t limit;
jpayne@69 376
jpayne@69 377 /**
jpayne@69 378 * (protected) Used by UTF-8 iterators and possibly others.
jpayne@69 379 * @stable ICU 2.1
jpayne@69 380 */
jpayne@69 381 int32_t reservedField;
jpayne@69 382
jpayne@69 383 /**
jpayne@69 384 * (public) Returns the current position or the
jpayne@69 385 * start or limit index of the iteration range.
jpayne@69 386 *
jpayne@69 387 * @see UCharIteratorGetIndex
jpayne@69 388 * @stable ICU 2.1
jpayne@69 389 */
jpayne@69 390 UCharIteratorGetIndex *getIndex;
jpayne@69 391
jpayne@69 392 /**
jpayne@69 393 * (public) Moves the current position relative to the start or limit of the
jpayne@69 394 * iteration range, or relative to the current position itself.
jpayne@69 395 * The movement is expressed in numbers of code units forward
jpayne@69 396 * or backward by specifying a positive or negative delta.
jpayne@69 397 *
jpayne@69 398 * @see UCharIteratorMove
jpayne@69 399 * @stable ICU 2.1
jpayne@69 400 */
jpayne@69 401 UCharIteratorMove *move;
jpayne@69 402
jpayne@69 403 /**
jpayne@69 404 * (public) Check if current() and next() can still
jpayne@69 405 * return another code unit.
jpayne@69 406 *
jpayne@69 407 * @see UCharIteratorHasNext
jpayne@69 408 * @stable ICU 2.1
jpayne@69 409 */
jpayne@69 410 UCharIteratorHasNext *hasNext;
jpayne@69 411
jpayne@69 412 /**
jpayne@69 413 * (public) Check if previous() can still return another code unit.
jpayne@69 414 *
jpayne@69 415 * @see UCharIteratorHasPrevious
jpayne@69 416 * @stable ICU 2.1
jpayne@69 417 */
jpayne@69 418 UCharIteratorHasPrevious *hasPrevious;
jpayne@69 419
jpayne@69 420 /**
jpayne@69 421 * (public) Return the code unit at the current position,
jpayne@69 422 * or U_SENTINEL if there is none (index is at the limit).
jpayne@69 423 *
jpayne@69 424 * @see UCharIteratorCurrent
jpayne@69 425 * @stable ICU 2.1
jpayne@69 426 */
jpayne@69 427 UCharIteratorCurrent *current;
jpayne@69 428
jpayne@69 429 /**
jpayne@69 430 * (public) Return the code unit at the current index and increment
jpayne@69 431 * the index (post-increment, like s[i++]),
jpayne@69 432 * or return U_SENTINEL if there is none (index is at the limit).
jpayne@69 433 *
jpayne@69 434 * @see UCharIteratorNext
jpayne@69 435 * @stable ICU 2.1
jpayne@69 436 */
jpayne@69 437 UCharIteratorNext *next;
jpayne@69 438
jpayne@69 439 /**
jpayne@69 440 * (public) Decrement the index and return the code unit from there
jpayne@69 441 * (pre-decrement, like s[--i]),
jpayne@69 442 * or return U_SENTINEL if there is none (index is at the start).
jpayne@69 443 *
jpayne@69 444 * @see UCharIteratorPrevious
jpayne@69 445 * @stable ICU 2.1
jpayne@69 446 */
jpayne@69 447 UCharIteratorPrevious *previous;
jpayne@69 448
jpayne@69 449 /**
jpayne@69 450 * (public) Reserved for future use. Currently NULL.
jpayne@69 451 *
jpayne@69 452 * @see UCharIteratorReserved
jpayne@69 453 * @stable ICU 2.1
jpayne@69 454 */
jpayne@69 455 UCharIteratorReserved *reservedFn;
jpayne@69 456
jpayne@69 457 /**
jpayne@69 458 * (public) Return the state of the iterator, to be restored later with setState().
jpayne@69 459 * This function pointer is NULL if the iterator does not implement it.
jpayne@69 460 *
jpayne@69 461 * @see UCharIteratorGetState
jpayne@69 462 * @stable ICU 2.6
jpayne@69 463 */
jpayne@69 464 UCharIteratorGetState *getState;
jpayne@69 465
jpayne@69 466 /**
jpayne@69 467 * (public) Restore the iterator state from the state word from a call
jpayne@69 468 * to getState().
jpayne@69 469 * This function pointer is NULL if the iterator does not implement it.
jpayne@69 470 *
jpayne@69 471 * @see UCharIteratorSetState
jpayne@69 472 * @stable ICU 2.6
jpayne@69 473 */
jpayne@69 474 UCharIteratorSetState *setState;
jpayne@69 475 };
jpayne@69 476
jpayne@69 477 /**
jpayne@69 478 * Helper function for UCharIterator to get the code point
jpayne@69 479 * at the current index.
jpayne@69 480 *
jpayne@69 481 * Return the code point that includes the code unit at the current position,
jpayne@69 482 * or U_SENTINEL if there is none (index is at the limit).
jpayne@69 483 * If the current code unit is a lead or trail surrogate,
jpayne@69 484 * then the following or preceding surrogate is used to form
jpayne@69 485 * the code point value.
jpayne@69 486 *
jpayne@69 487 * @param iter the UCharIterator structure ("this pointer")
jpayne@69 488 * @return the current code point, or U_SENTINEL if the index is at the limit
jpayne@69 489 *
jpayne@69 490 * @see UCharIterator
jpayne@69 491 * @see U16_GET
jpayne@69 492 * @see UnicodeString::char32At()
jpayne@69 493 * @stable ICU 2.1
jpayne@69 494 */
jpayne@69 495 U_STABLE UChar32 U_EXPORT2
jpayne@69 496 uiter_current32(UCharIterator *iter);
jpayne@69 497
jpayne@69 498 /**
jpayne@69 499 * Helper function for UCharIterator to get the next code point.
jpayne@69 500 *
jpayne@69 501 * Return the code point at the current index and increment
jpayne@69 502 * the index (post-increment, like s[i++]),
jpayne@69 503 * or return U_SENTINEL if there is none (index is at the limit).
jpayne@69 504 *
jpayne@69 505 * @param iter the UCharIterator structure ("this pointer")
jpayne@69 506 * @return the current code point (and post-increment the current index), or U_SENTINEL if the index is at the limit
jpayne@69 507 *
jpayne@69 508 * @see UCharIterator
jpayne@69 509 * @see U16_NEXT
jpayne@69 510 * @stable ICU 2.1
jpayne@69 511 */
jpayne@69 512 U_STABLE UChar32 U_EXPORT2
jpayne@69 513 uiter_next32(UCharIterator *iter);
jpayne@69 514
jpayne@69 515 /**
jpayne@69 516 * Helper function for UCharIterator to get the previous code point.
jpayne@69 517 *
jpayne@69 518 * Decrement the index and return the code point from there
jpayne@69 519 * (pre-decrement, like s[--i]),
jpayne@69 520 * or return U_SENTINEL if there is none (index is at the start).
jpayne@69 521 *
jpayne@69 522 * @param iter the UCharIterator structure ("this pointer")
jpayne@69 523 * @return the previous code point (after pre-decrementing the current index), or U_SENTINEL if the index is at the start
jpayne@69 524 *
jpayne@69 525 * @see UCharIterator
jpayne@69 526 * @see U16_PREV
jpayne@69 527 * @stable ICU 2.1
jpayne@69 528 */
jpayne@69 529 U_STABLE UChar32 U_EXPORT2
jpayne@69 530 uiter_previous32(UCharIterator *iter);
jpayne@69 531
jpayne@69 532 /**
jpayne@69 533 * Get the "state" of the iterator in the form of a single 32-bit word.
jpayne@69 534 * This is a convenience function that calls iter->getState(iter)
jpayne@69 535 * if iter->getState is not NULL;
jpayne@69 536 * if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
jpayne@69 537 *
jpayne@69 538 * Some UCharIterator implementations may not be able to return
jpayne@69 539 * a valid state for each position, in which case they return UITER_NO_STATE instead.
jpayne@69 540 * This will be clearly documented for each such iterator (none of the public ones here).
jpayne@69 541 *
jpayne@69 542 * @param iter the UCharIterator structure ("this pointer")
jpayne@69 543 * @return the state word, or UITER_NO_STATE if iter->getState is NULL or any other error occurs
jpayne@69 544 *
jpayne@69 545 * @see UCharIterator
jpayne@69 546 * @see UCharIteratorGetState
jpayne@69 547 * @see UITER_NO_STATE
jpayne@69 548 * @stable ICU 2.6
jpayne@69 549 */
jpayne@69 550 U_STABLE uint32_t U_EXPORT2
jpayne@69 551 uiter_getState(const UCharIterator *iter);
jpayne@69 552
jpayne@69 553 /**
jpayne@69 554 * Restore the "state" of the iterator using a state word from a getState() call.
jpayne@69 555 * This is a convenience function that calls iter->setState(iter, state, pErrorCode)
jpayne@69 556 * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
jpayne@69 557 *
jpayne@69 558 * @param iter the UCharIterator structure ("this pointer")
jpayne@69 559 * @param state the state word from a getState() call
jpayne@69 560 * on a same-type, same-string iterator
jpayne@69 561 * @param pErrorCode Must be a valid pointer to an error code value,
jpayne@69 562 * which must not indicate a failure before the function call. Set to U_UNSUPPORTED_ERROR if iter->setState is NULL.
jpayne@69 563 *
jpayne@69 564 * @see UCharIterator
jpayne@69 565 * @see UCharIteratorSetState
jpayne@69 566 * @stable ICU 2.6
jpayne@69 567 */
jpayne@69 568 U_STABLE void U_EXPORT2
jpayne@69 569 uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
jpayne@69 570
jpayne@69 571 /**
jpayne@69 572 * Set up a UCharIterator to iterate over a string.
jpayne@69 573 *
jpayne@69 574 * Sets the UCharIterator function pointers for iteration over the string s
jpayne@69 575 * with iteration boundaries start=index=0 and length=limit=string length.
jpayne@69 576 * The "provider" may set the start, index, and limit values at any time
jpayne@69 577 * within the range 0..length.
jpayne@69 578 * The length field will be ignored.
jpayne@69 579 *
jpayne@69 580 * The string pointer s is set into UCharIterator.context without copying
jpayne@69 581 * or reallocating the string contents.
jpayne@69 582 *
jpayne@69 583 * getState() simply returns the current index.
jpayne@69 584 * move() will always return the final index.
jpayne@69 585 *
jpayne@69 586 * @param iter UCharIterator structure to be set for iteration
jpayne@69 587 * @param s String to iterate over
jpayne@69 588 * @param length Length of s, or -1 if NUL-terminated
jpayne@69 589 *
jpayne@69 590 * @see UCharIterator
jpayne@69 591 * @stable ICU 2.1
jpayne@69 592 */
jpayne@69 593 U_STABLE void U_EXPORT2
jpayne@69 594 uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
jpayne@69 595
jpayne@69 596 /**
jpayne@69 597 * Set up a UCharIterator to iterate over a UTF-16BE string
jpayne@69 598 * (byte vector with a big-endian pair of bytes per UChar).
jpayne@69 599 *
jpayne@69 600 * Everything works just like with a normal UChar iterator (uiter_setString),
jpayne@69 601 * except that UChars are assembled from byte pairs,
jpayne@69 602 * and that the length argument here indicates an even number of bytes.
jpayne@69 603 *
jpayne@69 604 * getState() simply returns the current index.
jpayne@69 605 * move() will always return the final index.
jpayne@69 606 *
jpayne@69 607 * @param iter UCharIterator structure to be set for iteration
jpayne@69 608 * @param s UTF-16BE string to iterate over
jpayne@69 609 * @param length Length of s as an even number of bytes, or -1 if NUL-terminated
jpayne@69 610 * (NUL means pair of 0 bytes at even index from s)
jpayne@69 611 *
jpayne@69 612 * @see UCharIterator
jpayne@69 613 * @see uiter_setString
jpayne@69 614 * @stable ICU 2.6
jpayne@69 615 */
jpayne@69 616 U_STABLE void U_EXPORT2
jpayne@69 617 uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
jpayne@69 618
jpayne@69 619 /**
jpayne@69 620 * Set up a UCharIterator to iterate over a UTF-8 string.
jpayne@69 621 *
jpayne@69 622 * Sets the UCharIterator function pointers for iteration over the UTF-8 string s
jpayne@69 623 * with UTF-8 iteration boundaries 0 and length.
jpayne@69 624 * The implementation counts the UTF-16 index on the fly and
jpayne@69 625 * lazily evaluates the UTF-16 length of the text.
jpayne@69 626 *
jpayne@69 627 * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
jpayne@69 628 * When the reservedField is not 0, then it contains a supplementary code point
jpayne@69 629 * and the UTF-16 index is between the two corresponding surrogates.
jpayne@69 630 * At that point, the UTF-8 index is behind that code point.
jpayne@69 631 *
jpayne@69 632 * The UTF-8 string pointer s is set into UCharIterator.context without copying
jpayne@69 633 * or reallocating the string contents.
jpayne@69 634 *
jpayne@69 635 * getState() returns a state value consisting of
jpayne@69 636 * - the current UTF-8 source byte index (bits 31..1)
jpayne@69 637 * - a flag (bit 0) that indicates whether the UChar position is in the middle
jpayne@69 638 * of a surrogate pair
jpayne@69 639 * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
jpayne@69 640 *
jpayne@69 641 * getState() cannot also encode the UTF-16 index in the state value.
jpayne@69 642 * move(relative to limit or length), or
jpayne@69 643 * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
jpayne@69 644 *
jpayne@69 645 * @param iter UCharIterator structure to be set for iteration
jpayne@69 646 * @param s UTF-8 string to iterate over
jpayne@69 647 * @param length Length of s in bytes, or -1 if NUL-terminated
jpayne@69 648 *
jpayne@69 649 * @see UCharIterator
jpayne@69 650 * @stable ICU 2.6
jpayne@69 651 */
jpayne@69 652 U_STABLE void U_EXPORT2
jpayne@69 653 uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
jpayne@69 654
jpayne@69 655 #if U_SHOW_CPLUSPLUS_API
jpayne@69 656
jpayne@69 657 /**
jpayne@69 658 * Set up a UCharIterator to wrap around a C++ CharacterIterator.
jpayne@69 659 *
jpayne@69 660 * Sets the UCharIterator function pointers for iteration using the
jpayne@69 661 * CharacterIterator charIter.
jpayne@69 662 *
jpayne@69 663 * The CharacterIterator pointer charIter is set into UCharIterator.context
jpayne@69 664 * without copying or cloning the CharacterIterator object.
jpayne@69 665 * The other "protected" UCharIterator fields are set to 0 and will be ignored.
jpayne@69 666 * The iteration index and boundaries are controlled by the CharacterIterator.
jpayne@69 667 *
jpayne@69 668 * getState() simply returns the current index.
jpayne@69 669 * move() will always return the final index.
jpayne@69 670 *
jpayne@69 671 * @param iter UCharIterator structure to be set for iteration
jpayne@69 672 * @param charIter CharacterIterator to wrap
jpayne@69 673 *
jpayne@69 674 * @see UCharIterator
jpayne@69 675 * @stable ICU 2.1
jpayne@69 676 */
jpayne@69 677 U_STABLE void U_EXPORT2
jpayne@69 678 uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter);
jpayne@69 679
jpayne@69 680 /**
jpayne@69 681 * Set up a UCharIterator to iterate over a C++ Replaceable.
jpayne@69 682 *
jpayne@69 683 * Sets the UCharIterator function pointers for iteration over the
jpayne@69 684 * Replaceable rep with iteration boundaries start=index=0 and
jpayne@69 685 * length=limit=rep->length().
jpayne@69 686 * The "provider" may set the start, index, and limit values at any time
jpayne@69 687 * within the range 0..length=rep->length().
jpayne@69 688 * The length field will be ignored.
jpayne@69 689 *
jpayne@69 690 * The Replaceable pointer rep is set into UCharIterator.context without copying
jpayne@69 691 * or cloning/reallocating the Replaceable object.
jpayne@69 692 *
jpayne@69 693 * getState() simply returns the current index.
jpayne@69 694 * move() will always return the final index.
jpayne@69 695 *
jpayne@69 696 * @param iter UCharIterator structure to be set for iteration
jpayne@69 697 * @param rep Replaceable to iterate over
jpayne@69 698 *
jpayne@69 699 * @see UCharIterator
jpayne@69 700 * @stable ICU 2.1
jpayne@69 701 */
jpayne@69 702 U_STABLE void U_EXPORT2
jpayne@69 703 uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep);
jpayne@69 704
jpayne@69 705 #endif
jpayne@69 706
jpayne@69 707 U_CDECL_END
jpayne@69 708
jpayne@69 709 #endif