jpayne@69: // © 2016 and later: Unicode, Inc. and others. jpayne@69: // License & terms of use: http://www.unicode.org/copyright.html jpayne@69: /* jpayne@69: ******************************************************************************* jpayne@69: * jpayne@69: * Copyright (C) 2002-2011 International Business Machines jpayne@69: * Corporation and others. All Rights Reserved. jpayne@69: * jpayne@69: ******************************************************************************* jpayne@69: * file name: uiter.h jpayne@69: * encoding: UTF-8 jpayne@69: * tab size: 8 (not used) jpayne@69: * indentation:4 jpayne@69: * jpayne@69: * created on: 2002jan18 jpayne@69: * created by: Markus W. Scherer jpayne@69: */ jpayne@69: jpayne@69: #ifndef __UITER_H__ jpayne@69: #define __UITER_H__ jpayne@69: jpayne@69: /** jpayne@69: * \file jpayne@69: * \brief C API: Unicode Character Iteration jpayne@69: * jpayne@69: * @see UCharIterator jpayne@69: */ jpayne@69: jpayne@69: #include "unicode/utypes.h" jpayne@69: jpayne@69: #if U_SHOW_CPLUSPLUS_API jpayne@69: U_NAMESPACE_BEGIN jpayne@69: jpayne@69: class CharacterIterator; jpayne@69: class Replaceable; jpayne@69: jpayne@69: U_NAMESPACE_END jpayne@69: #endif jpayne@69: jpayne@69: U_CDECL_BEGIN jpayne@69: jpayne@69: struct UCharIterator; jpayne@69: typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */ jpayne@69: jpayne@69: /** jpayne@69: * Origin constants for UCharIterator.getIndex() and UCharIterator.move(). jpayne@69: * @see UCharIteratorMove jpayne@69: * @see UCharIterator jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: typedef enum UCharIteratorOrigin { jpayne@69: UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH jpayne@69: } UCharIteratorOrigin; jpayne@69: jpayne@69: /** Constants for UCharIterator. @stable ICU 2.6 */ jpayne@69: enum { jpayne@69: /** jpayne@69: * Constant value that may be returned by UCharIteratorMove jpayne@69: * indicating that the final UTF-16 index is not known, but that the move succeeded. jpayne@69: * This can occur when moving relative to limit or length, or jpayne@69: * when moving relative to the current index after a setState() jpayne@69: * when the current UTF-16 index is not known. jpayne@69: * jpayne@69: * It would be very inefficient to have to count from the beginning of the text jpayne@69: * just to get the current/limit/length index after moving relative to it. jpayne@69: * The actual index can be determined with getIndex(UITER_CURRENT) jpayne@69: * which will count the UChars if necessary. jpayne@69: * jpayne@69: * @stable ICU 2.6 jpayne@69: */ jpayne@69: UITER_UNKNOWN_INDEX=-2 jpayne@69: }; jpayne@69: jpayne@69: jpayne@69: /** jpayne@69: * Constant for UCharIterator getState() indicating an error or jpayne@69: * an unknown state. jpayne@69: * Returned by uiter_getState()/UCharIteratorGetState jpayne@69: * when an error occurs. jpayne@69: * Also, some UCharIterator implementations may not be able to return jpayne@69: * a valid state for each position. This will be clearly documented jpayne@69: * for each such iterator (none of the public ones here). jpayne@69: * jpayne@69: * @stable ICU 2.6 jpayne@69: */ jpayne@69: #define UITER_NO_STATE ((uint32_t)0xffffffff) jpayne@69: jpayne@69: /** jpayne@69: * Function type declaration for UCharIterator.getIndex(). jpayne@69: * jpayne@69: * Gets the current position, or the start or limit of the jpayne@69: * iteration range. jpayne@69: * jpayne@69: * This function may perform slowly for UITER_CURRENT after setState() was called, jpayne@69: * or for UITER_LENGTH, because an iterator implementation may have to count jpayne@69: * UChars if the underlying storage is not UTF-16. jpayne@69: * jpayne@69: * @param iter the UCharIterator structure ("this pointer") jpayne@69: * @param origin get the 0, start, limit, length, or current index jpayne@69: * @return the requested index, or U_SENTINEL in an error condition jpayne@69: * jpayne@69: * @see UCharIteratorOrigin jpayne@69: * @see UCharIterator jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: typedef int32_t U_CALLCONV jpayne@69: UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin); jpayne@69: jpayne@69: /** jpayne@69: * Function type declaration for UCharIterator.move(). jpayne@69: * jpayne@69: * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index). jpayne@69: * jpayne@69: * Moves the current position relative to the start or limit of the jpayne@69: * iteration range, or relative to the current position itself. jpayne@69: * The movement is expressed in numbers of code units forward jpayne@69: * or backward by specifying a positive or negative delta. jpayne@69: * Out of bounds movement will be pinned to the start or limit. jpayne@69: * jpayne@69: * This function may perform slowly for moving relative to UITER_LENGTH jpayne@69: * because an iterator implementation may have to count the rest of the jpayne@69: * UChars if the native storage is not UTF-16. jpayne@69: * jpayne@69: * When moving relative to the limit or length, or jpayne@69: * relative to the current position after setState() was called, jpayne@69: * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient jpayne@69: * determination of the actual UTF-16 index. jpayne@69: * The actual index can be determined with getIndex(UITER_CURRENT) jpayne@69: * which will count the UChars if necessary. jpayne@69: * See UITER_UNKNOWN_INDEX for details. jpayne@69: * jpayne@69: * @param iter the UCharIterator structure ("this pointer") jpayne@69: * @param delta can be positive, zero, or negative jpayne@69: * @param origin move relative to the 0, start, limit, length, or current index jpayne@69: * @return the new index, or U_SENTINEL on an error condition, jpayne@69: * or UITER_UNKNOWN_INDEX when the index is not known. jpayne@69: * jpayne@69: * @see UCharIteratorOrigin jpayne@69: * @see UCharIterator jpayne@69: * @see UITER_UNKNOWN_INDEX jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: typedef int32_t U_CALLCONV jpayne@69: UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin); jpayne@69: jpayne@69: /** jpayne@69: * Function type declaration for UCharIterator.hasNext(). jpayne@69: * jpayne@69: * Check if current() and next() can still jpayne@69: * return another code unit. jpayne@69: * jpayne@69: * @param iter the UCharIterator structure ("this pointer") jpayne@69: * @return boolean value for whether current() and next() can still return another code unit jpayne@69: * jpayne@69: * @see UCharIterator jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: typedef UBool U_CALLCONV jpayne@69: UCharIteratorHasNext(UCharIterator *iter); jpayne@69: jpayne@69: /** jpayne@69: * Function type declaration for UCharIterator.hasPrevious(). jpayne@69: * jpayne@69: * Check if previous() can still return another code unit. jpayne@69: * jpayne@69: * @param iter the UCharIterator structure ("this pointer") jpayne@69: * @return boolean value for whether previous() can still return another code unit jpayne@69: * jpayne@69: * @see UCharIterator jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: typedef UBool U_CALLCONV jpayne@69: UCharIteratorHasPrevious(UCharIterator *iter); jpayne@69: jpayne@69: /** jpayne@69: * Function type declaration for UCharIterator.current(). jpayne@69: * jpayne@69: * Return the code unit at the current position, jpayne@69: * or U_SENTINEL if there is none (index is at the limit). jpayne@69: * jpayne@69: * @param iter the UCharIterator structure ("this pointer") jpayne@69: * @return the current code unit jpayne@69: * jpayne@69: * @see UCharIterator jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: typedef UChar32 U_CALLCONV jpayne@69: UCharIteratorCurrent(UCharIterator *iter); jpayne@69: jpayne@69: /** jpayne@69: * Function type declaration for UCharIterator.next(). jpayne@69: * jpayne@69: * Return the code unit at the current index and increment jpayne@69: * the index (post-increment, like s[i++]), jpayne@69: * or return U_SENTINEL if there is none (index is at the limit). jpayne@69: * jpayne@69: * @param iter the UCharIterator structure ("this pointer") jpayne@69: * @return the current code unit (and post-increment the current index) jpayne@69: * jpayne@69: * @see UCharIterator jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: typedef UChar32 U_CALLCONV jpayne@69: UCharIteratorNext(UCharIterator *iter); jpayne@69: jpayne@69: /** jpayne@69: * Function type declaration for UCharIterator.previous(). jpayne@69: * jpayne@69: * Decrement the index and return the code unit from there jpayne@69: * (pre-decrement, like s[--i]), jpayne@69: * or return U_SENTINEL if there is none (index is at the start). jpayne@69: * jpayne@69: * @param iter the UCharIterator structure ("this pointer") jpayne@69: * @return the previous code unit (after pre-decrementing the current index) jpayne@69: * jpayne@69: * @see UCharIterator jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: typedef UChar32 U_CALLCONV jpayne@69: UCharIteratorPrevious(UCharIterator *iter); jpayne@69: jpayne@69: /** jpayne@69: * Function type declaration for UCharIterator.reservedFn(). jpayne@69: * Reserved for future use. jpayne@69: * jpayne@69: * @param iter the UCharIterator structure ("this pointer") jpayne@69: * @param something some integer argument jpayne@69: * @return some integer jpayne@69: * jpayne@69: * @see UCharIterator jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: typedef int32_t U_CALLCONV jpayne@69: UCharIteratorReserved(UCharIterator *iter, int32_t something); jpayne@69: jpayne@69: /** jpayne@69: * Function type declaration for UCharIterator.getState(). jpayne@69: * jpayne@69: * Get the "state" of the iterator in the form of a single 32-bit word. jpayne@69: * It is recommended that the state value be calculated to be as small as jpayne@69: * is feasible. For strings with limited lengths, fewer than 32 bits may jpayne@69: * be sufficient. jpayne@69: * jpayne@69: * This is used together with setState()/UCharIteratorSetState jpayne@69: * to save and restore the iterator position more efficiently than with jpayne@69: * getIndex()/move(). jpayne@69: * jpayne@69: * The iterator state is defined as a uint32_t value because it is designed jpayne@69: * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state jpayne@69: * of the character iterator. jpayne@69: * jpayne@69: * With some UCharIterator implementations (e.g., UTF-8), jpayne@69: * getting and setting the UTF-16 index with existing functions jpayne@69: * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but jpayne@69: * relatively slow because the iterator has to "walk" from a known index jpayne@69: * to the requested one. jpayne@69: * This takes more time the farther it needs to go. jpayne@69: * jpayne@69: * An opaque state value allows an iterator implementation to provide jpayne@69: * an internal index (UTF-8: the source byte array index) for jpayne@69: * fast, constant-time restoration. jpayne@69: * jpayne@69: * After calling setState(), a getIndex(UITER_CURRENT) may be slow because jpayne@69: * the UTF-16 index may not be restored as well, but the iterator can deliver jpayne@69: * the correct text contents and move relative to the current position jpayne@69: * without performance degradation. jpayne@69: * jpayne@69: * Some UCharIterator implementations may not be able to return jpayne@69: * a valid state for each position, in which case they return UITER_NO_STATE instead. jpayne@69: * This will be clearly documented for each such iterator (none of the public ones here). jpayne@69: * jpayne@69: * @param iter the UCharIterator structure ("this pointer") jpayne@69: * @return the state word jpayne@69: * jpayne@69: * @see UCharIterator jpayne@69: * @see UCharIteratorSetState jpayne@69: * @see UITER_NO_STATE jpayne@69: * @stable ICU 2.6 jpayne@69: */ jpayne@69: typedef uint32_t U_CALLCONV jpayne@69: UCharIteratorGetState(const UCharIterator *iter); jpayne@69: jpayne@69: /** jpayne@69: * Function type declaration for UCharIterator.setState(). jpayne@69: * jpayne@69: * Restore the "state" of the iterator using a state word from a getState() call. jpayne@69: * The iterator object need not be the same one as for which getState() was called, jpayne@69: * but it must be of the same type (set up using the same uiter_setXYZ function) jpayne@69: * and it must iterate over the same string jpayne@69: * (binary identical regardless of memory address). jpayne@69: * For more about the state word see UCharIteratorGetState. jpayne@69: * jpayne@69: * After calling setState(), a getIndex(UITER_CURRENT) may be slow because jpayne@69: * the UTF-16 index may not be restored as well, but the iterator can deliver jpayne@69: * the correct text contents and move relative to the current position jpayne@69: * without performance degradation. jpayne@69: * jpayne@69: * @param iter the UCharIterator structure ("this pointer") jpayne@69: * @param state the state word from a getState() call jpayne@69: * on a same-type, same-string iterator jpayne@69: * @param pErrorCode Must be a valid pointer to an error code value, jpayne@69: * which must not indicate a failure before the function call. jpayne@69: * jpayne@69: * @see UCharIterator jpayne@69: * @see UCharIteratorGetState jpayne@69: * @stable ICU 2.6 jpayne@69: */ jpayne@69: typedef void U_CALLCONV jpayne@69: UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); jpayne@69: jpayne@69: jpayne@69: /** jpayne@69: * C API for code unit iteration. jpayne@69: * This can be used as a C wrapper around jpayne@69: * CharacterIterator, Replaceable, or implemented using simple strings, etc. jpayne@69: * jpayne@69: * There are two roles for using UCharIterator: jpayne@69: * jpayne@69: * A "provider" sets the necessary function pointers and controls the "protected" jpayne@69: * fields of the UCharIterator structure. A "provider" passes a UCharIterator jpayne@69: * into C APIs that need a UCharIterator as an abstract, flexible string interface. jpayne@69: * jpayne@69: * Implementations of such C APIs are "callers" of UCharIterator functions; jpayne@69: * they only use the "public" function pointers and never access the "protected" jpayne@69: * fields directly. jpayne@69: * jpayne@69: * The current() and next() functions only check the current index against the jpayne@69: * limit, and previous() only checks the current index against the start, jpayne@69: * to see if the iterator already reached the end of the iteration range. jpayne@69: * jpayne@69: * The assumption - in all iterators - is that the index is moved via the API, jpayne@69: * which means it won't go out of bounds, or the index is modified by jpayne@69: * user code that knows enough about the iterator implementation to set valid jpayne@69: * index values. jpayne@69: * jpayne@69: * UCharIterator functions return code unit values 0..0xffff, jpayne@69: * or U_SENTINEL if the iteration bounds are reached. jpayne@69: * jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: struct UCharIterator { jpayne@69: /** jpayne@69: * (protected) Pointer to string or wrapped object or similar. jpayne@69: * Not used by caller. jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: const void *context; jpayne@69: jpayne@69: /** jpayne@69: * (protected) Length of string or similar. jpayne@69: * Not used by caller. jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: int32_t length; jpayne@69: jpayne@69: /** jpayne@69: * (protected) Start index or similar. jpayne@69: * Not used by caller. jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: int32_t start; jpayne@69: jpayne@69: /** jpayne@69: * (protected) Current index or similar. jpayne@69: * Not used by caller. jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: int32_t index; jpayne@69: jpayne@69: /** jpayne@69: * (protected) Limit index or similar. jpayne@69: * Not used by caller. jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: int32_t limit; jpayne@69: jpayne@69: /** jpayne@69: * (protected) Used by UTF-8 iterators and possibly others. jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: int32_t reservedField; jpayne@69: jpayne@69: /** jpayne@69: * (public) Returns the current position or the jpayne@69: * start or limit index of the iteration range. jpayne@69: * jpayne@69: * @see UCharIteratorGetIndex jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: UCharIteratorGetIndex *getIndex; jpayne@69: jpayne@69: /** jpayne@69: * (public) Moves the current position relative to the start or limit of the jpayne@69: * iteration range, or relative to the current position itself. jpayne@69: * The movement is expressed in numbers of code units forward jpayne@69: * or backward by specifying a positive or negative delta. jpayne@69: * jpayne@69: * @see UCharIteratorMove jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: UCharIteratorMove *move; jpayne@69: jpayne@69: /** jpayne@69: * (public) Check if current() and next() can still jpayne@69: * return another code unit. jpayne@69: * jpayne@69: * @see UCharIteratorHasNext jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: UCharIteratorHasNext *hasNext; jpayne@69: jpayne@69: /** jpayne@69: * (public) Check if previous() can still return another code unit. jpayne@69: * jpayne@69: * @see UCharIteratorHasPrevious jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: UCharIteratorHasPrevious *hasPrevious; jpayne@69: jpayne@69: /** jpayne@69: * (public) Return the code unit at the current position, jpayne@69: * or U_SENTINEL if there is none (index is at the limit). jpayne@69: * jpayne@69: * @see UCharIteratorCurrent jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: UCharIteratorCurrent *current; jpayne@69: jpayne@69: /** jpayne@69: * (public) Return the code unit at the current index and increment jpayne@69: * the index (post-increment, like s[i++]), jpayne@69: * or return U_SENTINEL if there is none (index is at the limit). jpayne@69: * jpayne@69: * @see UCharIteratorNext jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: UCharIteratorNext *next; jpayne@69: jpayne@69: /** jpayne@69: * (public) Decrement the index and return the code unit from there jpayne@69: * (pre-decrement, like s[--i]), jpayne@69: * or return U_SENTINEL if there is none (index is at the start). jpayne@69: * jpayne@69: * @see UCharIteratorPrevious jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: UCharIteratorPrevious *previous; jpayne@69: jpayne@69: /** jpayne@69: * (public) Reserved for future use. Currently NULL. jpayne@69: * jpayne@69: * @see UCharIteratorReserved jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: UCharIteratorReserved *reservedFn; jpayne@69: jpayne@69: /** jpayne@69: * (public) Return the state of the iterator, to be restored later with setState(). jpayne@69: * This function pointer is NULL if the iterator does not implement it. jpayne@69: * jpayne@69: * @see UCharIteratorGet jpayne@69: * @stable ICU 2.6 jpayne@69: */ jpayne@69: UCharIteratorGetState *getState; jpayne@69: jpayne@69: /** jpayne@69: * (public) Restore the iterator state from the state word from a call jpayne@69: * to getState(). jpayne@69: * This function pointer is NULL if the iterator does not implement it. jpayne@69: * jpayne@69: * @see UCharIteratorSet jpayne@69: * @stable ICU 2.6 jpayne@69: */ jpayne@69: UCharIteratorSetState *setState; jpayne@69: }; jpayne@69: jpayne@69: /** jpayne@69: * Helper function for UCharIterator to get the code point jpayne@69: * at the current index. jpayne@69: * jpayne@69: * Return the code point that includes the code unit at the current position, jpayne@69: * or U_SENTINEL if there is none (index is at the limit). jpayne@69: * If the current code unit is a lead or trail surrogate, jpayne@69: * then the following or preceding surrogate is used to form jpayne@69: * the code point value. jpayne@69: * jpayne@69: * @param iter the UCharIterator structure ("this pointer") jpayne@69: * @return the current code point jpayne@69: * jpayne@69: * @see UCharIterator jpayne@69: * @see U16_GET jpayne@69: * @see UnicodeString::char32At() jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: U_STABLE UChar32 U_EXPORT2 jpayne@69: uiter_current32(UCharIterator *iter); jpayne@69: jpayne@69: /** jpayne@69: * Helper function for UCharIterator to get the next code point. jpayne@69: * jpayne@69: * Return the code point at the current index and increment jpayne@69: * the index (post-increment, like s[i++]), jpayne@69: * or return U_SENTINEL if there is none (index is at the limit). jpayne@69: * jpayne@69: * @param iter the UCharIterator structure ("this pointer") jpayne@69: * @return the current code point (and post-increment the current index) jpayne@69: * jpayne@69: * @see UCharIterator jpayne@69: * @see U16_NEXT jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: U_STABLE UChar32 U_EXPORT2 jpayne@69: uiter_next32(UCharIterator *iter); jpayne@69: jpayne@69: /** jpayne@69: * Helper function for UCharIterator to get the previous code point. jpayne@69: * jpayne@69: * Decrement the index and return the code point from there jpayne@69: * (pre-decrement, like s[--i]), jpayne@69: * or return U_SENTINEL if there is none (index is at the start). jpayne@69: * jpayne@69: * @param iter the UCharIterator structure ("this pointer") jpayne@69: * @return the previous code point (after pre-decrementing the current index) jpayne@69: * jpayne@69: * @see UCharIterator jpayne@69: * @see U16_PREV jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: U_STABLE UChar32 U_EXPORT2 jpayne@69: uiter_previous32(UCharIterator *iter); jpayne@69: jpayne@69: /** jpayne@69: * Get the "state" of the iterator in the form of a single 32-bit word. jpayne@69: * This is a convenience function that calls iter->getState(iter) jpayne@69: * if iter->getState is not NULL; jpayne@69: * if it is NULL or any other error occurs, then UITER_NO_STATE is returned. jpayne@69: * jpayne@69: * Some UCharIterator implementations may not be able to return jpayne@69: * a valid state for each position, in which case they return UITER_NO_STATE instead. jpayne@69: * This will be clearly documented for each such iterator (none of the public ones here). jpayne@69: * jpayne@69: * @param iter the UCharIterator structure ("this pointer") jpayne@69: * @return the state word jpayne@69: * jpayne@69: * @see UCharIterator jpayne@69: * @see UCharIteratorGetState jpayne@69: * @see UITER_NO_STATE jpayne@69: * @stable ICU 2.6 jpayne@69: */ jpayne@69: U_STABLE uint32_t U_EXPORT2 jpayne@69: uiter_getState(const UCharIterator *iter); jpayne@69: jpayne@69: /** jpayne@69: * Restore the "state" of the iterator using a state word from a getState() call. jpayne@69: * This is a convenience function that calls iter->setState(iter, state, pErrorCode) jpayne@69: * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set. jpayne@69: * jpayne@69: * @param iter the UCharIterator structure ("this pointer") jpayne@69: * @param state the state word from a getState() call jpayne@69: * on a same-type, same-string iterator jpayne@69: * @param pErrorCode Must be a valid pointer to an error code value, jpayne@69: * which must not indicate a failure before the function call. jpayne@69: * jpayne@69: * @see UCharIterator jpayne@69: * @see UCharIteratorSetState jpayne@69: * @stable ICU 2.6 jpayne@69: */ jpayne@69: U_STABLE void U_EXPORT2 jpayne@69: uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); jpayne@69: jpayne@69: /** jpayne@69: * Set up a UCharIterator to iterate over a string. jpayne@69: * jpayne@69: * Sets the UCharIterator function pointers for iteration over the string s jpayne@69: * with iteration boundaries start=index=0 and length=limit=string length. jpayne@69: * The "provider" may set the start, index, and limit values at any time jpayne@69: * within the range 0..length. jpayne@69: * The length field will be ignored. jpayne@69: * jpayne@69: * The string pointer s is set into UCharIterator.context without copying jpayne@69: * or reallocating the string contents. jpayne@69: * jpayne@69: * getState() simply returns the current index. jpayne@69: * move() will always return the final index. jpayne@69: * jpayne@69: * @param iter UCharIterator structure to be set for iteration jpayne@69: * @param s String to iterate over jpayne@69: * @param length Length of s, or -1 if NUL-terminated jpayne@69: * jpayne@69: * @see UCharIterator jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: U_STABLE void U_EXPORT2 jpayne@69: uiter_setString(UCharIterator *iter, const UChar *s, int32_t length); jpayne@69: jpayne@69: /** jpayne@69: * Set up a UCharIterator to iterate over a UTF-16BE string jpayne@69: * (byte vector with a big-endian pair of bytes per UChar). jpayne@69: * jpayne@69: * Everything works just like with a normal UChar iterator (uiter_setString), jpayne@69: * except that UChars are assembled from byte pairs, jpayne@69: * and that the length argument here indicates an even number of bytes. jpayne@69: * jpayne@69: * getState() simply returns the current index. jpayne@69: * move() will always return the final index. jpayne@69: * jpayne@69: * @param iter UCharIterator structure to be set for iteration jpayne@69: * @param s UTF-16BE string to iterate over jpayne@69: * @param length Length of s as an even number of bytes, or -1 if NUL-terminated jpayne@69: * (NUL means pair of 0 bytes at even index from s) jpayne@69: * jpayne@69: * @see UCharIterator jpayne@69: * @see uiter_setString jpayne@69: * @stable ICU 2.6 jpayne@69: */ jpayne@69: U_STABLE void U_EXPORT2 jpayne@69: uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length); jpayne@69: jpayne@69: /** jpayne@69: * Set up a UCharIterator to iterate over a UTF-8 string. jpayne@69: * jpayne@69: * Sets the UCharIterator function pointers for iteration over the UTF-8 string s jpayne@69: * with UTF-8 iteration boundaries 0 and length. jpayne@69: * The implementation counts the UTF-16 index on the fly and jpayne@69: * lazily evaluates the UTF-16 length of the text. jpayne@69: * jpayne@69: * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length. jpayne@69: * When the reservedField is not 0, then it contains a supplementary code point jpayne@69: * and the UTF-16 index is between the two corresponding surrogates. jpayne@69: * At that point, the UTF-8 index is behind that code point. jpayne@69: * jpayne@69: * The UTF-8 string pointer s is set into UCharIterator.context without copying jpayne@69: * or reallocating the string contents. jpayne@69: * jpayne@69: * getState() returns a state value consisting of jpayne@69: * - the current UTF-8 source byte index (bits 31..1) jpayne@69: * - a flag (bit 0) that indicates whether the UChar position is in the middle jpayne@69: * of a surrogate pair jpayne@69: * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point) jpayne@69: * jpayne@69: * getState() cannot also encode the UTF-16 index in the state value. jpayne@69: * move(relative to limit or length), or jpayne@69: * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX. jpayne@69: * jpayne@69: * @param iter UCharIterator structure to be set for iteration jpayne@69: * @param s UTF-8 string to iterate over jpayne@69: * @param length Length of s in bytes, or -1 if NUL-terminated jpayne@69: * jpayne@69: * @see UCharIterator jpayne@69: * @stable ICU 2.6 jpayne@69: */ jpayne@69: U_STABLE void U_EXPORT2 jpayne@69: uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length); jpayne@69: jpayne@69: #if U_SHOW_CPLUSPLUS_API jpayne@69: jpayne@69: /** jpayne@69: * Set up a UCharIterator to wrap around a C++ CharacterIterator. jpayne@69: * jpayne@69: * Sets the UCharIterator function pointers for iteration using the jpayne@69: * CharacterIterator charIter. jpayne@69: * jpayne@69: * The CharacterIterator pointer charIter is set into UCharIterator.context jpayne@69: * without copying or cloning the CharacterIterator object. jpayne@69: * The other "protected" UCharIterator fields are set to 0 and will be ignored. jpayne@69: * The iteration index and boundaries are controlled by the CharacterIterator. jpayne@69: * jpayne@69: * getState() simply returns the current index. jpayne@69: * move() will always return the final index. jpayne@69: * jpayne@69: * @param iter UCharIterator structure to be set for iteration jpayne@69: * @param charIter CharacterIterator to wrap jpayne@69: * jpayne@69: * @see UCharIterator jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: U_STABLE void U_EXPORT2 jpayne@69: uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter); jpayne@69: jpayne@69: /** jpayne@69: * Set up a UCharIterator to iterate over a C++ Replaceable. jpayne@69: * jpayne@69: * Sets the UCharIterator function pointers for iteration over the jpayne@69: * Replaceable rep with iteration boundaries start=index=0 and jpayne@69: * length=limit=rep->length(). jpayne@69: * The "provider" may set the start, index, and limit values at any time jpayne@69: * within the range 0..length=rep->length(). jpayne@69: * The length field will be ignored. jpayne@69: * jpayne@69: * The Replaceable pointer rep is set into UCharIterator.context without copying jpayne@69: * or cloning/reallocating the Replaceable object. jpayne@69: * jpayne@69: * getState() simply returns the current index. jpayne@69: * move() will always return the final index. jpayne@69: * jpayne@69: * @param iter UCharIterator structure to be set for iteration jpayne@69: * @param rep Replaceable to iterate over jpayne@69: * jpayne@69: * @see UCharIterator jpayne@69: * @stable ICU 2.1 jpayne@69: */ jpayne@69: U_STABLE void U_EXPORT2 jpayne@69: uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep); jpayne@69: jpayne@69: #endif jpayne@69: jpayne@69: U_CDECL_END jpayne@69: jpayne@69: #endif