Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/unicode/uiter.h @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 // © 2016 and later: Unicode, Inc. and others. | |
2 // License & terms of use: http://www.unicode.org/copyright.html | |
3 /* | |
4 ******************************************************************************* | |
5 * | |
6 * Copyright (C) 2002-2011 International Business Machines | |
7 * Corporation and others. All Rights Reserved. | |
8 * | |
9 ******************************************************************************* | |
10 * file name: uiter.h | |
11 * encoding: UTF-8 | |
12 * tab size: 8 (not used) | |
13 * indentation:4 | |
14 * | |
15 * created on: 2002jan18 | |
16 * created by: Markus W. Scherer | |
17 */ | |
18 | |
19 #ifndef __UITER_H__ | |
20 #define __UITER_H__ | |
21 | |
22 /** | |
23 * \file | |
24 * \brief C API: Unicode Character Iteration | |
25 * | |
26 * @see UCharIterator | |
27 */ | |
28 | |
29 #include "unicode/utypes.h" | |
30 | |
31 #if U_SHOW_CPLUSPLUS_API | |
32 U_NAMESPACE_BEGIN | |
33 | |
34 class CharacterIterator; | |
35 class Replaceable; | |
36 | |
37 U_NAMESPACE_END | |
38 #endif | |
39 | |
40 U_CDECL_BEGIN | |
41 | |
42 struct UCharIterator; | |
43 typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */ | |
44 | |
45 /** | |
46 * Origin constants for UCharIterator.getIndex() and UCharIterator.move(). | |
47 * @see UCharIteratorMove | |
48 * @see UCharIterator | |
49 * @stable ICU 2.1 | |
50 */ | |
51 typedef enum UCharIteratorOrigin { | |
52 UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH | |
53 } UCharIteratorOrigin; | |
54 | |
55 /** Constants for UCharIterator. @stable ICU 2.6 */ | |
56 enum { | |
57 /** | |
58 * Constant value that may be returned by UCharIteratorMove | |
59 * indicating that the final UTF-16 index is not known, but that the move succeeded. | |
60 * This can occur when moving relative to limit or length, or | |
61 * when moving relative to the current index after a setState() | |
62 * when the current UTF-16 index is not known. | |
63 * | |
64 * It would be very inefficient to have to count from the beginning of the text | |
65 * just to get the current/limit/length index after moving relative to it. | |
66 * The actual index can be determined with getIndex(UITER_CURRENT) | |
67 * which will count the UChars if necessary. | |
68 * | |
69 * @stable ICU 2.6 | |
70 */ | |
71 UITER_UNKNOWN_INDEX=-2 | |
72 }; | |
73 | |
74 | |
75 /** | |
76 * Constant for UCharIterator getState() indicating an error or | |
77 * an unknown state. | |
78 * Returned by uiter_getState()/UCharIteratorGetState | |
79 * when an error occurs. | |
80 * Also, some UCharIterator implementations may not be able to return | |
81 * a valid state for each position. This will be clearly documented | |
82 * for each such iterator (none of the public ones here). | |
83 * | |
84 * @stable ICU 2.6 | |
85 */ | |
86 #define UITER_NO_STATE ((uint32_t)0xffffffff) | |
87 | |
88 /** | |
89 * Function type declaration for UCharIterator.getIndex(). | |
90 * | |
91 * Gets the current position, or the start or limit of the | |
92 * iteration range. | |
93 * | |
94 * This function may perform slowly for UITER_CURRENT after setState() was called, | |
95 * or for UITER_LENGTH, because an iterator implementation may have to count | |
96 * UChars if the underlying storage is not UTF-16. | |
97 * | |
98 * @param iter the UCharIterator structure ("this pointer") | |
99 * @param origin get the 0, start, limit, length, or current index | |
100 * @return the requested index, or U_SENTINEL on an error condition | |
101 * | |
102 * @see UCharIteratorOrigin | |
103 * @see UCharIterator | |
104 * @stable ICU 2.1 | |
105 */ | |
106 typedef int32_t U_CALLCONV | |
107 UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin); | |
108 | |
109 /** | |
110 * Function type declaration for UCharIterator.move(). | |
111 * | |
112 * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index). | |
113 * | |
114 * Moves the current position relative to the start or limit of the | |
115 * iteration range, or relative to the current position itself. | |
116 * The movement is expressed in numbers of code units forward | |
117 * or backward by specifying a positive or negative delta. | |
118 * Out of bounds movement will be pinned to the start or limit. | |
119 * | |
120 * This function may perform slowly for moving relative to UITER_LENGTH | |
121 * because an iterator implementation may have to count the rest of the | |
122 * UChars if the native storage is not UTF-16. | |
123 * | |
124 * When moving relative to the limit or length, or | |
125 * relative to the current position after setState() was called, | |
126 * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient | |
127 * determination of the actual UTF-16 index. | |
128 * The actual index can be determined with getIndex(UITER_CURRENT) | |
129 * which will count the UChars if necessary. | |
130 * See UITER_UNKNOWN_INDEX for details. | |
131 * | |
132 * @param iter the UCharIterator structure ("this pointer") | |
133 * @param delta can be positive, zero, or negative | |
134 * @param origin move relative to the 0, start, limit, length, or current index | |
135 * @return the new index, or U_SENTINEL on an error condition, | |
136 * or UITER_UNKNOWN_INDEX when the index is not known. | |
137 * | |
138 * @see UCharIteratorOrigin | |
139 * @see UCharIterator | |
140 * @see UITER_UNKNOWN_INDEX | |
141 * @stable ICU 2.1 | |
142 */ | |
143 typedef int32_t U_CALLCONV | |
144 UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin); | |
145 | |
146 /** | |
147 * Function type declaration for UCharIterator.hasNext(). | |
148 * | |
149 * Check if current() and next() can still | |
150 * return another code unit. | |
151 * | |
152 * @param iter the UCharIterator structure ("this pointer") | |
153 * @return boolean value for whether current() and next() can still return another code unit | |
154 * | |
155 * @see UCharIterator | |
156 * @stable ICU 2.1 | |
157 */ | |
158 typedef UBool U_CALLCONV | |
159 UCharIteratorHasNext(UCharIterator *iter); | |
160 | |
161 /** | |
162 * Function type declaration for UCharIterator.hasPrevious(). | |
163 * | |
164 * Check if previous() can still return another code unit. | |
165 * | |
166 * @param iter the UCharIterator structure ("this pointer") | |
167 * @return boolean value for whether previous() can still return another code unit | |
168 * | |
169 * @see UCharIterator | |
170 * @stable ICU 2.1 | |
171 */ | |
172 typedef UBool U_CALLCONV | |
173 UCharIteratorHasPrevious(UCharIterator *iter); | |
174 | |
175 /** | |
176 * Function type declaration for UCharIterator.current(). | |
177 * | |
178 * Return the code unit at the current position, | |
179 * or U_SENTINEL if there is none (index is at the limit). | |
180 * | |
181 * @param iter the UCharIterator structure ("this pointer") | |
182 * @return the current code unit | |
183 * | |
184 * @see UCharIterator | |
185 * @stable ICU 2.1 | |
186 */ | |
187 typedef UChar32 U_CALLCONV | |
188 UCharIteratorCurrent(UCharIterator *iter); | |
189 | |
190 /** | |
191 * Function type declaration for UCharIterator.next(). | |
192 * | |
193 * Return the code unit at the current index and increment | |
194 * the index (post-increment, like s[i++]), | |
195 * or return U_SENTINEL if there is none (index is at the limit). | |
196 * | |
197 * @param iter the UCharIterator structure ("this pointer") | |
198 * @return the current code unit (and post-increment the current index) | |
199 * | |
200 * @see UCharIterator | |
201 * @stable ICU 2.1 | |
202 */ | |
203 typedef UChar32 U_CALLCONV | |
204 UCharIteratorNext(UCharIterator *iter); | |
205 | |
206 /** | |
207 * Function type declaration for UCharIterator.previous(). | |
208 * | |
209 * Decrement the index and return the code unit from there | |
210 * (pre-decrement, like s[--i]), | |
211 * or return U_SENTINEL if there is none (index is at the start). | |
212 * | |
213 * @param iter the UCharIterator structure ("this pointer") | |
214 * @return the previous code unit (after pre-decrementing the current index) | |
215 * | |
216 * @see UCharIterator | |
217 * @stable ICU 2.1 | |
218 */ | |
219 typedef UChar32 U_CALLCONV | |
220 UCharIteratorPrevious(UCharIterator *iter); | |
221 | |
222 /** | |
223 * Function type declaration for UCharIterator.reservedFn(). | |
224 * Reserved for future use. | |
225 * | |
226 * @param iter the UCharIterator structure ("this pointer") | |
227 * @param something some integer argument | |
228 * @return some integer | |
229 * | |
230 * @see UCharIterator | |
231 * @stable ICU 2.1 | |
232 */ | |
233 typedef int32_t U_CALLCONV | |
234 UCharIteratorReserved(UCharIterator *iter, int32_t something); | |
235 | |
236 /** | |
237 * Function type declaration for UCharIterator.getState(). | |
238 * | |
239 * Get the "state" of the iterator in the form of a single 32-bit word. | |
240 * It is recommended that the state value be calculated to be as small as | |
241 * is feasible. For strings with limited lengths, fewer than 32 bits may | |
242 * be sufficient. | |
243 * | |
244 * This is used together with setState()/UCharIteratorSetState | |
245 * to save and restore the iterator position more efficiently than with | |
246 * getIndex()/move(). | |
247 * | |
248 * The iterator state is defined as a uint32_t value because it is designed | |
249 * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state | |
250 * of the character iterator. | |
251 * | |
252 * With some UCharIterator implementations (e.g., UTF-8), | |
253 * getting and setting the UTF-16 index with existing functions | |
254 * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but | |
255 * relatively slow because the iterator has to "walk" from a known index | |
256 * to the requested one. | |
257 * This takes more time the farther it needs to go. | |
258 * | |
259 * An opaque state value allows an iterator implementation to provide | |
260 * an internal index (UTF-8: the source byte array index) for | |
261 * fast, constant-time restoration. | |
262 * | |
263 * After calling setState(), a getIndex(UITER_CURRENT) may be slow because | |
264 * the UTF-16 index may not be restored as well, but the iterator can deliver | |
265 * the correct text contents and move relative to the current position | |
266 * without performance degradation. | |
267 * | |
268 * Some UCharIterator implementations may not be able to return | |
269 * a valid state for each position, in which case they return UITER_NO_STATE instead. | |
270 * This will be clearly documented for each such iterator (none of the public ones here). | |
271 * | |
272 * @param iter the UCharIterator structure ("this pointer") | |
273 * @return the state word | |
274 * | |
275 * @see UCharIterator | |
276 * @see UCharIteratorSetState | |
277 * @see UITER_NO_STATE | |
278 * @stable ICU 2.6 | |
279 */ | |
280 typedef uint32_t U_CALLCONV | |
281 UCharIteratorGetState(const UCharIterator *iter); | |
282 | |
283 /** | |
284 * Function type declaration for UCharIterator.setState(). | |
285 * | |
286 * Restore the "state" of the iterator using a state word from a getState() call. | |
287 * The iterator object need not be the same one as for which getState() was called, | |
288 * but it must be of the same type (set up using the same uiter_setXYZ function) | |
289 * and it must iterate over the same string | |
290 * (binary identical regardless of memory address). | |
291 * For more about the state word see UCharIteratorGetState. | |
292 * | |
293 * After calling setState(), a getIndex(UITER_CURRENT) may be slow because | |
294 * the UTF-16 index may not be restored as well, but the iterator can deliver | |
295 * the correct text contents and move relative to the current position | |
296 * without performance degradation. | |
297 * | |
298 * @param iter the UCharIterator structure ("this pointer") | |
299 * @param state the state word from a getState() call | |
300 * on a same-type, same-string iterator | |
301 * @param pErrorCode Must be a valid pointer to an error code value, | |
302 * which must not indicate a failure before the function call. | |
303 * | |
304 * @see UCharIterator | |
305 * @see UCharIteratorGetState | |
306 * @stable ICU 2.6 | |
307 */ | |
308 typedef void U_CALLCONV | |
309 UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); | |
310 | |
311 | |
312 /** | |
313 * C API for code unit iteration. | |
314 * This can be used as a C wrapper around | |
315 * CharacterIterator, Replaceable, or implemented using simple strings, etc. | |
316 * | |
317 * There are two roles for using UCharIterator: | |
318 * | |
319 * A "provider" sets the necessary function pointers and controls the "protected" | |
320 * fields of the UCharIterator structure. A "provider" passes a UCharIterator | |
321 * into C APIs that need a UCharIterator as an abstract, flexible string interface. | |
322 * | |
323 * Implementations of such C APIs are "callers" of UCharIterator functions; | |
324 * they only use the "public" function pointers and never access the "protected" | |
325 * fields directly. | |
326 * | |
327 * The current() and next() functions only check the current index against the | |
328 * limit, and previous() only checks the current index against the start, | |
329 * to see if the iterator already reached the end of the iteration range. | |
330 * | |
331 * The assumption - in all iterators - is that the index is moved via the API, | |
332 * which means it won't go out of bounds, or the index is modified by | |
333 * user code that knows enough about the iterator implementation to set valid | |
334 * index values. | |
335 * | |
336 * UCharIterator functions return code unit values 0..0xffff, | |
337 * or U_SENTINEL if the iteration bounds are reached. | |
338 * | |
339 * @stable ICU 2.1 | |
340 */ | |
341 struct UCharIterator { | |
342 /** | |
343 * (protected) Pointer to string or wrapped object or similar. | |
344 * Not used by caller. | |
345 * @stable ICU 2.1 | |
346 */ | |
347 const void *context; | |
348 | |
349 /** | |
350 * (protected) Length of string or similar. | |
351 * Not used by caller. | |
352 * @stable ICU 2.1 | |
353 */ | |
354 int32_t length; | |
355 | |
356 /** | |
357 * (protected) Start index or similar. | |
358 * Not used by caller. | |
359 * @stable ICU 2.1 | |
360 */ | |
361 int32_t start; | |
362 | |
363 /** | |
364 * (protected) Current index or similar. | |
365 * Not used by caller. | |
366 * @stable ICU 2.1 | |
367 */ | |
368 int32_t index; | |
369 | |
370 /** | |
371 * (protected) Limit index or similar. | |
372 * Not used by caller. | |
373 * @stable ICU 2.1 | |
374 */ | |
375 int32_t limit; | |
376 | |
377 /** | |
378 * (protected) Used by UTF-8 iterators and possibly others. | |
379 * @stable ICU 2.1 | |
380 */ | |
381 int32_t reservedField; | |
382 | |
383 /** | |
384 * (public) Returns the current position or the | |
385 * start or limit index of the iteration range. | |
386 * | |
387 * @see UCharIteratorGetIndex | |
388 * @stable ICU 2.1 | |
389 */ | |
390 UCharIteratorGetIndex *getIndex; | |
391 | |
392 /** | |
393 * (public) Moves the current position relative to the start or limit of the | |
394 * iteration range, or relative to the current position itself. | |
395 * The movement is expressed in numbers of code units forward | |
396 * or backward by specifying a positive or negative delta. | |
397 * | |
398 * @see UCharIteratorMove | |
399 * @stable ICU 2.1 | |
400 */ | |
401 UCharIteratorMove *move; | |
402 | |
403 /** | |
404 * (public) Check if current() and next() can still | |
405 * return another code unit. | |
406 * | |
407 * @see UCharIteratorHasNext | |
408 * @stable ICU 2.1 | |
409 */ | |
410 UCharIteratorHasNext *hasNext; | |
411 | |
412 /** | |
413 * (public) Check if previous() can still return another code unit. | |
414 * | |
415 * @see UCharIteratorHasPrevious | |
416 * @stable ICU 2.1 | |
417 */ | |
418 UCharIteratorHasPrevious *hasPrevious; | |
419 | |
420 /** | |
421 * (public) Return the code unit at the current position, | |
422 * or U_SENTINEL if there is none (index is at the limit). | |
423 * | |
424 * @see UCharIteratorCurrent | |
425 * @stable ICU 2.1 | |
426 */ | |
427 UCharIteratorCurrent *current; | |
428 | |
429 /** | |
430 * (public) Return the code unit at the current index and increment | |
431 * the index (post-increment, like s[i++]), | |
432 * or return U_SENTINEL if there is none (index is at the limit). | |
433 * | |
434 * @see UCharIteratorNext | |
435 * @stable ICU 2.1 | |
436 */ | |
437 UCharIteratorNext *next; | |
438 | |
439 /** | |
440 * (public) Decrement the index and return the code unit from there | |
441 * (pre-decrement, like s[--i]), | |
442 * or return U_SENTINEL if there is none (index is at the start). | |
443 * | |
444 * @see UCharIteratorPrevious | |
445 * @stable ICU 2.1 | |
446 */ | |
447 UCharIteratorPrevious *previous; | |
448 | |
449 /** | |
450 * (public) Reserved for future use. Currently NULL. | |
451 * | |
452 * @see UCharIteratorReserved | |
453 * @stable ICU 2.1 | |
454 */ | |
455 UCharIteratorReserved *reservedFn; | |
456 | |
457 /** | |
458 * (public) Return the state of the iterator, to be restored later with setState(). | |
459 * This function pointer is NULL if the iterator does not implement it. | |
460 * | |
461 * @see UCharIteratorGetState | |
462 * @stable ICU 2.6 | |
463 */ | |
464 UCharIteratorGetState *getState; | |
465 | |
466 /** | |
467 * (public) Restore the iterator state from the state word from a call | |
468 * to getState(). | |
469 * This function pointer is NULL if the iterator does not implement it. | |
470 * | |
471 * @see UCharIteratorSetState | |
472 * @stable ICU 2.6 | |
473 */ | |
474 UCharIteratorSetState *setState; | |
475 }; | |
476 | |
477 /** | |
478 * Helper function for UCharIterator to get the code point | |
479 * at the current index. | |
480 * | |
481 * Return the code point that includes the code unit at the current position, | |
482 * or U_SENTINEL if there is none (index is at the limit). | |
483 * If the current code unit is a lead or trail surrogate, | |
484 * then the following or preceding surrogate is used to form | |
485 * the code point value. | |
486 * | |
487 * @param iter the UCharIterator structure ("this pointer") | |
488 * @return the current code point | |
489 * | |
490 * @see UCharIterator | |
491 * @see U16_GET | |
492 * @see UnicodeString::char32At() | |
493 * @stable ICU 2.1 | |
494 */ | |
495 U_STABLE UChar32 U_EXPORT2 | |
496 uiter_current32(UCharIterator *iter); | |
497 | |
498 /** | |
499 * Helper function for UCharIterator to get the next code point. | |
500 * | |
501 * Return the code point at the current index and increment | |
502 * the index (post-increment, like s[i++]), | |
503 * or return U_SENTINEL if there is none (index is at the limit). | |
504 * | |
505 * @param iter the UCharIterator structure ("this pointer") | |
506 * @return the current code point (and post-increment the current index) | |
507 * | |
508 * @see UCharIterator | |
509 * @see U16_NEXT | |
510 * @stable ICU 2.1 | |
511 */ | |
512 U_STABLE UChar32 U_EXPORT2 | |
513 uiter_next32(UCharIterator *iter); | |
514 | |
515 /** | |
516 * Helper function for UCharIterator to get the previous code point. | |
517 * | |
518 * Decrement the index and return the code point from there | |
519 * (pre-decrement, like s[--i]), | |
520 * or return U_SENTINEL if there is none (index is at the start). | |
521 * | |
522 * @param iter the UCharIterator structure ("this pointer") | |
523 * @return the previous code point (after pre-decrementing the current index) | |
524 * | |
525 * @see UCharIterator | |
526 * @see U16_PREV | |
527 * @stable ICU 2.1 | |
528 */ | |
529 U_STABLE UChar32 U_EXPORT2 | |
530 uiter_previous32(UCharIterator *iter); | |
531 | |
532 /** | |
533 * Get the "state" of the iterator in the form of a single 32-bit word. | |
534 * This is a convenience function that calls iter->getState(iter) | |
535 * if iter->getState is not NULL; | |
536 * if it is NULL or any other error occurs, then UITER_NO_STATE is returned. | |
537 * | |
538 * Some UCharIterator implementations may not be able to return | |
539 * a valid state for each position, in which case they return UITER_NO_STATE instead. | |
540 * This will be clearly documented for each such iterator (none of the public ones here). | |
541 * | |
542 * @param iter the UCharIterator structure ("this pointer") | |
543 * @return the state word | |
544 * | |
545 * @see UCharIterator | |
546 * @see UCharIteratorGetState | |
547 * @see UITER_NO_STATE | |
548 * @stable ICU 2.6 | |
549 */ | |
550 U_STABLE uint32_t U_EXPORT2 | |
551 uiter_getState(const UCharIterator *iter); | |
552 | |
553 /** | |
554 * Restore the "state" of the iterator using a state word from a getState() call. | |
555 * This is a convenience function that calls iter->setState(iter, state, pErrorCode) | |
556 * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set. | |
557 * | |
558 * @param iter the UCharIterator structure ("this pointer") | |
559 * @param state the state word from a getState() call | |
560 * on a same-type, same-string iterator | |
561 * @param pErrorCode Must be a valid pointer to an error code value, | |
562 * which must not indicate a failure before the function call. | |
563 * | |
564 * @see UCharIterator | |
565 * @see UCharIteratorSetState | |
566 * @stable ICU 2.6 | |
567 */ | |
568 U_STABLE void U_EXPORT2 | |
569 uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); | |
570 | |
571 /** | |
572 * Set up a UCharIterator to iterate over a string. | |
573 * | |
574 * Sets the UCharIterator function pointers for iteration over the string s | |
575 * with iteration boundaries start=index=0 and length=limit=string length. | |
576 * The "provider" may set the start, index, and limit values at any time | |
577 * within the range 0..length. | |
578 * The length field will be ignored. | |
579 * | |
580 * The string pointer s is set into UCharIterator.context without copying | |
581 * or reallocating the string contents. | |
582 * | |
583 * getState() simply returns the current index. | |
584 * move() will always return the final index. | |
585 * | |
586 * @param iter UCharIterator structure to be set for iteration | |
587 * @param s String to iterate over | |
588 * @param length Length of s, or -1 if NUL-terminated | |
589 * | |
590 * @see UCharIterator | |
591 * @stable ICU 2.1 | |
592 */ | |
593 U_STABLE void U_EXPORT2 | |
594 uiter_setString(UCharIterator *iter, const UChar *s, int32_t length); | |
595 | |
596 /** | |
597 * Set up a UCharIterator to iterate over a UTF-16BE string | |
598 * (byte vector with a big-endian pair of bytes per UChar). | |
599 * | |
600 * Everything works just like with a normal UChar iterator (uiter_setString), | |
601 * except that UChars are assembled from byte pairs, | |
602 * and that the length argument here indicates an even number of bytes. | |
603 * | |
604 * getState() simply returns the current index. | |
605 * move() will always return the final index. | |
606 * | |
607 * @param iter UCharIterator structure to be set for iteration | |
608 * @param s UTF-16BE string to iterate over | |
609 * @param length Length of s as an even number of bytes, or -1 if NUL-terminated | |
610 * (NUL means pair of 0 bytes at even index from s) | |
611 * | |
612 * @see UCharIterator | |
613 * @see uiter_setString | |
614 * @stable ICU 2.6 | |
615 */ | |
616 U_STABLE void U_EXPORT2 | |
617 uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length); | |
618 | |
619 /** | |
620 * Set up a UCharIterator to iterate over a UTF-8 string. | |
621 * | |
622 * Sets the UCharIterator function pointers for iteration over the UTF-8 string s | |
623 * with UTF-8 iteration boundaries 0 and length. | |
624 * The implementation counts the UTF-16 index on the fly and | |
625 * lazily evaluates the UTF-16 length of the text. | |
626 * | |
627 * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length. | |
628 * When the reservedField is not 0, then it contains a supplementary code point | |
629 * and the UTF-16 index is between the two corresponding surrogates. | |
630 * At that point, the UTF-8 index is behind that code point. | |
631 * | |
632 * The UTF-8 string pointer s is set into UCharIterator.context without copying | |
633 * or reallocating the string contents. | |
634 * | |
635 * getState() returns a state value consisting of | |
636 * - the current UTF-8 source byte index (bits 31..1) | |
637 * - a flag (bit 0) that indicates whether the UChar position is in the middle | |
638 * of a surrogate pair | |
639 * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point) | |
640 * | |
641 * getState() cannot also encode the UTF-16 index in the state value. | |
642 * move(relative to limit or length), or | |
643 * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX. | |
644 * | |
645 * @param iter UCharIterator structure to be set for iteration | |
646 * @param s UTF-8 string to iterate over | |
647 * @param length Length of s in bytes, or -1 if NUL-terminated | |
648 * | |
649 * @see UCharIterator | |
650 * @stable ICU 2.6 | |
651 */ | |
652 U_STABLE void U_EXPORT2 | |
653 uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length); | |
654 | |
655 #if U_SHOW_CPLUSPLUS_API | |
656 | |
657 /** | |
658 * Set up a UCharIterator to wrap around a C++ CharacterIterator. | |
659 * | |
660 * Sets the UCharIterator function pointers for iteration using the | |
661 * CharacterIterator charIter. | |
662 * | |
663 * The CharacterIterator pointer charIter is set into UCharIterator.context | |
664 * without copying or cloning the CharacterIterator object. | |
665 * The other "protected" UCharIterator fields are set to 0 and will be ignored. | |
666 * The iteration index and boundaries are controlled by the CharacterIterator. | |
667 * | |
668 * getState() simply returns the current index. | |
669 * move() will always return the final index. | |
670 * | |
671 * @param iter UCharIterator structure to be set for iteration | |
672 * @param charIter CharacterIterator to wrap | |
673 * | |
674 * @see UCharIterator | |
675 * @stable ICU 2.1 | |
676 */ | |
677 U_STABLE void U_EXPORT2 | |
678 uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter); | |
679 | |
680 /** | |
681 * Set up a UCharIterator to iterate over a C++ Replaceable. | |
682 * | |
683 * Sets the UCharIterator function pointers for iteration over the | |
684 * Replaceable rep with iteration boundaries start=index=0 and | |
685 * length=limit=rep->length(). | |
686 * The "provider" may set the start, index, and limit values at any time | |
687 * within the range 0..length=rep->length(). | |
688 * The length field will be ignored. | |
689 * | |
690 * The Replaceable pointer rep is set into UCharIterator.context without copying | |
691 * or cloning/reallocating the Replaceable object. | |
692 * | |
693 * getState() simply returns the current index. | |
694 * move() will always return the final index. | |
695 * | |
696 * @param iter UCharIterator structure to be set for iteration | |
697 * @param rep Replaceable to iterate over | |
698 * | |
699 * @see UCharIterator | |
700 * @stable ICU 2.1 | |
701 */ | |
702 U_STABLE void U_EXPORT2 | |
703 uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep); | |
704 | |
705 #endif | |
706 | |
707 U_CDECL_END | |
708 | |
709 #endif |