Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/unicode/utext.h @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 // © 2016 and later: Unicode, Inc. and others. | |
2 // License & terms of use: http://www.unicode.org/copyright.html | |
3 /* | |
4 ******************************************************************************* | |
5 * | |
6 * Copyright (C) 2004-2012, International Business Machines | |
7 * Corporation and others. All Rights Reserved. | |
8 * | |
9 ******************************************************************************* | |
10 * file name: utext.h | |
11 * encoding: UTF-8 | |
12 * tab size: 8 (not used) | |
13 * indentation:4 | |
14 * | |
15 * created on: 2004oct06 | |
16 * created by: Markus W. Scherer | |
17 */ | |
18 | |
19 #ifndef __UTEXT_H__ | |
20 #define __UTEXT_H__ | |
21 | |
22 /** | |
23 * \file | |
24 * \brief C API: Abstract Unicode Text API | |
25 * | |
26 * The Text Access API provides a means to allow text that is stored in alternative | |
27 * formats to work with ICU services. ICU normally operates on text that is | |
28 * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type | |
29 * UnicodeString for C++ APIs. | |
30 * | |
31 * ICU Text Access allows other formats, such as UTF-8 or non-contiguous | |
32 * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services. | |
33 * | |
34 * There are three general classes of usage for UText: | |
35 * | |
36 * Application Level Use. This is the simplest usage - applications would | |
37 * use one of the utext_open() functions on their input text, and pass | |
38 * the resulting UText to the desired ICU service. | |
39 * | |
40 * Second is usage in ICU Services, such as break iteration, that will need to | |
41 * operate on input presented to them as a UText. These implementations | |
42 * will need to use the iteration and related UText functions to gain | |
43 * access to the actual text. | |
44 * | |
45 * The third class of UText users are "text providers." These are the | |
46 * UText implementations for the various text storage formats. An application | |
47 * or system with a unique text storage format can implement a set of | |
48 * UText provider functions for that format, which will then allow | |
49 * ICU services to operate on that format. | |
50 * | |
51 * | |
52 * <em>Iterating over text</em> | |
53 * | |
54 * Here is sample code for a forward iteration over the contents of a UText | |
55 * | |
56 * \code | |
57 * UChar32 c; | |
58 * UText *ut = whatever(); | |
59 * | |
60 * for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) { | |
61 * // do whatever with the codepoint c here. | |
62 * } | |
63 * \endcode | |
64 * | |
65 * And here is similar code to iterate in the reverse direction, from the end | |
66 * of the text towards the beginning. | |
67 * | |
68 * \code | |
69 * UChar32 c; | |
70 * UText *ut = whatever(); | |
71 * int64_t textLength = utext_nativeLength(ut); | |
72 * for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) { | |
73 * // do whatever with the codepoint c here. | |
74 * } | |
75 * \endcode | |
76 * | |
77 * <em>Characters and Indexing</em> | |
78 * | |
79 * Indexing into text by UText functions is nearly always in terms of the native | |
80 * indexing of the underlying text storage. The storage format could be UTF-8 | |
81 * or UTF-32, for example. When coding to the UText access API, no assumptions | |
82 * can be made regarding the size of characters, or how far an index | |
83 * may move when iterating between characters. | |
84 * | |
85 * All indices supplied to UText functions are pinned to the length of the | |
86 * text. An out-of-bounds index is not considered to be an error, but is | |
87 * adjusted to be in the range 0 <= index <= length of input text. | |
88 * | |
89 * | |
90 * When an index position is returned from a UText function, it will be | |
91 * a native index to the underlying text. In the case of multi-unit characters, | |
92 * it will always refer to the first position of the character, | |
93 * never to the interior. This is essentially the same thing as saying that | |
94 * a returned index will always point to a boundary between characters. | |
95 * | |
96 * When a native index is supplied to a UText function, all indices that | |
97 * refer to any part of a multi-unit character representation are considered | |
98 * to be equivalent. In the case of multi-unit characters, an incoming index | |
99 * will be logically normalized to refer to the start of the character. | |
100 * | |
101 * It is possible to test whether a native index is on a code point boundary | |
102 * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex(). | |
103 * If the index is returned unchanged, it was on a code point boundary. If | |
104 * an adjusted index is returned, the original index referred to the | |
105 * interior of a character. | |
106 * | |
107 * <em>Conventions for calling UText functions</em> | |
108 * | |
109 * Most UText access functions have as their first parameter a (UText *) pointer, | |
110 * which specifies the UText to be used. Unless otherwise noted, the | |
111 * pointer must refer to a valid, open UText. Attempting to | |
112 * use a closed UText or passing a NULL pointer is a programming error and | |
113 * will produce undefined results or NULL pointer exceptions. | |
114 * | |
115 * The utext_open family of functions can either open an existing (closed) | |
116 * UText, or heap allocate a new UText. Here is sample code for creating | |
117 * a stack-allocated UText. | |
118 * | |
119 * \code | |
120 * char *s = whatever(); // A utf-8 string | |
121 * UErrorCode status = U_ZERO_ERROR; | |
122 * UText ut = UTEXT_INITIALIZER; | |
123 * utext_openUTF8(&ut, s, -1, &status); | |
124 * if (U_FAILURE(status)) { | |
125 * // error handling | |
126 * } else { | |
127 * // work with the UText | |
128 * } | |
129 * \endcode | |
130 * | |
131 * Any existing UText passed to an open function _must_ have been initialized, | |
132 * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated | |
133 * by an open function. Passing NULL will cause the open function to | |
134 * heap-allocate and fully initialize a new UText. | |
135 * | |
136 */ | |
137 | |
138 | |
139 | |
140 #include "unicode/utypes.h" | |
141 #include "unicode/uchar.h" | |
142 #if U_SHOW_CPLUSPLUS_API | |
143 #include "unicode/localpointer.h" | |
144 #include "unicode/rep.h" | |
145 #include "unicode/unistr.h" | |
146 #include "unicode/chariter.h" | |
147 #endif | |
148 | |
149 | |
150 U_CDECL_BEGIN | |
151 | |
152 struct UText; | |
153 typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */ | |
154 | |
155 | |
156 /*************************************************************************************** | |
157 * | |
158 * C Functions for creating UText wrappers around various kinds of text strings. | |
159 * | |
160 ****************************************************************************************/ | |
161 | |
162 | |
163 /** | |
164 * Close function for UText instances. | |
165 * Cleans up, releases any resources being held by an open UText. | |
166 * <p> | |
167 * If the UText was originally allocated by one of the utext_open functions, | |
168 * the storage associated with the utext will also be freed. | |
169 * If the UText storage originated with the application, as it would with | |
170 * a local or static instance, the storage will not be deleted. | |
171 * | |
172 * An open UText can be reset to refer to a new string by using one of the utext_open() | |
173 * functions without first closing the UText. | |
174 * | |
175 * @param ut The UText to be closed. | |
176 * @return NULL if the UText struct was deleted by the close. If the UText struct | |
177 * was originally provided by the caller to the open function, it is | |
178 * returned by this function, and may be safely used again in | |
179 * a subsequent utext_open. | |
180 * | |
181 * @stable ICU 3.4 | |
182 */ | |
183 U_STABLE UText * U_EXPORT2 | |
184 utext_close(UText *ut); | |
185 | |
186 /** | |
187 * Open a read-only UText implementation for UTF-8 strings. | |
188 * | |
189 * \htmlonly | |
190 * Any invalid UTF-8 in the input will be handled in this way: | |
191 * a sequence of bytes that has the form of a truncated, but otherwise valid, | |
192 * UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD. | |
193 * Any other illegal bytes will each be replaced by a \uFFFD. | |
194 * \endhtmlonly | |
195 * | |
196 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. | |
197 * If non-NULL, must refer to an initialized UText struct, which will then | |
198 * be reset to reference the specified UTF-8 string. | |
199 * @param s A UTF-8 string. Must not be NULL. | |
200 * @param length The length of the UTF-8 string in bytes, or -1 if the string is | |
201 * zero terminated. | |
202 * @param status Errors are returned here. | |
203 * @return A pointer to the UText. If a pre-allocated UText was provided, it | |
204 * will always be used and returned. | |
205 * @stable ICU 3.4 | |
206 */ | |
207 U_STABLE UText * U_EXPORT2 | |
208 utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status); | |
209 | |
210 | |
211 /** | |
212 * Open a read-only UText for UChar * string. | |
213 * | |
214 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. | |
215 * If non-NULL, must refer to an initialized UText struct, which will then | |
216 * be reset to reference the specified UChar string. | |
217 * @param s A UChar (UTF-16) string | |
218 * @param length The number of UChars in the input string, or -1 if the string is | |
219 * zero terminated. | |
220 * @param status Errors are returned here. | |
221 * @return A pointer to the UText. If a pre-allocated UText was provided, it | |
222 * will always be used and returned. | |
223 * @stable ICU 3.4 | |
224 */ | |
225 U_STABLE UText * U_EXPORT2 | |
226 utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status); | |
227 | |
228 | |
229 #if U_SHOW_CPLUSPLUS_API | |
230 /** | |
231 * Open a writable UText for a non-const UnicodeString. | |
232 * | |
233 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. | |
234 * If non-NULL, must refer to an initialized UText struct, which will then | |
235 * be reset to reference the specified input string. | |
236 * @param s A UnicodeString. | |
237 * @param status Errors are returned here. | |
238 * @return Pointer to the UText. If a UText was supplied as input, this | |
239 * will always be used and returned. | |
240 * @stable ICU 3.4 | |
241 */ | |
242 U_STABLE UText * U_EXPORT2 | |
243 utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status); | |
244 | |
245 | |
246 /** | |
247 * Open a UText for a const UnicodeString. The resulting UText will not be writable. | |
248 * | |
249 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. | |
250 * If non-NULL, must refer to an initialized UText struct, which will then | |
251 * be reset to reference the specified input string. | |
252 * @param s A const UnicodeString to be wrapped. | |
253 * @param status Errors are returned here. | |
254 * @return Pointer to the UText. If a UText was supplied as input, this | |
255 * will always be used and returned. | |
256 * @stable ICU 3.4 | |
257 */ | |
258 U_STABLE UText * U_EXPORT2 | |
259 utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status); | |
260 | |
261 | |
262 /** | |
263 * Open a writable UText implementation for an ICU Replaceable object. | |
264 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. | |
265 * If non-NULL, must refer to an already existing UText, which will then | |
266 * be reset to reference the specified replaceable text. | |
267 * @param rep A Replaceable text object. | |
268 * @param status Errors are returned here. | |
269 * @return Pointer to the UText. If a UText was supplied as input, this | |
270 * will always be used and returned. | |
271 * @see Replaceable | |
272 * @stable ICU 3.4 | |
273 */ | |
274 U_STABLE UText * U_EXPORT2 | |
275 utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status); | |
276 | |
277 /** | |
278 * Open a UText implementation over an ICU CharacterIterator. | |
279 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. | |
280 * If non-NULL, must refer to an already existing UText, which will then | |
281 * be reset to reference the specified character iterator. | |
282 * @param ci A Character Iterator. | |
283 * @param status Errors are returned here. | |
284 * @return Pointer to the UText. If a UText was supplied as input, this | |
285 * will always be used and returned. | |
286 * @see CharacterIterator | |
287 * @stable ICU 3.4 | |
288 */ | |
289 U_STABLE UText * U_EXPORT2 | |
290 utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status); | |
291 | |
292 #endif | |
293 | |
294 | |
295 /** | |
296 * Clone a UText. This is much like opening a UText where the source text is itself | |
297 * another UText. | |
298 * | |
299 * A deep clone will copy both the UText data structures and the underlying text. | |
300 * The original and cloned UText will operate completely independently; modifications | |
301 * made to the text in one will not affect the other. Text providers are not | |
302 * required to support deep clones. The user of clone() must check the status return | |
303 * and be prepared to handle failures. | |
304 * | |
305 * The standard UText implementations for UTF8, UChar *, UnicodeString and | |
306 * Replaceable all support deep cloning. | |
307 * | |
308 * The UText returned from a deep clone will be writable, assuming that the text | |
309 * provider is able to support writing, even if the source UText had been made | |
310 * non-writable by means of utext_freeze(). | |
311 * | |
312 * A shallow clone replicates only the UText data structures; it does not make | |
313 * a copy of the underlying text. Shallow clones can be used as an efficient way to | |
314 * have multiple iterators active in a single text string that is not being | |
315 * modified. | |
316 * | |
317 * A shallow clone operation will not fail, barring truly exceptional conditions such | |
318 * as memory allocation failures. | |
319 * | |
320 * Shallow UText clones should be avoided if the UText functions that modify the | |
321 * text are expected to be used, either on the original or the cloned UText. | |
322 * Any such modifications can cause unpredictable behavior. Read Only | |
323 * shallow clones provide some protection against errors of this type by | |
324 * disabling text modification via the cloned UText. | |
325 * | |
326 * A shallow clone made with the readOnly parameter == FALSE will preserve the | |
327 * utext_isWritable() state of the source object. Note, however, that | |
328 * write operations must be avoided while more than one UText exists that refer | |
329 * to the same underlying text. | |
330 * | |
331 * A UText and its clone may be safely concurrently accessed by separate threads. | |
332 * This is true for read access only with shallow clones, and for both read and | |
333 * write access with deep clones. | |
334 * It is the responsibility of the Text Provider to ensure that this thread safety | |
335 * constraint is met. | |
336 * | |
337 * @param dest A UText struct to be filled in with the result of the clone operation, | |
338 * or NULL if the clone function should heap-allocate a new UText struct. | |
339 * If non-NULL, must refer to an already existing UText, which will then | |
340 * be reset to become the clone. | |
341 * @param src The UText to be cloned. | |
342 * @param deep TRUE to request a deep clone, FALSE for a shallow clone. | |
343 * @param readOnly TRUE to request that the cloned UText have read only access to the | |
344 * underlying text. | |
345 | |
346 * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR | |
347 * will be returned if the text provider is unable to clone the | |
348 * original text. | |
349 * @return The newly created clone, or NULL if the clone operation failed. | |
350 * @stable ICU 3.4 | |
351 */ | |
352 U_STABLE UText * U_EXPORT2 | |
353 utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status); | |
354 | |
355 | |
356 /** | |
357 * Compare two UText objects for equality. | |
358 * UTexts are equal if they are iterating over the same text, and | |
359 * have the same iteration position within the text. | |
360 * If either or both of the parameters are NULL, the comparison is FALSE. | |
361 * | |
362 * @param a The first of the two UTexts to compare. | |
363 * @param b The other UText to be compared. | |
364 * @return TRUE if the two UTexts are equal. | |
365 * @stable ICU 3.6 | |
366 */ | |
367 U_STABLE UBool U_EXPORT2 | |
368 utext_equals(const UText *a, const UText *b); | |
369 | |
370 | |
371 /***************************************************************************** | |
372 * | |
373 * Functions to work with the text represented by a UText wrapper | |
374 * | |
375 *****************************************************************************/ | |
376 | |
377 /** | |
378 * Get the length of the text. Depending on the characteristics | |
379 * of the underlying text representation, this may be expensive. | |
380 * @see utext_isLengthExpensive() | |
381 * | |
382 * | |
383 * @param ut the text to be accessed. | |
384 * @return the length of the text, expressed in native units. | |
385 * | |
386 * @stable ICU 3.4 | |
387 */ | |
388 U_STABLE int64_t U_EXPORT2 | |
389 utext_nativeLength(UText *ut); | |
390 | |
391 /** | |
392 * Return TRUE if calculating the length of the text could be expensive. | |
393 * Finding the length of NUL terminated strings is considered to be expensive. | |
394 * | |
395 * Note that the value of this function may change | |
396 * as the result of other operations on a UText. | |
397 * Once the length of a string has been discovered, it will no longer | |
398 * be expensive to report it. | |
399 * | |
400 * @param ut the text to be accessed. | |
401 * @return TRUE if determining the length of the text could be time consuming. | |
402 * @stable ICU 3.4 | |
403 */ | |
404 U_STABLE UBool U_EXPORT2 | |
405 utext_isLengthExpensive(const UText *ut); | |
406 | |
407 /** | |
408 * Returns the code point at the requested index, | |
409 * or U_SENTINEL (-1) if it is out of bounds. | |
410 * | |
411 * If the specified index points to the interior of a multi-unit | |
412 * character - one of the trail bytes of a UTF-8 sequence, for example - | |
413 * the complete code point will be returned. | |
414 * | |
415 * The iteration position will be set to the start of the returned code point. | |
416 * | |
417 * This function is roughly equivalent to the sequence | |
418 * utext_setNativeIndex(index); | |
419 * utext_current32(); | |
420 * (There is a subtle difference if the index is out of bounds by being less than zero - | |
421 * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current32() | |
422 * will return the char at zero. utext_char32At(negative index), on the other hand, will | |
423 * return the U_SENTINEL value of -1.) | |
424 * | |
425 * @param ut the text to be accessed | |
426 * @param nativeIndex the native index of the character to be accessed. If the index points | |
427 * to other than the first unit of a multi-unit character, it will be adjusted | |
428 * to the start of the character. | |
429 * @return the code point at the specified index. | |
430 * @stable ICU 3.4 | |
431 */ | |
432 U_STABLE UChar32 U_EXPORT2 | |
433 utext_char32At(UText *ut, int64_t nativeIndex); | |
434 | |
435 | |
436 /** | |
437 * | |
438 * Get the code point at the current iteration position, | |
439 * or U_SENTINEL (-1) if the iteration has reached the end of | |
440 * the input text. | |
441 * | |
442 * @param ut the text to be accessed. | |
443 * @return the Unicode code point at the current iterator position. | |
444 * @stable ICU 3.4 | |
445 */ | |
446 U_STABLE UChar32 U_EXPORT2 | |
447 utext_current32(UText *ut); | |
448 | |
449 | |
450 /** | |
451 * Get the code point at the current iteration position of the UText, and | |
452 * advance the position to the first index following the character. | |
453 * | |
454 * If the position is at the end of the text (the index following | |
455 * the last character, which is also the length of the text), | |
456 * return U_SENTINEL (-1) and do not advance the index. | |
457 * | |
458 * This is a post-increment operation. | |
459 * | |
460 * An inline macro version of this function, UTEXT_NEXT32(), | |
461 * is available for performance critical use. | |
462 * | |
463 * @param ut the text to be accessed. | |
464 * @return the Unicode code point at the iteration position. | |
465 * @see UTEXT_NEXT32 | |
466 * @stable ICU 3.4 | |
467 */ | |
468 U_STABLE UChar32 U_EXPORT2 | |
469 utext_next32(UText *ut); | |
470 | |
471 | |
472 /** | |
473 * Move the iterator position to the character (code point) whose | |
474 * index precedes the current position, and return that character. | |
475 * This is a pre-decrement operation. | |
476 * | |
477 * If the initial position is at the start of the text (index of 0) | |
478 * return U_SENTINEL (-1), and leave the position unchanged. | |
479 * | |
480 * An inline macro version of this function, UTEXT_PREVIOUS32(), | |
481 * is available for performance critical use. | |
482 * | |
483 * @param ut the text to be accessed. | |
484 * @return the previous UChar32 code point, or U_SENTINEL (-1) | |
485 * if the iteration has reached the start of the text. | |
486 * @see UTEXT_PREVIOUS32 | |
487 * @stable ICU 3.4 | |
488 */ | |
489 U_STABLE UChar32 U_EXPORT2 | |
490 utext_previous32(UText *ut); | |
491 | |
492 | |
493 /** | |
494 * Set the iteration index and return the code point at that index. | |
495 * Leave the iteration index at the start of the following code point. | |
496 * | |
497 * This function is the most efficient and convenient way to | |
498 * begin a forward iteration. The results are identical to those | |
499 * from the sequence | |
500 * \code | |
501 * utext_setNativeIndex(); | |
502 * utext_next32(); | |
503 * \endcode | |
504 * | |
505 * @param ut the text to be accessed. | |
506 * @param nativeIndex Iteration index, in the native units of the text provider. | |
507 * @return Code point which starts at or before index, | |
508 * or U_SENTINEL (-1) if it is out of bounds. | |
509 * @stable ICU 3.4 | |
510 */ | |
511 U_STABLE UChar32 U_EXPORT2 | |
512 utext_next32From(UText *ut, int64_t nativeIndex); | |
513 | |
514 | |
515 | |
516 /** | |
517 * Set the iteration index, and return the code point preceding the | |
518 * one specified by the initial index. Leave the iteration position | |
519 * at the start of the returned code point. | |
520 * | |
521 * This function is the most efficient and convenient way to | |
522 * begin a backwards iteration. | |
523 * | |
524 * @param ut the text to be accessed. | |
525 * @param nativeIndex Iteration index in the native units of the text provider. | |
526 * @return Code point preceding the one at the initial index, | |
527 * or U_SENTINEL (-1) if it is out of bounds. | |
528 * | |
529 * @stable ICU 3.4 | |
530 */ | |
531 U_STABLE UChar32 U_EXPORT2 | |
532 utext_previous32From(UText *ut, int64_t nativeIndex); | |
533 | |
534 /** | |
535 * Get the current iterator position, which can range from 0 to | |
536 * the length of the text. | |
537 * The position is a native index into the input text, in whatever format it | |
538 * may have (possibly UTF-8 for example), and may not always be the same as | |
539 * the corresponding UChar (UTF-16) index. | |
540 * The returned position will always be aligned to a code point boundary. | |
541 * | |
542 * @param ut the text to be accessed. | |
543 * @return the current index position, in the native units of the text provider. | |
544 * @stable ICU 3.4 | |
545 */ | |
546 U_STABLE int64_t U_EXPORT2 | |
547 utext_getNativeIndex(const UText *ut); | |
548 | |
549 /** | |
550 * Set the current iteration position to the nearest code point | |
551 * boundary at or preceding the specified index. | |
552 * The index is in the native units of the original input text. | |
553 * If the index is out of range, it will be pinned to be within | |
554 * the range of the input text. | |
555 * <p> | |
556 * It will usually be more efficient to begin an iteration | |
557 * using the functions utext_next32From() or utext_previous32From() | |
558 * rather than utext_setNativeIndex(). | |
559 * <p> | |
560 * Moving the index position to an adjacent character is best done | |
561 * with utext_next32(), utext_previous32() or utext_moveIndex32(). | |
562 * Attempting to do direct arithmetic on the index position is | |
563 * complicated by the fact that the size (in native units) of a | |
564 * character depends on the underlying representation of the character | |
565 * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not | |
566 * easily knowable. | |
567 * | |
568 * @param ut the text to be accessed. | |
569 * @param nativeIndex the native unit index of the new iteration position. | |
570 * @stable ICU 3.4 | |
571 */ | |
572 U_STABLE void U_EXPORT2 | |
573 utext_setNativeIndex(UText *ut, int64_t nativeIndex); | |
574 | |
575 /** | |
576 * Move the iterator position by delta code points. The number of code points | |
577 * is a signed number; a negative delta will move the iterator backwards, | |
578 * towards the start of the text. | |
579 * <p> | |
580 * The index is moved by <code>delta</code> code points | |
581 * forward or backward, but no further backward than to 0 and | |
582 * no further forward than to utext_nativeLength(). | |
583 * The resulting index value will be in between 0 and length, inclusive. | |
584 * | |
585 * @param ut the text to be accessed. | |
586 * @param delta the signed number of code points to move the iteration position. | |
587 * @return TRUE if the position could be moved the requested number of positions while | |
588 * staying within the range [0 - text length]. | |
589 * @stable ICU 3.4 | |
590 */ | |
591 U_STABLE UBool U_EXPORT2 | |
592 utext_moveIndex32(UText *ut, int32_t delta); | |
593 | |
594 /** | |
595 * Get the native index of the character preceding the current position. | |
596 * If the iteration position is already at the start of the text, zero | |
597 * is returned. | |
598 * The value returned is the same as that obtained from the following sequence, | |
599 * but without the side effect of changing the iteration position. | |
600 * | |
601 * \code | |
602 * UText *ut = whatever; | |
603 * ... | |
604 * utext_previous32(ut); | |
605 * utext_getNativeIndex(ut); | |
606 * \endcode | |
607 * | |
608 * This function is most useful during forwards iteration, where it will get the | |
609 * native index of the character most recently returned from utext_next32(). | |
610 * | |
611 * @param ut the text to be accessed | |
612 * @return the native index of the character preceding the current index position, | |
613 * or zero if the current position is at the start of the text. | |
614 * @stable ICU 3.6 | |
615 */ | |
616 U_STABLE int64_t U_EXPORT2 | |
617 utext_getPreviousNativeIndex(UText *ut); | |
618 | |
619 | |
620 /** | |
621 * | |
622 * Extract text from a UText into a UChar buffer. The range of text to be extracted | |
623 * is specified in the native indices of the UText provider. These may not necessarily | |
624 * be UTF-16 indices. | |
625 * <p> | |
626 * The size (number of 16 bit UChars) of the data to be extracted is returned. The | |
627 * full number of UChars is returned, even when the extracted text is truncated | |
628 * because the specified buffer size is too small. | |
629 * <p> | |
630 * The extracted string will (if you are a user) / must (if you are a text provider) | |
631 * be NUL-terminated if there is sufficient space in the destination buffer. This | |
632 * terminating NUL is not included in the returned length. | |
633 * <p> | |
634 * The iteration index is left at the position following the last extracted character. | |
635 * | |
636 * @param ut the UText from which to extract data. | |
637 * @param nativeStart the native index of the first character to extract. | |
638 * If the specified index is out of range, | |
639 * it will be pinned to be within 0 <= index <= textLength | |
640 * @param nativeLimit the native string index of the position following the last | |
641 * character to extract. If the specified index is out of range, | |
642 * it will be pinned to be within 0 <= index <= textLength. | |
643 * nativeLimit must be >= nativeStart. | |
644 * @param dest the UChar (UTF-16) buffer into which the extracted text is placed | |
645 * @param destCapacity The size, in UChars, of the destination buffer. May be zero | |
646 * for precomputing the required size. | |
647 * @param status receives any error status. | |
648 * U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the | |
649 * buffer was too small. Returns number of UChars for preflighting. | |
650 * @return Number of UChars in the data to be extracted. Does not include a trailing NUL. | |
651 * | |
652 * @stable ICU 3.4 | |
653 */ | |
654 U_STABLE int32_t U_EXPORT2 | |
655 utext_extract(UText *ut, | |
656 int64_t nativeStart, int64_t nativeLimit, | |
657 UChar *dest, int32_t destCapacity, | |
658 UErrorCode *status); | |
659 | |
660 | |
661 | |
662 /************************************************************************************ | |
663 * | |
664 * #define inline versions of selected performance-critical text access functions | |
665 * Caution: do not use auto increment++ or decrement-- expressions | |
666 * as parameters to these macros. | |
667 * | |
668 * For most use, where there is no extreme performance constraint, the | |
669 * normal, non-inline functions are a better choice. The resulting code | |
670 * will be smaller, and, if the need ever arises, easier to debug. | |
671 * | |
672 * These are implemented as #defines rather than real functions | |
673 * because there is no fully portable way to do inline functions in plain C. | |
674 * | |
675 ************************************************************************************/ | |
676 | |
677 #ifndef U_HIDE_INTERNAL_API | |
678 /** | |
679 * Inline version of utext_current32(), for performance-critical situations. | |
680 * Get the code point at the current iteration position of the UText. | |
681 * Returns U_SENTINEL (-1) if the position is at the end of the text. | |
682 * Fast path: when chunkOffset is inside the current chunk and the code unit there is below 0xd800, it is read directly from chunkContents; every other case calls utext_current32(). | |
683 * Caution: the argument ut is evaluated more than once, so it must not have side effects. | |
684 * | |
685 * @internal ICU 4.4 technology preview | |
686 */ | |
687 #define UTEXT_CURRENT32(ut) \ | |
688 ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \ | |
689 ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut)) | |
690 #endif /* U_HIDE_INTERNAL_API */ | |
691 | |
692 /** | |
693 * Inline version of utext_next32(), for performance-critical situations. | |
694 * | |
695 * Get the code point at the current iteration position of the UText, and | |
696 * advance the position to the first index following the character. | |
697 * This is a post-increment operation. | |
698 * Returns U_SENTINEL (-1) if the position is at the end of the text. | |
699 * Fast path: a code unit below 0xd800 inside the current chunk is returned directly and chunkOffset is incremented; every other case calls utext_next32(). | |
700 * Caution: the argument ut is evaluated more than once, so it must not have side effects. | |
701 * @stable ICU 3.4 | |
702 */ | |
703 #define UTEXT_NEXT32(ut) \ | |
704 ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \ | |
705 ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut)) | |
706 | |
707 /** | |
708 * inline version of utext_previous32(), for performance-critical situations. | |
709 * | |
710 * Move the iterator position to the character (code point) whose | |
711 * index precedes the current position, and return that character. | |
712 * This is a pre-decrement operation. | |
713 * Returns U_SENTINEL (-1) if the position is at the start of the text. | |
714 * | |
715 * @stable ICU 3.4 | |
716 */ | |
717 #define UTEXT_PREVIOUS32(ut) \ | |
718 ((ut)->chunkOffset > 0 && \ | |
719 (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \ | |
720 (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut)) | |
721 | |
722 /** | |
723 * inline version of utext_getNativeIndex(), for performance-critical situations. | |
724 * | |
725 * Get the current iterator position, which can range from 0 to | |
726 * the length of the text. | |
727 * The position is a native index into the input text, in whatever format it | |
728 * may have (possibly UTF-8 for example), and may not always be the same as | |
729 * the corresponding UChar (UTF-16) index. | |
730 * The returned position will always be aligned to a code point boundary. | |
731 * | |
732 * @stable ICU 3.6 | |
733 */ | |
734 #define UTEXT_GETNATIVEINDEX(ut) \ | |
735 ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \ | |
736 (ut)->chunkNativeStart+(ut)->chunkOffset : \ | |
737 (ut)->pFuncs->mapOffsetToNative(ut)) | |
738 | |
739 /** | |
740 * inline version of utext_setNativeIndex(), for performance-critical situations. | |
741 * | |
742 * Set the current iteration position to the nearest code point | |
743 * boundary at or preceding the specified index. | |
744 * The index is in the native units of the original input text. | |
745 * If the index is out of range, it will be pinned to be within | |
746 * the range of the input text. | |
747 * | |
748 * @stable ICU 3.8 | |
749 */ | |
750 #define UTEXT_SETNATIVEINDEX(ut, ix) UPRV_BLOCK_MACRO_BEGIN { \ | |
751 int64_t __offset = (ix) - (ut)->chunkNativeStart; \ | |
752 if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \ | |
753 (ut)->chunkOffset=(int32_t)__offset; \ | |
754 } else { \ | |
755 utext_setNativeIndex((ut), (ix)); \ | |
756 } \ | |
757 } UPRV_BLOCK_MACRO_END | |
758 | |
759 | |
760 | |
761 /************************************************************************************ | |
762 * | |
763 * Functions related to writing or modifying the text. | |
764 * These will work only with modifiable UTexts. Attempting to | |
765 * modify a read-only UText will return an error status. | |
766 * | |
767 ************************************************************************************/ | |
768 | |
769 | |
770 /** | |
771 * Return TRUE if the text can be written (modified) with utext_replace() or | |
772 * utext_copy(). For the text to be writable, the text provider must | |
773 * be of a type that supports writing and the UText must not be frozen. | |
774 * | |
775 * Attempting to modify text when utext_isWritable() is FALSE will fail - | |
776 * the text will not be modified, and an error will be returned from the function | |
777 * that attempted the modification. | |
778 * | |
779 * @param ut the UText to be tested. | |
780 * @return TRUE if the text is modifiable. | |
781 * | |
782 * @see utext_freeze() | |
783 * @see utext_replace() | |
784 * @see utext_copy() | |
785 * @stable ICU 3.4 | |
786 * | |
787 */ | |
788 U_STABLE UBool U_EXPORT2 | |
789 utext_isWritable(const UText *ut); | |
790 | |
791 | |
792 /** | |
793 * Test whether there is meta data associated with the text. | |
794 * @see Replaceable::hasMetaData() | |
795 * | |
796 * @param ut The UText to be tested | |
797 * @return TRUE if the underlying text includes meta data. | |
798 * @stable ICU 3.4 | |
799 */ | |
800 U_STABLE UBool U_EXPORT2 | |
801 utext_hasMetaData(const UText *ut); | |
802 | |
803 | |
804 /** | |
805 * Replace a range of the original text with a replacement text. | |
806 * | |
807 * Leaves the current iteration position at the position following the | |
808 * newly inserted replacement text. | |
809 * | |
810 * This function is only available on UText types that support writing, | |
811 * that is, ones where utext_isWritable() returns TRUE. | |
812 * | |
813 * When using this function, there should be only a single UText opened onto the | |
814 * underlying native text string. Behavior after a replace operation | |
815 * on a UText is undefined for any other additional UTexts that refer to the | |
816 * modified string. | |
817 * | |
818 * @param ut the UText representing the text to be operated on. | |
819 * @param nativeStart the native index of the start of the region to be replaced | |
820 * @param nativeLimit the native index of the character following the region to be replaced. | |
821 * @param replacementText pointer to the replacement text | |
822 * @param replacementLength length of the replacement text, or -1 if the text is NUL terminated. | |
823 * @param status receives any error status. Possible errors include | |
824 * U_NO_WRITE_PERMISSION | |
825 * | |
826 * @return The signed number of (native) storage units by which | |
827 * the length of the text expanded or contracted. | |
828 * | |
829 * @stable ICU 3.4 | |
830 */ | |
831 U_STABLE int32_t U_EXPORT2 | |
832 utext_replace(UText *ut, | |
833 int64_t nativeStart, int64_t nativeLimit, | |
834 const UChar *replacementText, int32_t replacementLength, | |
835 UErrorCode *status); | |
836 | |
837 | |
838 | |
839 /** | |
840 * | |
841 * Copy or move a substring from one position to another within the text, | |
842 * while retaining any metadata associated with the text. | |
843 * This function is used to duplicate or reorder substrings. | |
844 * The destination index must not overlap the source range. | |
845 * | |
846 * The text to be copied or moved is inserted at destIndex; | |
847 * it does not replace or overwrite any existing text. | |
848 * | |
849 * The iteration position is left following the newly inserted text | |
850 * at the destination position. | |
851 * | |
852 * This function is only available on UText types that support writing, | |
853 * that is, ones where utext_isWritable() returns TRUE. | |
854 * | |
855 * When using this function, there should be only a single UText opened onto the | |
856 * underlying native text string. Behavior after a copy operation | |
857 * on a UText is undefined in any other additional UTexts that refer to the | |
858 * modified string. | |
859 * | |
860 * @param ut The UText representing the text to be operated on. | |
861 * @param nativeStart The native index of the start of the region to be copied or moved | |
862 * @param nativeLimit The native index of the character position following the region | |
863 * to be copied. | |
864 * @param destIndex The native destination index to which the source substring is | |
865 * copied or moved. | |
866 * @param move If TRUE, then the substring is moved, not copied/duplicated. | |
867 * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION | |
868 * | |
869 * @stable ICU 3.4 | |
870 */ | |
871 U_STABLE void U_EXPORT2 | |
872 utext_copy(UText *ut, | |
873 int64_t nativeStart, int64_t nativeLimit, | |
874 int64_t destIndex, | |
875 UBool move, | |
876 UErrorCode *status); | |
877 | |
878 | |
879 /** | |
880 * <p> | |
881 * Freeze a UText. This prevents any modification to the underlying text itself | |
882 * by means of functions operating on this UText. | |
883 * </p> | |
884 * <p> | |
885 * Once frozen, a UText can not be unfrozen. The intent is to ensure | |
886 * that the text underlying a frozen UText wrapper cannot be modified via that UText. | |
887 * </p> | |
888 * <p> | |
889 * Caution: freezing a UText will disable changes made via the specific | |
890 * frozen UText wrapper only; it will not have any effect on the ability to | |
891 * directly modify the text by bypassing the UText. Any such backdoor modifications | |
892 * are always an error while UText access is occurring because the underlying | |
893 * text can get out of sync with UText's buffering. | |
894 * </p> | |
895 * | |
896 * @param ut The UText to be frozen. | |
897 * @see utext_isWritable() | |
898 * @stable ICU 3.6 | |
899 */ | |
900 U_STABLE void U_EXPORT2 | |
901 utext_freeze(UText *ut); | |
902 | |
903 | |
904 /** | |
905 * UText provider properties (bit field indexes). | |
906 * | |
907 * @see UText | |
908 * @stable ICU 3.4 | |
909 */ | |
910 enum { | |
911 /** | |
912 * It is potentially time consuming for the provider to determine the length of the text. | |
913 * @stable ICU 3.4 | |
914 */ | |
915 UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1, | |
916 /** | |
917 * Text chunks remain valid and usable until the text object is modified or | |
918 * deleted, not just until the next time the access() function is called | |
919 * (which is the default). | |
920 * @stable ICU 3.4 | |
921 */ | |
922 UTEXT_PROVIDER_STABLE_CHUNKS = 2, | |
923 /** | |
924 * The provider supports modifying the text via the replace() and copy() | |
925 * functions. | |
926 * @see Replaceable | |
927 * @stable ICU 3.4 | |
928 */ | |
929 UTEXT_PROVIDER_WRITABLE = 3, | |
930 /** | |
931 * There is meta data associated with the text. | |
932 * @see Replaceable::hasMetaData() | |
933 * @stable ICU 3.4 | |
934 */ | |
935 UTEXT_PROVIDER_HAS_META_DATA = 4, | |
936 /** | |
937 * Text provider owns the text storage. | |
938 * Generally occurs as the result of a deep clone of the UText. | |
939 * When closing the UText, the associated text must | |
940 * also be closed/deleted/freed/ whatever is appropriate. | |
941 * @stable ICU 3.6 | |
942 */ | |
943 UTEXT_PROVIDER_OWNS_TEXT = 5 | |
944 }; | |
945 | |
946 /** | |
947 * Function type declaration for UText.clone(). | |
948 * | |
949 * clone a UText. Much like opening a UText where the source text is itself | |
950 * another UText. | |
951 * | |
952 * A deep clone will copy both the UText data structures and the underlying text. | |
953 * The original and cloned UText will operate completely independently; modifications | |
954 * made to the text in one will not affect the other. Text providers are not | |
955 * required to support deep clones. The user of clone() must check the status return | |
956 * and be prepared to handle failures. | |
957 * | |
958 * A shallow clone replicates only the UText data structures; it does not make | |
959 * a copy of the underlying text. Shallow clones can be used as an efficient way to | |
960 * have multiple iterators active in a single text string that is not being | |
961 * modified. | |
962 * | |
963 * A shallow clone operation must not fail except for truly exceptional conditions such | |
964 * as memory allocation failures. | |
965 * | |
966 * A UText and its clone may be safely concurrently accessed by separate threads. | |
967 * This is true for both shallow and deep clones. | |
968 * It is the responsibility of the Text Provider to ensure that this thread safety | |
969 * constraint is met. | |
970 | |
971 * | |
972 * @param dest A UText struct to be filled in with the result of the clone operation, | |
973 * or NULL if the clone function should heap-allocate a new UText struct. | |
974 * @param src The UText to be cloned. | |
975 * @param deep TRUE to request a deep clone, FALSE for a shallow clone. | |
976 * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR | |
977 * should be returned if the text provider is unable to clone the | |
978 * original text. | |
979 * @return The newly created clone, or NULL if the clone operation failed. | |
980 * | |
981 * @stable ICU 3.4 | |
982 */ | |
983 typedef UText * U_CALLCONV | |
984 UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status); | |
985 | |
986 | |
987 /** | |
988 * Function type declaration for UText.nativeLength(). | |
989 * | |
990 * @param ut the UText to get the length of. | |
991 * @return the length, in the native units of the original text string. | |
992 * @see UText | |
993 * @stable ICU 3.4 | |
994 */ | |
995 typedef int64_t U_CALLCONV | |
996 UTextNativeLength(UText *ut); | |
997 | |
998 /** | |
999 * Function type declaration for UText.access(). Get the description of the text chunk | |
1000 * containing the text at a requested native index. The UText's iteration | |
1001 * position will be left at the requested index. If the index is out | |
1002 * of bounds, the iteration position will be left at the start or end | |
1003 * of the string, as appropriate. | |
1004 * | |
1005 * Chunks must begin and end on code point boundaries. A single code point | |
1006 * comprised of multiple storage units must never span a chunk boundary. | |
1007 * | |
1008 * | |
1009 * @param ut the UText being accessed. | |
1010 * @param nativeIndex Requested index of the text to be accessed. | |
1011 * @param forward If TRUE, then the returned chunk must contain text | |
1012 * starting from the index, so that start<=index<limit. | |
1013 * If FALSE, then the returned chunk must contain text | |
1014 * before the index, so that start<index<=limit. | |
1015 * @return True if the requested index could be accessed. The chunk | |
1016 * will contain the requested text. | |
1017 * False value if a chunk cannot be accessed | |
1018 * (the requested index is out of bounds). | |
1019 * | |
1020 * @see UText | |
1021 * @stable ICU 3.4 | |
1022 */ | |
1023 typedef UBool U_CALLCONV | |
1024 UTextAccess(UText *ut, int64_t nativeIndex, UBool forward); | |
1025 | |
1026 /** | |
1027 * Function type declaration for UText.extract(). | |
1028 * | |
1029 * Extract text from a UText into a UChar buffer. The range of text to be extracted | |
1030 * is specified in the native indices of the UText provider. These may not necessarily | |
1031 * be UTF-16 indices. | |
1032 * <p> | |
1033 * The size (number of 16 bit UChars) in the data to be extracted is returned. The | |
1034 * full amount is returned, even when the specified buffer size is smaller. | |
1035 * <p> | |
1036 * The extracted string will (if you are a user) / must (if you are a text provider) | |
1037 * be NUL-terminated if there is sufficient space in the destination buffer. | |
1038 * | |
1039 * @param ut the UText from which to extract data. | |
1040 * @param nativeStart the native index of the first character to extract. | |
1041 * @param nativeLimit the native string index of the position following the last | |
1042 * character to extract. | |
1043 * @param dest the UChar (UTF-16) buffer into which the extracted text is placed | |
1044 * @param destCapacity The size, in UChars, of the destination buffer. May be zero | |
1045 * for precomputing the required size. | |
1046 * @param status receives any error status. | |
1047 * If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for | |
1048 * preflighting. | |
1049 * @return Number of UChars in the data. Does not include a trailing NUL. | |
1050 * | |
1051 * @stable ICU 3.4 | |
1052 */ | |
1053 typedef int32_t U_CALLCONV | |
1054 UTextExtract(UText *ut, | |
1055 int64_t nativeStart, int64_t nativeLimit, | |
1056 UChar *dest, int32_t destCapacity, | |
1057 UErrorCode *status); | |
1058 | |
1059 /** | |
1060 * Function type declaration for UText.replace(). | |
1061 * | |
1062 * Replace a range of the original text with a replacement text. | |
1063 * | |
1064 * Leaves the current iteration position at the position following the | |
1065 * newly inserted replacement text. | |
1066 * | |
1067 * This function need only be implemented on UText types that support writing. | |
1068 * | |
1069 * When using this function, there should be only a single UText opened onto the | |
1070 * underlying native text string. The function is responsible for updating the | |
1071 * text chunk within the UText to reflect the updated iteration position, | |
1072 * taking into account any changes to the underlying string's structure caused | |
1073 * by the replace operation. | |
1074 * | |
1075 * @param ut the UText representing the text to be operated on. | |
1076 * @param nativeStart the index of the start of the region to be replaced | |
1077 * @param nativeLimit the index of the character following the region to be replaced. | |
1078 * @param replacementText pointer to the replacement text | |
1079 * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated. | |
1080 * @param status receives any error status. Possible errors include | |
1081 * U_NO_WRITE_PERMISSION | |
1082 * | |
1083 * @return The signed number of (native) storage units by which | |
1084 * the length of the text expanded or contracted. | |
1085 * | |
1086 * @stable ICU 3.4 | |
1087 */ | |
1088 typedef int32_t U_CALLCONV | |
1089 UTextReplace(UText *ut, | |
1090 int64_t nativeStart, int64_t nativeLimit, | |
1091 const UChar *replacementText, int32_t replacmentLength, | |
1092 UErrorCode *status); | |
1093 | |
1094 /** | |
1095 * Function type declaration for UText.copy(). | |
1096 * | |
1097 * Copy or move a substring from one position to another within the text, | |
1098 * while retaining any metadata associated with the text. | |
1099 * This function is used to duplicate or reorder substrings. | |
1100 * The destination index must not overlap the source range. | |
1101 * | |
1102 * The text to be copied or moved is inserted at destIndex; | |
1103 * it does not replace or overwrite any existing text. | |
1104 * | |
1105 * This function need only be implemented for UText types that support writing. | |
1106 * | |
1107 * When using this function, there should be only a single UText opened onto the | |
1108 * underlying native text string. The function is responsible for updating the | |
1109 * text chunk within the UText to reflect the updated iteration position, | |
1110 * taking into account any changes to the underlying string's structure caused | |
1111 * by the copy or move operation. | |
1112 * | |
1113 * @param ut The UText representing the text to be operated on. | |
1114 * @param nativeStart The index of the start of the region to be copied or moved | |
1115 * @param nativeLimit The index of the character following the region to be copied or moved. | |
1116 * @param nativeDest The destination index to which the source substring is copied or moved. | |
1117 * @param move If TRUE, then the substring is moved, not copied/duplicated. | |
1118 * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION | |
1119 * | |
1120 * @stable ICU 3.4 | |
1121 */ | |
1122 typedef void U_CALLCONV | |
1123 UTextCopy(UText *ut, | |
1124 int64_t nativeStart, int64_t nativeLimit, | |
1125 int64_t nativeDest, | |
1126 UBool move, | |
1127 UErrorCode *status); | |
1128 | |
1129 /** | |
1130 * Function type declaration for UText.mapOffsetToNative(). | |
1131 * Map from the current UChar offset within the current text chunk to | |
1132 * the corresponding native index in the original source text. | |
1133 * | |
1134 * This is required only for text providers that do not use native UTF-16 indexes. | |
1135 * | |
1136 * @param ut the UText. | |
1137 * @return Absolute (native) index corresponding to chunkOffset in the current chunk. | |
1138 * The returned native index should always be to a code point boundary. | |
1139 * | |
1140 * @stable ICU 3.4 | |
1141 */ | |
1142 typedef int64_t U_CALLCONV | |
1143 UTextMapOffsetToNative(const UText *ut); | |
1144 | |
1145 /** | |
1146 * Function type declaration for UText.mapNativeIndexToUTF16(). | |
1147 * Map from a native index to a UChar offset within a text chunk. | |
1148 * Behavior is undefined if the native index does not fall within the | |
1149 * current chunk. | |
1150 * | |
1151 * This function is required only for text providers that do not use native UTF-16 indexes. | |
1152 * | |
1153 * @param ut The UText containing the text chunk. | |
1154 * @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->limit. | |
1155 * @return Chunk-relative UTF-16 offset corresponding to the specified native | |
1156 * index. | |
1157 * | |
1158 * @stable ICU 3.4 | |
1159 */ | |
1160 typedef int32_t U_CALLCONV | |
1161 UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex); | |
1162 | |
1163 | |
1164 /** | |
1165 * Function type declaration for UText.close(). | |
1166 * | |
1167 * A Text Provider close function is only required for provider types that make | |
1168 * allocations in their open function (or other functions) that must be | |
1169 * cleaned when the UText is closed. | |
1170 * | |
1171 * The allocation of the UText struct itself and any "extra" storage | |
1172 * associated with the UText is handled by the common UText implementation | |
1173 * and does not require provider specific cleanup in a close function. | |
1174 * | |
1175 * Most UText provider implementations do not need to implement this function. | |
1176 * | |
1177 * @param ut A UText object to be closed. | |
1178 * | |
1179 * @stable ICU 3.4 | |
1180 */ | |
1181 typedef void U_CALLCONV | |
1182 UTextClose(UText *ut); | |
1183 | |
1184 | |
1185 /** | |
1186 * (public) Function dispatch table for UText. | |
1187 * Conceptually very much like a C++ Virtual Function Table. | |
1188 * This struct defines the organization of the table. | |
1189 * Each text provider implementation must provide an | |
1190 * actual table that is initialized with the appropriate functions | |
1191 * for the type of text being handled. | |
1192 * @stable ICU 3.6 | |
1193 */ | |
1194 struct UTextFuncs { | |
1195 /** | |
1196 * (public) Function table size, sizeof(UTextFuncs) | |
1197 * Intended for use should the table grow to accommodate added | |
1198 * functions in the future, to allow tests for older format | |
1199 * function tables that do not contain the extensions. | |
1200 * | |
1201 * Fields are placed for optimal alignment on | |
1202 * 32/64/128-bit-pointer machines, by normally grouping together | |
1203 * 4 32-bit fields, | |
1204 * 4 pointers, | |
1205 * 2 64-bit fields | |
1206 * in sequence. | |
1207 * @stable ICU 3.6 | |
1208 */ | |
1209 int32_t tableSize; | |
1210 | |
1211 /** | |
1212 * (private) Alignment padding. | |
1213 * Do not use, reserved for use by the UText framework only. | |
1214 * @internal | |
1215 */ | |
1216 int32_t reserved1, /** @internal */ reserved2, /** @internal */ reserved3; | |
1217 | |
1218 | |
1219 /** | |
1220 * (public) Function pointer for UTextClone | |
1221 * | |
1222 * @see UTextClone | |
1223 * @stable ICU 3.6 | |
1224 */ | |
1225 UTextClone *clone; | |
1226 | |
1227 /** | |
1228 * (public) Function pointer for UTextNativeLength. | |
1229 * May be expensive to compute! | |
1230 * | |
1231 * @see UTextNativeLength | |
1232 * @stable ICU 3.6 | |
1233 */ | |
1234 UTextNativeLength *nativeLength; | |
1235 | |
1236 /** | |
1237 * (public) Function pointer for UTextAccess. | |
1238 * | |
1239 * @see UTextAccess | |
1240 * @stable ICU 3.6 | |
1241 */ | |
1242 UTextAccess *access; | |
1243 | |
1244 /** | |
1245 * (public) Function pointer for UTextExtract. | |
1246 * | |
1247 * @see UTextExtract | |
1248 * @stable ICU 3.6 | |
1249 */ | |
1250 UTextExtract *extract; | |
1251 | |
1252 /** | |
1253 * (public) Function pointer for UTextReplace. | |
1254 * | |
1255 * @see UTextReplace | |
1256 * @stable ICU 3.6 | |
1257 */ | |
1258 UTextReplace *replace; | |
1259 | |
1260 /** | |
1261 * (public) Function pointer for UTextCopy. | |
1262 * | |
1263 * @see UTextCopy | |
1264 * @stable ICU 3.6 | |
1265 */ | |
1266 UTextCopy *copy; | |
1267 | |
1268 /** | |
1269 * (public) Function pointer for UTextMapOffsetToNative. | |
1270 * | |
1271 * @see UTextMapOffsetToNative | |
1272 * @stable ICU 3.6 | |
1273 */ | |
1274 UTextMapOffsetToNative *mapOffsetToNative; | |
1275 | |
1276 /** | |
1277 * (public) Function pointer for UTextMapNativeIndexToUTF16. | |
1278 * | |
1279 * @see UTextMapNativeIndexToUTF16 | |
1280 * @stable ICU 3.6 | |
1281 */ | |
1282 UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16; | |
1283 | |
1284 /** | |
1285 * (public) Function pointer for UTextClose. | |
1286 * | |
1287 * @see UTextClose | |
1288 * @stable ICU 3.6 | |
1289 */ | |
1290 UTextClose *close; | |
1291 | |
1292 /** | |
1293 * (private) Spare function pointer | |
1294 * @internal | |
1295 */ | |
1296 UTextClose *spare1; | |
1297 | |
1298 /** | |
1299 * (private) Spare function pointer | |
1300 * @internal | |
1301 */ | |
1302 UTextClose *spare2; | |
1303 | |
1304 /** | |
1305 * (private) Spare function pointer | |
1306 * @internal | |
1307 */ | |
1308 UTextClose *spare3; | |
1309 | |
1310 }; | |
1311 /** | |
1312 * Function dispatch table for UText | |
1313 * @see UTextFuncs | |
1314 */ | |
1315 typedef struct UTextFuncs UTextFuncs; | |
1316 | |
1317 /** | |
1318 * UText struct. Provides the interface between the generic UText access code | |
1319 * and the UText provider code that works on specific kinds of | |
1320 * text (UTF-8, noncontiguous UTF-16, whatever.) | |
1321 * | |
1322 * Applications that are using predefined types of text providers | |
1323 * to pass text data to ICU services will have no need to view the | |
1324 * internals of the UText structs that they open. | |
1325 * | |
1326 * @stable ICU 3.6 | |
1327 */ | |
1328 struct UText { | |
1329 /** | |
1330 * (private) Magic. Used to help detect when UText functions are handed | |
1331 * invalid or uninitialized UText structs. | |
1332 * utext_openXYZ() functions take an initialized, | |
1333 * but not necessarily open, UText struct as an | |
1334 * optional fill-in parameter. This magic field | |
1335 * is used to check for that initialization. | |
1336 * Text provider close functions must NOT clear | |
1337 * the magic field because that would prevent | |
1338 * reuse of the UText struct. | |
1339 * @internal | |
1340 */ | |
1341 uint32_t magic; | |
1342 | |
1343 | |
1344 /** | |
1345 * (private) Flags for managing the allocation and freeing of | |
1346 * memory associated with this UText. | |
1347 * @internal | |
1348 */ | |
1349 int32_t flags; | |
1350 | |
1351 | |
1352 /** | |
1353 * Text provider properties. This set of flags is maintained by the | |
1354 * text provider implementation. | |
1355 * @stable ICU 3.4 | |
1356 */ | |
1357 int32_t providerProperties; | |
1358 | |
1359 /** | |
1360 * (public) sizeOfStruct=sizeof(UText) | |
1361 * Allows possible backward compatible extension. | |
1362 * | |
1363 * @stable ICU 3.4 | |
1364 */ | |
1365 int32_t sizeOfStruct; | |
1366 | |
1367 /* ------ 16 byte alignment boundary ----------- */ | |
1368 | |
1369 | |
1370 /** | |
1371 * (protected) Native index of the first character position following | |
1372 * the current chunk. | |
1373 * @stable ICU 3.6 | |
1374 */ | |
1375 int64_t chunkNativeLimit; | |
1376 | |
1377 /** | |
1378 * (protected) Size in bytes of the extra space (pExtra). | |
1379 * @stable ICU 3.4 | |
1380 */ | |
1381 int32_t extraSize; | |
1382 | |
1383 /** | |
1384 * (protected) The highest chunk offset where native indexing and | |
1385 * chunk (UTF-16) indexing correspond. For UTF-16 sources, value | |
1386 * will be equal to chunkLength. | |
1387 * | |
1388 * @stable ICU 3.6 | |
1389 */ | |
1390 int32_t nativeIndexingLimit; | |
1391 | |
1392 /* ---- 16 byte alignment boundary------ */ | |
1393 | |
1394 /** | |
1395 * (protected) Native index of the first character in the text chunk. | |
1396 * @stable ICU 3.6 | |
1397 */ | |
1398 int64_t chunkNativeStart; | |
1399 | |
1400 /** | |
1401 * (protected) Current iteration position within the text chunk (UTF-16 buffer). | |
1402 * This is the index to the character that will be returned by utext_next32(). | |
1403 * @stable ICU 3.6 | |
1404 */ | |
1405 int32_t chunkOffset; | |
1406 | |
1407 /** | |
1408 * (protected) Length of the text chunk (UTF-16 buffer), in UChars. | |
1409 * @stable ICU 3.6 | |
1410 */ | |
1411 int32_t chunkLength; | |
1412 | |
1413 /* ---- 16 byte alignment boundary-- */ | |
1414 | |
1415 | |
1416 /** | |
1417 * (protected) pointer to a chunk of text in UTF-16 format. | |
1418 * May refer either to original storage of the source of the text, or | |
1419 * if conversion was required, to a buffer owned by the UText. | |
1420 * @stable ICU 3.6 | |
1421 */ | |
1422 const UChar *chunkContents; | |
1423 | |
1424 /** | |
1425 * (public) Pointer to Dispatch table for accessing functions for this UText. | |
1426 * @stable ICU 3.6 | |
1427 */ | |
1428 const UTextFuncs *pFuncs; | |
1429 | |
1430 /** | |
1431 * (protected) Pointer to additional space requested by the | |
1432 * text provider during the utext_open operation. | |
1433 * @stable ICU 3.4 | |
1434 */ | |
1435 void *pExtra; | |
1436 | |
1437 /** | |
1438 * (protected) Pointer to string or text-containing object or similar. | |
1439 * This is the source of the text that this UText is wrapping, in a format | |
1440 * that is known to the text provider functions. | |
1441 * @stable ICU 3.4 | |
1442 */ | |
1443 const void *context; | |
1444 | |
1445 /* --- 16 byte alignment boundary--- */ | |
1446 | |
1447 /** | |
1448 * (protected) Pointer fields available for use by the text provider. | |
1449 * Not used by UText common code. | |
1450 * @stable ICU 3.6 | |
1451 */ | |
1452 const void *p; | |
1453 /** | |
1454 * (protected) Pointer fields available for use by the text provider. | |
1455 * Not used by UText common code. | |
1456 * @stable ICU 3.6 | |
1457 */ | |
1458 const void *q; | |
1459 /** | |
1460 * (protected) Pointer fields available for use by the text provider. | |
1461 * Not used by UText common code. | |
1462 * @stable ICU 3.6 | |
1463 */ | |
1464 const void *r; | |
1465 | |
1466 /** | |
1467 * Private field reserved for future use by the UText framework | |
1468 * itself. This is not to be touched by the text providers. | |
1469 * @internal ICU 3.4 | |
1470 */ | |
1471 void *privP; | |
1472 | |
1473 | |
1474 /* --- 16 byte alignment boundary--- */ | |
1475 | |
1476 | |
1477 /** | |
1478 * (protected) Integer field reserved for use by the text provider. | |
1479 * Not used by the UText framework, or by the client (user) of the UText. | |
1480 * @stable ICU 3.4 | |
1481 */ | |
1482 int64_t a; | |
1483 | |
1484 /** | |
1485 * (protected) Integer field reserved for use by the text provider. | |
1486 * Not used by the UText framework, or by the client (user) of the UText. | |
1487 * @stable ICU 3.4 | |
1488 */ | |
1489 int32_t b; | |
1490 | |
1491 /** | |
1492 * (protected) Integer field reserved for use by the text provider. | |
1493 * Not used by the UText framework, or by the client (user) of the UText. | |
1494 * @stable ICU 3.4 | |
1495 */ | |
1496 int32_t c; | |
1497 | |
1498 /* ---- 16 byte alignment boundary---- */ | |
1499 | |
1500 | |
1501 /** | |
1502 * Private field reserved for future use by the UText framework | |
1503 * itself. This is not to be touched by the text providers. | |
1504 * @internal ICU 3.4 | |
1505 */ | |
1506 int64_t privA; | |
1507 /** | |
1508 * Private field reserved for future use by the UText framework | |
1509 * itself. This is not to be touched by the text providers. | |
1510 * @internal ICU 3.4 | |
1511 */ | |
1512 int32_t privB; | |
1513 /** | |
1514 * Private field reserved for future use by the UText framework | |
1515 * itself. This is not to be touched by the text providers. | |
1516 * @internal ICU 3.4 | |
1517 */ | |
1518 int32_t privC; | |
1519 }; | |
1520 | |
1521 | |
1522 /** | |
1523 * Common function for use by Text Provider implementations to allocate and/or initialize | |
1524 * a new UText struct. To be called in the implementation of utext_open() functions. | |
1525 * If the supplied UText parameter is null, a new UText struct will be allocated on the heap. | |
1526 * If the supplied UText is already open, the provider's close function will be called | |
1527 * so that the struct can be reused by the open that is in progress. | |
1528 * | |
1529 * @param ut Pointer to a UText struct to be reused, or null if a new UText | |
1530 * should be allocated. Per the UTEXT_INITIALIZER documentation, local (stack) structs must be initialized before being passed to a utext_open function. | |
1531 * @param extraSpace The amount of additional space to be allocated as part | |
1532 * of this UText, for use by types of providers that require | |
1533 * additional storage. NOTE(review): the unit is presumably bytes; confirm against the implementation. | |
1534 * @param status Errors are returned here. | |
1535 * @return Pointer to the UText, allocated if necessary, with extra space set up if requested. | |
1536 * @stable ICU 3.4 | |
1537 */ | |
1538 U_STABLE UText * U_EXPORT2 | |
1539 utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status); | |
1540 | |
1541 // Do not use #ifndef U_HIDE_INTERNAL_API around the following: the public UTEXT_INITIALIZER macro expands to UTEXT_MAGIC. | |
1542 /** | |
1543 * @internal | |
1544 * Value used to help identify correctly initialized UText structs. | |
1545 * Note: must be publicly visible so that UTEXT_INITIALIZER can access it; that macro uses it as its first (magic) initializer value. | |
1546 */ | |
1547 enum { | |
1548 UTEXT_MAGIC = 0x345ad82c | |
1549 }; | |
1550 | |
1551 /** | |
1552 * Initializer to be used with local (stack) instances of a UText | |
1553 * struct. UText structs must be initialized before passing | |
1554 * them to one of the utext_open functions. | |
1555 * The values below are positional (no designators), so their order must match the member order of struct UText. | |
1556 * @stable ICU 3.6 | |
1557 */ | |
1558 #define UTEXT_INITIALIZER { \ | |
1559 UTEXT_MAGIC, /* magic */ \ | |
1560 0, /* flags */ \ | |
1561 0, /* providerProps */ \ | |
1562 sizeof(UText), /* sizeOfStruct */ \ | |
1563 0, /* chunkNativeLimit */ \ | |
1564 0, /* extraSize */ \ | |
1565 0, /* nativeIndexingLimit */ \ | |
1566 0, /* chunkNativeStart */ \ | |
1567 0, /* chunkOffset */ \ | |
1568 0, /* chunkLength */ \ | |
1569 NULL, /* chunkContents */ \ | |
1570 NULL, /* pFuncs */ \ | |
1571 NULL, /* pExtra */ \ | |
1572 NULL, /* context */ \ | |
1573 NULL, NULL, NULL, /* p, q, r */ \ | |
1574 NULL, /* privP */ \ | |
1575 0, 0, 0, /* a, b, c */ \ | |
1576 0, 0, 0 /* privA, privB, privC */ \ | |
1577 } | |
1578 | |
1579 | |
1580 U_CDECL_END | |
1581 | |
1582 | |
1583 #if U_SHOW_CPLUSPLUS_API | |
1584 | |
1585 U_NAMESPACE_BEGIN | |
1586 | |
1587 /** | |
1588 * \class LocalUTextPointer | |
1589 * "Smart pointer" class that closes a UText via utext_close(). | |
1590 * For most methods see the LocalPointerBase base class. | |
1591 * | |
1592 * @see LocalPointerBase | |
1593 * @see LocalPointer | |
1594 * @stable ICU 4.4 | |
1595 */ | |
1596 U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close); | |
1597 | |
1598 U_NAMESPACE_END | |
1599 | |
1600 #endif /* U_SHOW_CPLUSPLUS_API */ | |
1601 | |
1602 | |
1603 #endif |