Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/unicode/brkiter.h @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 // © 2016 and later: Unicode, Inc. and others. | |
2 // License & terms of use: http://www.unicode.org/copyright.html | |
3 /* | |
4 ******************************************************************************** | |
5 * Copyright (C) 1997-2016, International Business Machines | |
6 * Corporation and others. All Rights Reserved. | |
7 ******************************************************************************** | |
8 * | |
9 * File brkiter.h | |
10 * | |
11 * Modification History: | |
12 * | |
13 * Date Name Description | |
14 * 02/18/97 aliu Added typedef for TextCount. Made DONE const. | |
15 * 05/07/97 aliu Fixed DLL declaration. | |
16 * 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK | |
17 * 08/11/98 helena Sync-up JDK1.2. | |
18 * 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods. | |
19 ******************************************************************************** | |
20 */ | |
21 | |
22 #ifndef BRKITER_H | |
23 #define BRKITER_H | |
24 | |
25 #include "unicode/utypes.h" | |
26 | |
27 /** | |
28 * \file | |
29 * \brief C++ API: Break Iterator. | |
30 */ | |
31 | |
32 #include "unicode/utypes.h" | |
33 | |
34 #if U_SHOW_CPLUSPLUS_API | |
35 | |
36 #if UCONFIG_NO_BREAK_ITERATION | |
37 | |
38 U_NAMESPACE_BEGIN | |
39 | |
40 /* | |
41 * Allow the declaration of APIs with pointers to BreakIterator | |
42 * even when break iteration is removed from the build. | |
43 */ | |
44 class BreakIterator; | |
45 | |
46 U_NAMESPACE_END | |
47 | |
48 #else | |
49 | |
50 #include "unicode/uobject.h" | |
51 #include "unicode/unistr.h" | |
52 #include "unicode/chariter.h" | |
53 #include "unicode/locid.h" | |
54 #include "unicode/ubrk.h" | |
55 #include "unicode/strenum.h" | |
56 #include "unicode/utext.h" | |
57 #include "unicode/umisc.h" | |
58 | |
59 U_NAMESPACE_BEGIN | |
60 | |
61 /** | |
62 * The BreakIterator class implements methods for finding the location | |
63 * of boundaries in text. BreakIterator is an abstract base class. | |
64 * Instances of BreakIterator maintain a current position and scan over | |
65 * text returning the index of characters where boundaries occur. | |
66 * <p> | |
67 * Line boundary analysis determines where a text string can be broken | |
68 * when line-wrapping. The mechanism correctly handles punctuation and | |
69 * hyphenated words. | |
70 * <p> | |
71 * Sentence boundary analysis allows selection with correct | |
72 * interpretation of periods within numbers and abbreviations, and | |
73 * trailing punctuation marks such as quotation marks and parentheses. | |
74 * <p> | |
75 * Word boundary analysis is used by search and replace functions, as | |
76 * well as within text editing applications that allow the user to | |
77 * select words with a double click. Word selection provides correct | |
78 * interpretation of punctuation marks within and following | |
79 * words. Characters that are not part of a word, such as symbols or | |
80 * punctuation marks, have word-breaks on both sides. | |
81 * <p> | |
82 * Character boundary analysis allows users to interact with | |
83 * characters as they expect to, for example, when moving the cursor | |
84 * through a text string. Character boundary analysis provides correct | |
85 * navigation through character strings, regardless of how the | |
86 * character is stored. For example, an accented character might be | |
87 * stored as a base character and a diacritical mark. What users | |
88 * consider to be a character can differ between languages. | |
89 * <p> | |
90 * The text boundary positions are found according to the rules | |
91 * described in Unicode Standard Annex #29, Text Boundaries, and | |
92 * Unicode Standard Annex #14, Line Breaking Properties. These | |
93 * are available at http://www.unicode.org/reports/tr29/ and | |
94 * http://www.unicode.org/reports/tr14/. | |
95 * <p> | |
96 * In addition to the C++ API defined in this header file, a | |
97 * plain C API with equivalent functionality is defined in the | |
98 * file ubrk.h | |
99 * <p> | |
100 * Code snippets illustrating the use of the Break Iterator APIs | |
101 * are available in the ICU User Guide, | |
102 * http://icu-project.org/userguide/boundaryAnalysis.html | |
103 * and in the sample program icu/source/samples/break/break.cpp | |
104 * | |
105 */ | |
106 class U_COMMON_API BreakIterator : public UObject { | |
107 public: | |
108 /** | |
109 * destructor | |
110 * @stable ICU 2.0 | |
111 */ | |
112 virtual ~BreakIterator(); | |
113 | |
114 /** | |
115 * Return true if another object is semantically equal to this | |
116 * one. The other object should be an instance of the same subclass of | |
117 * BreakIterator. Objects of different subclasses are considered | |
118 * unequal. | |
119 * <P> | |
120 * Return true if this BreakIterator is at the same position in the | |
121 * same text, and is the same class and type (word, line, etc.) of | |
122 * BreakIterator, as the argument. Text is considered the same if | |
123 * it contains the same characters, it need not be the same | |
124 * object, and styles are not considered. | |
125 * @stable ICU 2.0 | |
126 */ | |
127 virtual UBool operator==(const BreakIterator&) const = 0; | |
128 | |
129 /** | |
130 * Returns the complement of the result of operator== | |
131 * @param rhs The BreakIterator to be compared for inequality | |
132 * @return the complement of the result of operator== | |
133 * @stable ICU 2.0 | |
134 */ | |
135 UBool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); } | |
136 | |
137 /** | |
138 * Return a polymorphic copy of this object. This is an abstract | |
139 * method which subclasses implement. | |
140 * @stable ICU 2.0 | |
141 */ | |
142 virtual BreakIterator* clone() const = 0; | |
143 | |
144 /** | |
145 * Return a polymorphic class ID for this object. Different subclasses | |
146 * will return distinct unequal values. | |
147 * @stable ICU 2.0 | |
148 */ | |
149 virtual UClassID getDynamicClassID(void) const = 0; | |
150 | |
151 /** | |
152 * Return a CharacterIterator over the text being analyzed. | |
153 * @stable ICU 2.0 | |
154 */ | |
155 virtual CharacterIterator& getText(void) const = 0; | |
156 | |
157 | |
158 /** | |
159 * Get a UText for the text being analyzed. | |
160 * The returned UText is a shallow clone of the UText used internally | |
161 * by the break iterator implementation. It can safely be used to | |
162 * access the text without impacting any break iterator operations, | |
163 * but the underlying text itself must not be altered. | |
164 * | |
165 * @param fillIn A UText to be filled in. If NULL, a new UText will be | |
166 * allocated to hold the result. | |
167 * @param status receives any error codes. | |
168 * @return The current UText for this break iterator. If an input | |
169 * UText was provided, it will always be returned. | |
170 * @stable ICU 3.4 | |
171 */ | |
172 virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0; | |
173 | |
174 /** | |
175 * Change the text over which this operates. The text boundary is | |
176 * reset to the start. | |
177 * | |
178 * The BreakIterator will retain a reference to the supplied string. | |
179 * The caller must not modify or delete the text while the BreakIterator | |
180 * retains the reference. | |
181 * | |
182 * @param text The UnicodeString used to change the text. | |
183 * @stable ICU 2.0 | |
184 */ | |
185 virtual void setText(const UnicodeString &text) = 0; | |
186 | |
187 /** | |
188 * Reset the break iterator to operate over the text represented by | |
189 * the UText. The iterator position is reset to the start. | |
190 * | |
191 * This function makes a shallow clone of the supplied UText. This means | |
192 * that the caller is free to immediately close or otherwise reuse the | |
193 * UText that was passed as a parameter, but that the underlying text itself | |
194 * must not be altered while being referenced by the break iterator. | |
195 * | |
196 * All index positions returned by break iterator functions are | |
197 * native indices from the UText. For example, when breaking UTF-8 | |
198 * encoded text, the break positions returned by next(), previous(), etc. | |
199 * will be UTF-8 string indices, not UTF-16 positions. | |
200 * | |
201 * @param text The UText used to change the text. | |
202 * @param status receives any error codes. | |
203 * @stable ICU 3.4 | |
204 */ | |
205 virtual void setText(UText *text, UErrorCode &status) = 0; | |
206 | |
207 /** | |
208 * Change the text over which this operates. The text boundary is | |
209 * reset to the start. | |
210 * Note that setText(UText *) provides similar functionality to this function, | |
211 * and is more efficient. | |
212 * @param it The CharacterIterator used to change the text. | |
213 * @stable ICU 2.0 | |
214 */ | |
215 virtual void adoptText(CharacterIterator* it) = 0; | |
216 | |
217 enum { | |
218 /** | |
219 * DONE is returned by previous() and next() after all valid | |
220 * boundaries have been returned. | |
221 * @stable ICU 2.0 | |
222 */ | |
223 DONE = (int32_t)-1 | |
224 }; | |
225 | |
226 /** | |
227 * Sets the current iteration position to the beginning of the text, position zero. | |
228 * @return The offset of the beginning of the text, zero. | |
229 * @stable ICU 2.0 | |
230 */ | |
231 virtual int32_t first(void) = 0; | |
232 | |
233 /** | |
234 * Set the iterator position to the index immediately BEYOND the last character in the text being scanned. | |
235 * @return The index immediately BEYOND the last character in the text being scanned. | |
236 * @stable ICU 2.0 | |
237 */ | |
238 virtual int32_t last(void) = 0; | |
239 | |
240 /** | |
241 * Set the iterator position to the boundary preceding the current boundary. | |
242 * @return The character index of the previous text boundary or DONE if all | |
243 * boundaries have been returned. | |
244 * @stable ICU 2.0 | |
245 */ | |
246 virtual int32_t previous(void) = 0; | |
247 | |
248 /** | |
249 * Advance the iterator to the boundary following the current boundary. | |
250 * @return The character index of the next text boundary or DONE if all | |
251 * boundaries have been returned. | |
252 * @stable ICU 2.0 | |
253 */ | |
254 virtual int32_t next(void) = 0; | |
255 | |
256 /** | |
257 * Return character index of the current iterator position within the text. | |
258 * @return The boundary most recently returned. | |
259 * @stable ICU 2.0 | |
260 */ | |
261 virtual int32_t current(void) const = 0; | |
262 | |
263 /** | |
264 * Advance the iterator to the first boundary following the specified offset. | |
265 * The value returned is always greater than the offset or | |
266 * the value BreakIterator::DONE | |
267 * @param offset the offset to begin scanning. | |
268 * @return The first boundary after the specified offset. | |
269 * @stable ICU 2.0 | |
270 */ | |
271 virtual int32_t following(int32_t offset) = 0; | |
272 | |
273 /** | |
274 * Set the iterator position to the first boundary preceding the specified offset. | |
275 * The value returned is always smaller than the offset or | |
276 * the value BreakIterator::DONE | |
277 * @param offset the offset to begin scanning. | |
278 * @return The first boundary before the specified offset. | |
279 * @stable ICU 2.0 | |
280 */ | |
281 virtual int32_t preceding(int32_t offset) = 0; | |
282 | |
283 /** | |
284 * Return true if the specified position is a boundary position. | |
285 * As a side effect, the current position of the iterator is set | |
286 * to the first boundary position at or following the specified offset. | |
287 * @param offset the offset to check. | |
288 * @return True if "offset" is a boundary position. | |
289 * @stable ICU 2.0 | |
290 */ | |
291 virtual UBool isBoundary(int32_t offset) = 0; | |
292 | |
293 /** | |
294 * Set the iterator position to the nth boundary from the current boundary | |
295 * @param n the number of boundaries to move by. A value of 0 | |
296 * does nothing. Negative values move to previous boundaries | |
297 * and positive values move to later boundaries. | |
298 * @return The new iterator position, or | |
299 * DONE if there are fewer than |n| boundaries in the specified direction. | |
300 * @stable ICU 2.0 | |
301 */ | |
302 virtual int32_t next(int32_t n) = 0; | |
303 | |
304 /** | |
305 * For RuleBasedBreakIterators, return the status tag from the break rule | |
306 * that determined the boundary at the current iteration position. | |
307 * <p> | |
308 * For break iterator types that do not support a rule status, | |
309 * a default value of 0 is returned. | |
310 * <p> | |
311 * @return the status from the break rule that determined the boundary at | |
312 * the current iteration position. | |
313 * @see RuleBasedBreakIterator::getRuleStatus() | |
314 * @see UWordBreak | |
315 * @stable ICU 52 | |
316 */ | |
317 virtual int32_t getRuleStatus() const; | |
318 | |
319 /** | |
320 * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) | |
321 * that determined the boundary at the current iteration position. | |
322 * <p> | |
323 * For break iterator types that do not support rule status, | |
324 * no values are returned. | |
325 * <p> | |
326 * The returned status value(s) are stored into an array provided by the caller. | |
327 * The values are stored in sorted (ascending) order. | |
328 * If the capacity of the output array is insufficient to hold the data, | |
329 * the output will be truncated to the available length, and a | |
330 * U_BUFFER_OVERFLOW_ERROR will be signaled. | |
331 * <p> | |
332 * @see RuleBasedBreakIterator::getRuleStatusVec | |
333 * | |
334 * @param fillInVec an array to be filled in with the status values. | |
335 * @param capacity the length of the supplied vector. A length of zero causes | |
336 * the function to return the number of status values, in the | |
337 * normal way, without attempting to store any values. | |
338 * @param status receives error codes. | |
339 * @return The number of rule status values from rules that determined | |
340 * the boundary at the current iteration position. | |
341 * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value | |
342 * is the total number of status values that were available, | |
343 * not the reduced number that were actually returned. | |
344 * @see getRuleStatus | |
345 * @stable ICU 52 | |
346 */ | |
347 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status); | |
348 | |
349 /** | |
350 * Create BreakIterator for word-breaks using the given locale. | |
351 * Returns an instance of a BreakIterator implementing word breaks. | |
352 * WordBreak is useful for word selection (ex. double click) | |
353 * @param where the locale. | |
354 * @param status the error code | |
355 * @return A BreakIterator for word-breaks. The UErrorCode& status | |
356 * parameter is used to return status information to the user. | |
357 * To check whether the construction succeeded or not, you should check | |
358 * the value of U_SUCCESS(status). If you wish more detailed information, you | |
359 * can check for informational error results which still indicate success. | |
360 * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For | |
361 * example, 'de_CH' was requested, but nothing was found there, so 'de' was | |
362 * used. U_USING_DEFAULT_WARNING indicates that the default locale data was | |
363 * used; neither the requested locale nor any of its fall back locales | |
364 * could be found. | |
365 * The caller owns the returned object and is responsible for deleting it. | |
366 * @stable ICU 2.0 | |
367 */ | |
368 static BreakIterator* U_EXPORT2 | |
369 createWordInstance(const Locale& where, UErrorCode& status); | |
370 | |
371 /** | |
372 * Create BreakIterator for line-breaks using specified locale. | |
373 * Returns an instance of a BreakIterator implementing line breaks. Line | |
374 * breaks are logically possible line breaks, actual line breaks are | |
375 * usually determined based on display width. | |
376 * LineBreak is useful for word wrapping text. | |
377 * @param where the locale. | |
378 * @param status The error code. | |
379 * @return A BreakIterator for line-breaks. The UErrorCode& status | |
380 * parameter is used to return status information to the user. | |
381 * To check whether the construction succeeded or not, you should check | |
382 * the value of U_SUCCESS(status). If you wish more detailed information, you | |
383 * can check for informational error results which still indicate success. | |
384 * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For | |
385 * example, 'de_CH' was requested, but nothing was found there, so 'de' was | |
386 * used. U_USING_DEFAULT_WARNING indicates that the default locale data was | |
387 * used; neither the requested locale nor any of its fall back locales | |
388 * could be found. | |
389 * The caller owns the returned object and is responsible for deleting it. | |
390 * @stable ICU 2.0 | |
391 */ | |
392 static BreakIterator* U_EXPORT2 | |
393 createLineInstance(const Locale& where, UErrorCode& status); | |
394 | |
395 /** | |
396 * Create BreakIterator for character-breaks using specified locale | |
397 * Returns an instance of a BreakIterator implementing character breaks. | |
398 * Character breaks are boundaries of combining character sequences. | |
399 * @param where the locale. | |
400 * @param status The error code. | |
401 * @return A BreakIterator for character-breaks. The UErrorCode& status | |
402 * parameter is used to return status information to the user. | |
403 * To check whether the construction succeeded or not, you should check | |
404 * the value of U_SUCCESS(status). If you wish more detailed information, you | |
405 * can check for informational error results which still indicate success. | |
406 * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For | |
407 * example, 'de_CH' was requested, but nothing was found there, so 'de' was | |
408 * used. U_USING_DEFAULT_WARNING indicates that the default locale data was | |
409 * used; neither the requested locale nor any of its fall back locales | |
410 * could be found. | |
411 * The caller owns the returned object and is responsible for deleting it. | |
412 * @stable ICU 2.0 | |
413 */ | |
414 static BreakIterator* U_EXPORT2 | |
415 createCharacterInstance(const Locale& where, UErrorCode& status); | |
416 | |
417 /** | |
418 * Create BreakIterator for sentence-breaks using specified locale | |
419 * Returns an instance of a BreakIterator implementing sentence breaks. | |
420 * @param where the locale. | |
421 * @param status The error code. | |
422 * @return A BreakIterator for sentence-breaks. The UErrorCode& status | |
423 * parameter is used to return status information to the user. | |
424 * To check whether the construction succeeded or not, you should check | |
425 * the value of U_SUCCESS(status). If you wish more detailed information, you | |
426 * can check for informational error results which still indicate success. | |
427 * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For | |
428 * example, 'de_CH' was requested, but nothing was found there, so 'de' was | |
429 * used. U_USING_DEFAULT_WARNING indicates that the default locale data was | |
430 * used; neither the requested locale nor any of its fall back locales | |
431 * could be found. | |
432 * The caller owns the returned object and is responsible for deleting it. | |
433 * @stable ICU 2.0 | |
434 */ | |
435 static BreakIterator* U_EXPORT2 | |
436 createSentenceInstance(const Locale& where, UErrorCode& status); | |
437 | |
438 #ifndef U_HIDE_DEPRECATED_API | |
439 /** | |
440 * Create BreakIterator for title-casing breaks using the specified locale | |
441 * Returns an instance of a BreakIterator implementing title breaks. | |
442 * The iterator returned locates title boundaries as described for | |
443 * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration, | |
444 * please use a word boundary iterator. See {@link #createWordInstance }. | |
445 * | |
446 * @param where the locale. | |
447 * @param status The error code. | |
448 * @return A BreakIterator for title-breaks. The UErrorCode& status | |
449 * parameter is used to return status information to the user. | |
450 * To check whether the construction succeeded or not, you should check | |
451 * the value of U_SUCCESS(status). If you wish more detailed information, you | |
452 * can check for informational error results which still indicate success. | |
453 * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For | |
454 * example, 'de_CH' was requested, but nothing was found there, so 'de' was | |
455 * used. U_USING_DEFAULT_WARNING indicates that the default locale data was | |
456 * used; neither the requested locale nor any of its fall back locales | |
457 * could be found. | |
458 * The caller owns the returned object and is responsible for deleting it. | |
459 * @deprecated ICU 64 Use createWordInstance instead. | |
460 */ | |
461 static BreakIterator* U_EXPORT2 | |
462 createTitleInstance(const Locale& where, UErrorCode& status); | |
463 #endif /* U_HIDE_DEPRECATED_API */ | |
464 | |
465 /** | |
466 * Get the set of Locales for which TextBoundaries are installed. | |
467 * <p><b>Note:</b> this will not return locales added through the register | |
468 * call. To see the registered locales too, use the getAvailableLocales | |
469 * function that returns a StringEnumeration object </p> | |
470 * @param count the output parameter of number of elements in the locale list | |
471 * @return available locales | |
472 * @stable ICU 2.0 | |
473 */ | |
474 static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); | |
475 | |
476 /** | |
477 * Get name of the object for the desired Locale, in the desired language. | |
478 * @param objectLocale must be from getAvailableLocales. | |
479 * @param displayLocale specifies the desired locale for output. | |
480 * @param name the fill-in parameter of the return value | |
481 * Uses best match. | |
482 * @return user-displayable name | |
483 * @stable ICU 2.0 | |
484 */ | |
485 static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale, | |
486 const Locale& displayLocale, | |
487 UnicodeString& name); | |
488 | |
489 /** | |
490 * Get name of the object for the desired Locale, in the language of the | |
491 * default locale. | |
492 * @param objectLocale must be from getAvailableLocales | |
493 * @param name the fill-in parameter of the return value | |
494 * @return user-displayable name | |
495 * @stable ICU 2.0 | |
496 */ | |
497 static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale, | |
498 UnicodeString& name); | |
499 | |
500 #ifndef U_FORCE_HIDE_DEPRECATED_API | |
501 /** | |
502 * Deprecated functionality. Use clone() instead. | |
503 * | |
504 * Thread safe client-buffer-based cloning operation | |
505 * Do NOT call delete on a safeclone, since 'new' is not used to create it. | |
506 * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated. | |
507 * If buffer is not large enough, new memory will be allocated. | |
508 * @param BufferSize reference to size of allocated space. | |
509 * If BufferSize == 0, a sufficient size for use in cloning will | |
510 * be returned ('pre-flighting') | |
511 * If BufferSize is not enough for a stack-based safe clone, | |
512 * new memory will be allocated. | |
513 * @param status to indicate whether the operation went on smoothly or there were errors | |
514 * An informational status value, U_SAFECLONE_ALLOCATED_WARNING, is used if any allocations were | |
515 * necessary. | |
516 * @return pointer to the new clone | |
517 * | |
518 * @deprecated ICU 52. Use clone() instead. | |
519 */ | |
520 virtual BreakIterator * createBufferClone(void *stackBuffer, | |
521 int32_t &BufferSize, | |
522 UErrorCode &status) = 0; | |
523 #endif // U_FORCE_HIDE_DEPRECATED_API | |
524 | |
525 #ifndef U_HIDE_DEPRECATED_API | |
526 | |
527 /** | |
528 * Determine whether the BreakIterator was created in user memory by | |
529 * createBufferClone(), and thus should not be deleted. Such objects | |
530 * must be closed by an explicit call to the destructor (not delete). | |
531 * @deprecated ICU 52. Always delete the BreakIterator. | |
532 */ | |
533 inline UBool isBufferClone(void); | |
534 | |
535 #endif /* U_HIDE_DEPRECATED_API */ | |
536 | |
537 #if !UCONFIG_NO_SERVICE | |
538 /** | |
539 * Register a new break iterator of the indicated kind, to use in the given locale. | |
540 * The break iterator will be adopted. Clones of the iterator will be returned | |
541 * if a request for a break iterator of the given kind matches or falls back to | |
542 * this locale. | |
543 * Because ICU may choose to cache BreakIterators internally, this must | |
544 * be called at application startup, prior to any calls to | |
545 * BreakIterator::createXXXInstance to avoid undefined behavior. | |
546 * @param toAdopt the BreakIterator instance to be adopted | |
547 * @param locale the Locale for which this instance is to be registered | |
548 * @param kind the type of iterator for which this instance is to be registered | |
549 * @param status the in/out status code, no special meanings are assigned | |
550 * @return a registry key that can be used to unregister this instance | |
551 * @stable ICU 2.4 | |
552 */ | |
553 static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt, | |
554 const Locale& locale, | |
555 UBreakIteratorType kind, | |
556 UErrorCode& status); | |
557 | |
558 /** | |
559 * Unregister a previously-registered BreakIterator using the key returned from the | |
560 * register call. Key becomes invalid after a successful call and should not be used again. | |
561 * The BreakIterator corresponding to the key will be deleted. | |
562 * Because ICU may choose to cache BreakIterators internally, this should | |
563 * be called during application shutdown, after all calls to | |
564 * BreakIterator::createXXXInstance to avoid undefined behavior. | |
565 * @param key the registry key returned by a previous call to registerInstance | |
566 * @param status the in/out status code, no special meanings are assigned | |
567 * @return TRUE if the iterator for the key was successfully unregistered | |
568 * @stable ICU 2.4 | |
569 */ | |
570 static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status); | |
571 | |
572 /** | |
573 * Return a StringEnumeration over the locales available at the time of the call, | |
574 * including registered locales. | |
575 * @return a StringEnumeration over the locales available at the time of the call | |
576 * @stable ICU 2.4 | |
577 */ | |
578 static StringEnumeration* U_EXPORT2 getAvailableLocales(void); | |
579 #endif | |
580 | |
581 /** | |
582 * Returns the locale for this break iterator. Two flavors are available: valid and | |
583 * actual locale. | |
584 * @stable ICU 2.8 | |
585 */ | |
586 Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; | |
587 | |
588 #ifndef U_HIDE_INTERNAL_API | |
589 /** Get the locale for this break iterator object. You can choose between valid and actual locale. | |
590 * @param type type of the locale we're looking for (valid or actual) | |
591 * @param status error code for the operation | |
592 * @return the locale | |
593 * @internal | |
594 */ | |
595 const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const; | |
596 #endif /* U_HIDE_INTERNAL_API */ | |
597 | |
598 /** | |
599 * Set the subject text string upon which the break iterator is operating | |
600 * without changing any other aspect of the matching state. | |
601 * The new and previous text strings must have the same content. | |
602 * | |
603 * This function is intended for use in environments where ICU is operating on | |
604 * strings that may move around in memory. It provides a mechanism for notifying | |
605 * ICU that the string has been relocated, and providing a new UText to access the | |
606 * string in its new position. | |
607 * | |
608 * Note that the break iterator implementation never copies the underlying text | |
609 * of a string being processed, but always operates directly on the original text | |
610 * provided by the user. Refreshing simply drops the references to the old text | |
611 * and replaces them with references to the new. | |
612 * | |
613 * Caution: this function is normally used only by very specialized, | |
614 * system-level code. One example use case is with garbage collection that moves | |
615 * the text in memory. | |
616 * | |
617 * @param input The new (moved) text string. | |
618 * @param status Receives errors detected by this function. | |
619 * @return *this | |
620 * | |
621 * @stable ICU 49 | |
622 */ | |
623 virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0; | |
624 | |
625 private: | |
626 static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status); | |
627 static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status); | |
628 static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status); | |
629 | |
630 friend class ICUBreakIteratorFactory; | |
631 friend class ICUBreakIteratorService; | |
632 | |
633 protected: | |
634 // Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API | |
635 // or else the compiler will create public ones. | |
636 /** @internal */ | |
637 BreakIterator(); | |
638 /** @internal */ | |
639 BreakIterator (const BreakIterator &other); | |
640 #ifndef U_HIDE_INTERNAL_API | |
641 /** @internal */ | |
642 BreakIterator (const Locale& valid, const Locale &actual); | |
643 /** @internal. Assignment Operator, used by RuleBasedBreakIterator. */ | |
644 BreakIterator &operator = (const BreakIterator &other); | |
645 #endif /* U_HIDE_INTERNAL_API */ | |
646 | |
647 private: | |
648 | |
649 /** @internal (private) */ | |
650 char actualLocale[ULOC_FULLNAME_CAPACITY]; | |
651 char validLocale[ULOC_FULLNAME_CAPACITY]; | |
652 }; | |
653 | |
654 #ifndef U_HIDE_DEPRECATED_API | |
655 | |
656 inline UBool BreakIterator::isBufferClone() | |
657 { | |
658 return FALSE; | |
659 } | |
660 | |
661 #endif /* U_HIDE_DEPRECATED_API */ | |
662 | |
663 U_NAMESPACE_END | |
664 | |
665 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | |
666 | |
667 #endif /* U_SHOW_CPLUSPLUS_API */ | |
668 | |
669 #endif // BRKITER_H | |
670 //eof |