annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/unicode/brkiter.h @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 // © 2016 and later: Unicode, Inc. and others.
jpayne@69 2 // License & terms of use: http://www.unicode.org/copyright.html
jpayne@69 3 /*
jpayne@69 4 ********************************************************************************
jpayne@69 5 * Copyright (C) 1997-2016, International Business Machines
jpayne@69 6 * Corporation and others. All Rights Reserved.
jpayne@69 7 ********************************************************************************
jpayne@69 8 *
jpayne@69 9 * File brkiter.h
jpayne@69 10 *
jpayne@69 11 * Modification History:
jpayne@69 12 *
jpayne@69 13 * Date Name Description
jpayne@69 14 * 02/18/97 aliu Added typedef for TextCount. Made DONE const.
jpayne@69 15 * 05/07/97 aliu Fixed DLL declaration.
jpayne@69 16 * 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK
jpayne@69 17 * 08/11/98 helena Sync-up JDK1.2.
jpayne@69 18 * 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
jpayne@69 19 ********************************************************************************
jpayne@69 20 */
jpayne@69 21
jpayne@69 22 #ifndef BRKITER_H
jpayne@69 23 #define BRKITER_H
jpayne@69 24
jpayne@69 25 #include "unicode/utypes.h"
jpayne@69 26
jpayne@69 27 /**
jpayne@69 28 * \file
jpayne@69 29 * \brief C++ API: Break Iterator.
jpayne@69 30 */
jpayne@69 31
jpayne@69 32 #include "unicode/utypes.h"
jpayne@69 33
jpayne@69 34 #if U_SHOW_CPLUSPLUS_API
jpayne@69 35
jpayne@69 36 #if UCONFIG_NO_BREAK_ITERATION
jpayne@69 37
jpayne@69 38 U_NAMESPACE_BEGIN
jpayne@69 39
jpayne@69 40 /*
jpayne@69 41 * Allow the declaration of APIs with pointers to BreakIterator
jpayne@69 42 * even when break iteration is removed from the build.
jpayne@69 43 */
jpayne@69 44 class BreakIterator;
jpayne@69 45
jpayne@69 46 U_NAMESPACE_END
jpayne@69 47
jpayne@69 48 #else
jpayne@69 49
jpayne@69 50 #include "unicode/uobject.h"
jpayne@69 51 #include "unicode/unistr.h"
jpayne@69 52 #include "unicode/chariter.h"
jpayne@69 53 #include "unicode/locid.h"
jpayne@69 54 #include "unicode/ubrk.h"
jpayne@69 55 #include "unicode/strenum.h"
jpayne@69 56 #include "unicode/utext.h"
jpayne@69 57 #include "unicode/umisc.h"
jpayne@69 58
jpayne@69 59 U_NAMESPACE_BEGIN
jpayne@69 60
jpayne@69 61 /**
jpayne@69 62 * The BreakIterator class implements methods for finding the location
jpayne@69 63 * of boundaries in text. BreakIterator is an abstract base class.
jpayne@69 64 * Instances of BreakIterator maintain a current position and scan over
jpayne@69 65 * text returning the index of characters where boundaries occur.
jpayne@69 66 * <p>
jpayne@69 67 * Line boundary analysis determines where a text string can be broken
jpayne@69 68 * when line-wrapping. The mechanism correctly handles punctuation and
jpayne@69 69 * hyphenated words.
jpayne@69 70 * <p>
jpayne@69 71 * Sentence boundary analysis allows selection with correct
jpayne@69 72 * interpretation of periods within numbers and abbreviations, and
jpayne@69 73 * trailing punctuation marks such as quotation marks and parentheses.
jpayne@69 74 * <p>
jpayne@69 75 * Word boundary analysis is used by search and replace functions, as
jpayne@69 76 * well as within text editing applications that allow the user to
jpayne@69 77 * select words with a double click. Word selection provides correct
jpayne@69 78 * interpretation of punctuation marks within and following
jpayne@69 79 * words. Characters that are not part of a word, such as symbols or
jpayne@69 80 * punctuation marks, have word-breaks on both sides.
jpayne@69 81 * <p>
jpayne@69 82 * Character boundary analysis allows users to interact with
jpayne@69 83 * characters as they expect to, for example, when moving the cursor
jpayne@69 84 * through a text string. Character boundary analysis provides correct
jpayne@69 85 * navigation through character strings, regardless of how the
jpayne@69 86 * character is stored. For example, an accented character might be
jpayne@69 87 * stored as a base character and a diacritical mark. What users
jpayne@69 88 * consider to be a character can differ between languages.
jpayne@69 89 * <p>
jpayne@69 90 * The text boundary positions are found according to the rules
jpayne@69 91 * described in Unicode Standard Annex #29, Text Boundaries, and
jpayne@69 92 * Unicode Standard Annex #14, Line Breaking Properties. These
jpayne@69 93 * are available at http://www.unicode.org/reports/tr14/ and
jpayne@69 94 * http://www.unicode.org/reports/tr29/.
jpayne@69 95 * <p>
jpayne@69 96 * In addition to the C++ API defined in this header file, a
jpayne@69 97 * plain C API with equivalent functionality is defined in the
jpayne@69 98 * file ubrk.h
jpayne@69 99 * <p>
jpayne@69 100 * Code snippets illustrating the use of the Break Iterator APIs
jpayne@69 101 * are available in the ICU User Guide,
jpayne@69 102 * http://icu-project.org/userguide/boundaryAnalysis.html
jpayne@69 103 * and in the sample program icu/source/samples/break/break.cpp
jpayne@69 104 *
jpayne@69 105 */
jpayne@69 106 class U_COMMON_API BreakIterator : public UObject {
jpayne@69 107 public:
jpayne@69 108 /**
jpayne@69 109 * destructor
jpayne@69 110 * @stable ICU 2.0
jpayne@69 111 */
jpayne@69 112 virtual ~BreakIterator();
jpayne@69 113
jpayne@69 114 /**
jpayne@69 115 * Return true if another object is semantically equal to this
jpayne@69 116 * one. The other object should be an instance of the same subclass of
jpayne@69 117 * BreakIterator. Objects of different subclasses are considered
jpayne@69 118 * unequal.
jpayne@69 119 * <P>
jpayne@69 120 * Return true if this BreakIterator is at the same position in the
jpayne@69 121 * same text, and is the same class and type (word, line, etc.) of
jpayne@69 122 * BreakIterator, as the argument. Text is considered the same if
jpayne@69 123 * it contains the same characters, it need not be the same
jpayne@69 124 * object, and styles are not considered.
jpayne@69 125 * @stable ICU 2.0
jpayne@69 126 */
jpayne@69 127 virtual UBool operator==(const BreakIterator&) const = 0;
jpayne@69 128
jpayne@69 129 /**
jpayne@69 130 * Returns the complement of the result of operator==
jpayne@69 131 * @param rhs The BreakIterator to be compared for inequality
jpayne@69 132 * @return the complement of the result of operator==
jpayne@69 133 * @stable ICU 2.0
jpayne@69 134 */
jpayne@69 135 UBool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
jpayne@69 136
jpayne@69 137 /**
jpayne@69 138 * Return a polymorphic copy of this object. This is an abstract
jpayne@69 139 * method which subclasses implement.
jpayne@69 140 * @stable ICU 2.0
jpayne@69 141 */
jpayne@69 142 virtual BreakIterator* clone() const = 0;
jpayne@69 143
jpayne@69 144 /**
jpayne@69 145 * Return a polymorphic class ID for this object. Different subclasses
jpayne@69 146 * will return distinct unequal values.
jpayne@69 147 * @stable ICU 2.0
jpayne@69 148 */
jpayne@69 149 virtual UClassID getDynamicClassID(void) const = 0;
jpayne@69 150
jpayne@69 151 /**
jpayne@69 152 * Return a CharacterIterator over the text being analyzed.
jpayne@69 153 * @stable ICU 2.0
jpayne@69 154 */
jpayne@69 155 virtual CharacterIterator& getText(void) const = 0;
jpayne@69 156
jpayne@69 157
jpayne@69 158 /**
jpayne@69 159 * Get a UText for the text being analyzed.
jpayne@69 160 * The returned UText is a shallow clone of the UText used internally
jpayne@69 161 * by the break iterator implementation. It can safely be used to
jpayne@69 162 * access the text without impacting any break iterator operations,
jpayne@69 163 * but the underlying text itself must not be altered.
jpayne@69 164 *
jpayne@69 165 * @param fillIn A UText to be filled in. If NULL, a new UText will be
jpayne@69 166 * allocated to hold the result.
jpayne@69 167 * @param status receives any error codes.
jpayne@69 168 * @return The current UText for this break iterator. If an input
jpayne@69 169 * UText was provided, it will always be returned.
jpayne@69 170 * @stable ICU 3.4
jpayne@69 171 */
jpayne@69 172 virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
jpayne@69 173
jpayne@69 174 /**
jpayne@69 175 * Change the text over which this operates. The text boundary is
jpayne@69 176 * reset to the start.
jpayne@69 177 *
jpayne@69 178 * The BreakIterator will retain a reference to the supplied string.
jpayne@69 179 * The caller must not modify or delete the text while the BreakIterator
jpayne@69 180 * retains the reference.
jpayne@69 181 *
jpayne@69 182 * @param text The UnicodeString used to change the text.
jpayne@69 183 * @stable ICU 2.0
jpayne@69 184 */
jpayne@69 185 virtual void setText(const UnicodeString &text) = 0;
jpayne@69 186
jpayne@69 187 /**
jpayne@69 188 * Reset the break iterator to operate over the text represented by
jpayne@69 189 * the UText. The iterator position is reset to the start.
jpayne@69 190 *
jpayne@69 191 * This function makes a shallow clone of the supplied UText. This means
jpayne@69 192 * that the caller is free to immediately close or otherwise reuse the
jpayne@69 193 * UText that was passed as a parameter, but that the underlying text itself
jpayne@69 194 * must not be altered while being referenced by the break iterator.
jpayne@69 195 *
jpayne@69 196 * All index positions returned by break iterator functions are
jpayne@69 197 * native indices from the UText. For example, when breaking UTF-8
jpayne@69 198 * encoded text, the break positions returned by next(), previous(), etc.
jpayne@69 199 * will be UTF-8 string indices, not UTF-16 positions.
jpayne@69 200 *
jpayne@69 201 * @param text The UText used to change the text.
jpayne@69 202 * @param status receives any error codes.
jpayne@69 203 * @stable ICU 3.4
jpayne@69 204 */
jpayne@69 205 virtual void setText(UText *text, UErrorCode &status) = 0;
jpayne@69 206
jpayne@69 207 /**
jpayne@69 208 * Change the text over which this operates. The text boundary is
jpayne@69 209 * reset to the start.
jpayne@69 210 * Note that setText(UText *) provides similar functionality to this function,
jpayne@69 211 * and is more efficient.
jpayne@69 212 * @param it The CharacterIterator used to change the text.
jpayne@69 213 * @stable ICU 2.0
jpayne@69 214 */
jpayne@69 215 virtual void adoptText(CharacterIterator* it) = 0;
jpayne@69 216
jpayne@69 217 enum {
jpayne@69 218 /**
jpayne@69 219 * DONE is returned by previous() and next() after all valid
jpayne@69 220 * boundaries have been returned.
jpayne@69 221 * @stable ICU 2.0
jpayne@69 222 */
jpayne@69 223 DONE = (int32_t)-1
jpayne@69 224 };
jpayne@69 225
jpayne@69 226 /**
jpayne@69 227 * Sets the current iteration position to the beginning of the text, position zero.
jpayne@69 228 * @return The offset of the beginning of the text, zero.
jpayne@69 229 * @stable ICU 2.0
jpayne@69 230 */
jpayne@69 231 virtual int32_t first(void) = 0;
jpayne@69 232
jpayne@69 233 /**
jpayne@69 234 * Set the iterator position to the index immediately BEYOND the last character in the text being scanned.
jpayne@69 235 * @return The index immediately BEYOND the last character in the text being scanned.
jpayne@69 236 * @stable ICU 2.0
jpayne@69 237 */
jpayne@69 238 virtual int32_t last(void) = 0;
jpayne@69 239
jpayne@69 240 /**
jpayne@69 241 * Set the iterator position to the boundary preceding the current boundary.
jpayne@69 242 * @return The character index of the previous text boundary or DONE if all
jpayne@69 243 * boundaries have been returned.
jpayne@69 244 * @stable ICU 2.0
jpayne@69 245 */
jpayne@69 246 virtual int32_t previous(void) = 0;
jpayne@69 247
jpayne@69 248 /**
jpayne@69 249 * Advance the iterator to the boundary following the current boundary.
jpayne@69 250 * @return The character index of the next text boundary or DONE if all
jpayne@69 251 * boundaries have been returned.
jpayne@69 252 * @stable ICU 2.0
jpayne@69 253 */
jpayne@69 254 virtual int32_t next(void) = 0;
jpayne@69 255
jpayne@69 256 /**
jpayne@69 257 * Return character index of the current iterator position within the text.
jpayne@69 258 * @return The boundary most recently returned.
jpayne@69 259 * @stable ICU 2.0
jpayne@69 260 */
jpayne@69 261 virtual int32_t current(void) const = 0;
jpayne@69 262
jpayne@69 263 /**
jpayne@69 264 * Advance the iterator to the first boundary following the specified offset.
jpayne@69 265 * The value returned is always greater than the offset or
jpayne@69 266 * the value BreakIterator::DONE
jpayne@69 267 * @param offset the offset to begin scanning.
jpayne@69 268 * @return The first boundary after the specified offset.
jpayne@69 269 * @stable ICU 2.0
jpayne@69 270 */
jpayne@69 271 virtual int32_t following(int32_t offset) = 0;
jpayne@69 272
jpayne@69 273 /**
jpayne@69 274 * Set the iterator position to the first boundary preceding the specified offset.
jpayne@69 275 * The value returned is always smaller than the offset or
jpayne@69 276 * the value BreakIterator::DONE
jpayne@69 277 * @param offset the offset to begin scanning.
jpayne@69 278 * @return The first boundary before the specified offset.
jpayne@69 279 * @stable ICU 2.0
jpayne@69 280 */
jpayne@69 281 virtual int32_t preceding(int32_t offset) = 0;
jpayne@69 282
jpayne@69 283 /**
jpayne@69 284 * Return true if the specified position is a boundary position.
jpayne@69 285 * As a side effect, the current position of the iterator is set
jpayne@69 286 * to the first boundary position at or following the specified offset.
jpayne@69 287 * @param offset the offset to check.
jpayne@69 288 * @return True if "offset" is a boundary position.
jpayne@69 289 * @stable ICU 2.0
jpayne@69 290 */
jpayne@69 291 virtual UBool isBoundary(int32_t offset) = 0;
jpayne@69 292
jpayne@69 293 /**
jpayne@69 294 * Set the iterator position to the nth boundary from the current boundary
jpayne@69 295 * @param n the number of boundaries to move by. A value of 0
jpayne@69 296 * does nothing. Negative values move to previous boundaries
jpayne@69 297 * and positive values move to later boundaries.
jpayne@69 298 * @return The new iterator position, or
jpayne@69 299 * DONE if there are fewer than |n| boundaries in the specified direction.
jpayne@69 300 * @stable ICU 2.0
jpayne@69 301 */
jpayne@69 302 virtual int32_t next(int32_t n) = 0;
jpayne@69 303
jpayne@69 304 /**
jpayne@69 305 * For RuleBasedBreakIterators, return the status tag from the break rule
jpayne@69 306 * that determined the boundary at the current iteration position.
jpayne@69 307 * <p>
jpayne@69 308 * For break iterator types that do not support a rule status,
jpayne@69 309 * a default value of 0 is returned.
jpayne@69 310 * <p>
jpayne@69 311 * @return the status from the break rule that determined the boundary at
jpayne@69 312 * the current iteration position.
jpayne@69 313 * @see RuleBasedBreakIterator::getRuleStatus()
jpayne@69 314 * @see UWordBreak
jpayne@69 315 * @stable ICU 52
jpayne@69 316 */
jpayne@69 317 virtual int32_t getRuleStatus() const;
jpayne@69 318
jpayne@69 319 /**
jpayne@69 320 * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s)
jpayne@69 321 * that determined the boundary at the current iteration position.
jpayne@69 322 * <p>
jpayne@69 323 * For break iterator types that do not support rule status,
jpayne@69 324 * no values are returned.
jpayne@69 325 * <p>
jpayne@69 326 * The returned status value(s) are stored into an array provided by the caller.
jpayne@69 327 * The values are stored in sorted (ascending) order.
jpayne@69 328 * If the capacity of the output array is insufficient to hold the data,
jpayne@69 329 * the output will be truncated to the available length, and a
jpayne@69 330 * U_BUFFER_OVERFLOW_ERROR will be signaled.
jpayne@69 331 * <p>
jpayne@69 332 * @see RuleBasedBreakIterator::getRuleStatusVec
jpayne@69 333 *
jpayne@69 334 * @param fillInVec an array to be filled in with the status values.
jpayne@69 335 * @param capacity the length of the supplied vector. A length of zero causes
jpayne@69 336 * the function to return the number of status values, in the
jpayne@69 337 * normal way, without attempting to store any values.
jpayne@69 338 * @param status receives error codes.
jpayne@69 339 * @return The number of rule status values from rules that determined
jpayne@69 340 * the boundary at the current iteration position.
jpayne@69 341 * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
jpayne@69 342 * is the total number of status values that were available,
jpayne@69 343 * not the reduced number that were actually returned.
jpayne@69 344 * @see getRuleStatus
jpayne@69 345 * @stable ICU 52
jpayne@69 346 */
jpayne@69 347 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
jpayne@69 348
jpayne@69 349 /**
jpayne@69 350 * Create BreakIterator for word-breaks using the given locale.
jpayne@69 351 * Returns an instance of a BreakIterator implementing word breaks.
jpayne@69 352 * WordBreak is useful for word selection (ex. double click)
jpayne@69 353 * @param where the locale.
jpayne@69 354 * @param status the error code
jpayne@69 355 * @return A BreakIterator for word-breaks. The UErrorCode& status
jpayne@69 356 * parameter is used to return status information to the user.
jpayne@69 357 * To check whether the construction succeeded or not, you should check
jpayne@69 358 * the value of U_SUCCESS(err). If you wish more detailed information, you
jpayne@69 359 * can check for informational error results which still indicate success.
jpayne@69 360 * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
jpayne@69 361 * example, 'de_CH' was requested, but nothing was found there, so 'de' was
jpayne@69 362 * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
jpayne@69 363 * used; neither the requested locale nor any of its fall back locales
jpayne@69 364 * could be found.
jpayne@69 365 * The caller owns the returned object and is responsible for deleting it.
jpayne@69 366 * @stable ICU 2.0
jpayne@69 367 */
jpayne@69 368 static BreakIterator* U_EXPORT2
jpayne@69 369 createWordInstance(const Locale& where, UErrorCode& status);
jpayne@69 370
jpayne@69 371 /**
jpayne@69 372 * Create BreakIterator for line-breaks using specified locale.
jpayne@69 373 * Returns an instance of a BreakIterator implementing line breaks. Line
jpayne@69 374 * breaks are logically possible line breaks, actual line breaks are
jpayne@69 375 * usually determined based on display width.
jpayne@69 376 * LineBreak is useful for word wrapping text.
jpayne@69 377 * @param where the locale.
jpayne@69 378 * @param status The error code.
jpayne@69 379 * @return A BreakIterator for line-breaks. The UErrorCode& status
jpayne@69 380 * parameter is used to return status information to the user.
jpayne@69 381 * To check whether the construction succeeded or not, you should check
jpayne@69 382 * the value of U_SUCCESS(err). If you wish more detailed information, you
jpayne@69 383 * can check for informational error results which still indicate success.
jpayne@69 384 * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
jpayne@69 385 * example, 'de_CH' was requested, but nothing was found there, so 'de' was
jpayne@69 386 * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
jpayne@69 387 * used; neither the requested locale nor any of its fall back locales
jpayne@69 388 * could be found.
jpayne@69 389 * The caller owns the returned object and is responsible for deleting it.
jpayne@69 390 * @stable ICU 2.0
jpayne@69 391 */
jpayne@69 392 static BreakIterator* U_EXPORT2
jpayne@69 393 createLineInstance(const Locale& where, UErrorCode& status);
jpayne@69 394
jpayne@69 395 /**
jpayne@69 396 * Create BreakIterator for character-breaks using specified locale
jpayne@69 397 * Returns an instance of a BreakIterator implementing character breaks.
jpayne@69 398 * Character breaks are boundaries of combining character sequences.
jpayne@69 399 * @param where the locale.
jpayne@69 400 * @param status The error code.
jpayne@69 401 * @return A BreakIterator for character-breaks. The UErrorCode& status
jpayne@69 402 * parameter is used to return status information to the user.
jpayne@69 403 * To check whether the construction succeeded or not, you should check
jpayne@69 404 * the value of U_SUCCESS(err). If you wish more detailed information, you
jpayne@69 405 * can check for informational error results which still indicate success.
jpayne@69 406 * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
jpayne@69 407 * example, 'de_CH' was requested, but nothing was found there, so 'de' was
jpayne@69 408 * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
jpayne@69 409 * used; neither the requested locale nor any of its fall back locales
jpayne@69 410 * could be found.
jpayne@69 411 * The caller owns the returned object and is responsible for deleting it.
jpayne@69 412 * @stable ICU 2.0
jpayne@69 413 */
jpayne@69 414 static BreakIterator* U_EXPORT2
jpayne@69 415 createCharacterInstance(const Locale& where, UErrorCode& status);
jpayne@69 416
jpayne@69 417 /**
jpayne@69 418 * Create BreakIterator for sentence-breaks using specified locale
jpayne@69 419 * Returns an instance of a BreakIterator implementing sentence breaks.
jpayne@69 420 * @param where the locale.
jpayne@69 421 * @param status The error code.
jpayne@69 422 * @return A BreakIterator for sentence-breaks. The UErrorCode& status
jpayne@69 423 * parameter is used to return status information to the user.
jpayne@69 424 * To check whether the construction succeeded or not, you should check
jpayne@69 425 * the value of U_SUCCESS(err). If you wish more detailed information, you
jpayne@69 426 * can check for informational error results which still indicate success.
jpayne@69 427 * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
jpayne@69 428 * example, 'de_CH' was requested, but nothing was found there, so 'de' was
jpayne@69 429 * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
jpayne@69 430 * used; neither the requested locale nor any of its fall back locales
jpayne@69 431 * could be found.
jpayne@69 432 * The caller owns the returned object and is responsible for deleting it.
jpayne@69 433 * @stable ICU 2.0
jpayne@69 434 */
jpayne@69 435 static BreakIterator* U_EXPORT2
jpayne@69 436 createSentenceInstance(const Locale& where, UErrorCode& status);
jpayne@69 437
jpayne@69 438 #ifndef U_HIDE_DEPRECATED_API
jpayne@69 439 /**
jpayne@69 440 * Create BreakIterator for title-casing breaks using the specified locale
jpayne@69 441 * Returns an instance of a BreakIterator implementing title breaks.
jpayne@69 442 * The iterator returned locates title boundaries as described for
jpayne@69 443 * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
jpayne@69 444 * please use a word boundary iterator. See {@link #createWordInstance }.
jpayne@69 445 *
jpayne@69 446 * @param where the locale.
jpayne@69 447 * @param status The error code.
jpayne@69 448 * @return A BreakIterator for title-breaks. The UErrorCode& status
jpayne@69 449 * parameter is used to return status information to the user.
jpayne@69 450 * To check whether the construction succeeded or not, you should check
jpayne@69 451 * the value of U_SUCCESS(err). If you wish more detailed information, you
jpayne@69 452 * can check for informational error results which still indicate success.
jpayne@69 453 * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
jpayne@69 454 * example, 'de_CH' was requested, but nothing was found there, so 'de' was
jpayne@69 455 * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
jpayne@69 456 * used; neither the requested locale nor any of its fall back locales
jpayne@69 457 * could be found.
jpayne@69 458 * The caller owns the returned object and is responsible for deleting it.
jpayne@69 459 * @deprecated ICU 64 Use createWordInstance instead.
jpayne@69 460 */
jpayne@69 461 static BreakIterator* U_EXPORT2
jpayne@69 462 createTitleInstance(const Locale& where, UErrorCode& status);
jpayne@69 463 #endif /* U_HIDE_DEPRECATED_API */
jpayne@69 464
jpayne@69 465 /**
jpayne@69 466 * Get the set of Locales for which TextBoundaries are installed.
jpayne@69 467 * <p><b>Note:</b> this will not return locales added through the register
jpayne@69 468 * call. To see the registered locales too, use the getAvailableLocales
jpayne@69 469 * function that returns a StringEnumeration object </p>
jpayne@69 470 * @param count the output parameter of number of elements in the locale list
jpayne@69 471 * @return available locales
jpayne@69 472 * @stable ICU 2.0
jpayne@69 473 */
jpayne@69 474 static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
jpayne@69 475
jpayne@69 476 /**
jpayne@69 477 * Get name of the object for the desired Locale, in the desired language.
jpayne@69 478 * @param objectLocale must be from getAvailableLocales.
jpayne@69 479 * @param displayLocale specifies the desired locale for output.
jpayne@69 480 * @param name the fill-in parameter of the return value
jpayne@69 481 * Uses best match.
jpayne@69 482 * @return user-displayable name
jpayne@69 483 * @stable ICU 2.0
jpayne@69 484 */
jpayne@69 485 static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
jpayne@69 486 const Locale& displayLocale,
jpayne@69 487 UnicodeString& name);
jpayne@69 488
jpayne@69 489 /**
jpayne@69 490 * Get name of the object for the desired Locale, in the language of the
jpayne@69 491 * default locale.
jpayne@69 492 * @param objectLocale must be from getAvailableLocales
jpayne@69 493 * @param name the fill-in parameter of the return value
jpayne@69 494 * @return user-displayable name
jpayne@69 495 * @stable ICU 2.0
jpayne@69 496 */
jpayne@69 497 static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
jpayne@69 498 UnicodeString& name);
jpayne@69 499
jpayne@69 500 #ifndef U_FORCE_HIDE_DEPRECATED_API
jpayne@69 501 /**
jpayne@69 502 * Deprecated functionality. Use clone() instead.
jpayne@69 503 *
jpayne@69 504 * Thread safe client-buffer-based cloning operation
jpayne@69 505 * Do NOT call delete on a safeclone, since 'new' is not used to create it.
jpayne@69 506 * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
jpayne@69 507 * If buffer is not large enough, new memory will be allocated.
jpayne@69 508 * @param BufferSize reference to size of allocated space.
jpayne@69 509 * If BufferSize == 0, a sufficient size for use in cloning will
jpayne@69 510 * be returned ('pre-flighting')
jpayne@69 511 * If BufferSize is not enough for a stack-based safe clone,
jpayne@69 512 * new memory will be allocated.
jpayne@69 513 * @param status to indicate whether the operation went on smoothly or there were errors
jpayne@69 514 * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were
jpayne@69 515 * necessary.
jpayne@69 516 * @return pointer to the new clone
jpayne@69 517 *
jpayne@69 518 * @deprecated ICU 52. Use clone() instead.
jpayne@69 519 */
jpayne@69 520 virtual BreakIterator * createBufferClone(void *stackBuffer,
jpayne@69 521 int32_t &BufferSize,
jpayne@69 522 UErrorCode &status) = 0;
jpayne@69 523 #endif // U_FORCE_HIDE_DEPRECATED_API
jpayne@69 524
jpayne@69 525 #ifndef U_HIDE_DEPRECATED_API
jpayne@69 526
jpayne@69 527 /**
jpayne@69 528 * Determine whether the BreakIterator was created in user memory by
jpayne@69 529 * createBufferClone(), and thus should not be deleted. Such objects
jpayne@69 530 * must be closed by an explicit call to the destructor (not delete).
jpayne@69 531 * @deprecated ICU 52. Always delete the BreakIterator.
jpayne@69 532 */
jpayne@69 533 inline UBool isBufferClone(void);
jpayne@69 534
jpayne@69 535 #endif /* U_HIDE_DEPRECATED_API */
jpayne@69 536
jpayne@69 537 #if !UCONFIG_NO_SERVICE
jpayne@69 538 /**
jpayne@69 539 * Register a new break iterator of the indicated kind, to use in the given locale.
jpayne@69 540 * The break iterator will be adopted. Clones of the iterator will be returned
jpayne@69 541 * if a request for a break iterator of the given kind matches or falls back to
jpayne@69 542 * this locale.
jpayne@69 543 * Because ICU may choose to cache BreakIterators internally, this must
jpayne@69 544 * be called at application startup, prior to any calls to
jpayne@69 545 * BreakIterator::createXXXInstance to avoid undefined behavior.
jpayne@69 546 * @param toAdopt the BreakIterator instance to be adopted
jpayne@69 547 * @param locale the Locale for which this instance is to be registered
jpayne@69 548 * @param kind the type of iterator for which this instance is to be registered
jpayne@69 549 * @param status the in/out status code, no special meanings are assigned
jpayne@69 550 * @return a registry key that can be used to unregister this instance
jpayne@69 551 * @stable ICU 2.4
jpayne@69 552 */
jpayne@69 553 static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
jpayne@69 554 const Locale& locale,
jpayne@69 555 UBreakIteratorType kind,
jpayne@69 556 UErrorCode& status);
jpayne@69 557
jpayne@69 558 /**
jpayne@69 559 * Unregister a previously-registered BreakIterator using the key returned from the
jpayne@69 560 * register call. Key becomes invalid after a successful call and should not be used again.
jpayne@69 561 * The BreakIterator corresponding to the key will be deleted.
jpayne@69 562 * Because ICU may choose to cache BreakIterators internally, this should
jpayne@69 563 * be called during application shutdown, after all calls to
jpayne@69 564 * BreakIterator::createXXXInstance to avoid undefined behavior.
jpayne@69 565 * @param key the registry key returned by a previous call to registerInstance
jpayne@69 566 * @param status the in/out status code, no special meanings are assigned
jpayne@69 567 * @return TRUE if the iterator for the key was successfully unregistered
jpayne@69 568 * @stable ICU 2.4
jpayne@69 569 */
jpayne@69 570 static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
jpayne@69 571
jpayne@69 572 /**
jpayne@69 573 * Return a StringEnumeration over the locales available at the time of the call,
jpayne@69 574 * including registered locales.
jpayne@69 575 * @return a StringEnumeration over the locales available at the time of the call
jpayne@69 576 * @stable ICU 2.4
jpayne@69 577 */
jpayne@69 578 static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
jpayne@69 579 #endif
jpayne@69 580
jpayne@69 581 /**
jpayne@69 582 * Returns the locale for this break iterator. Two flavors are available: valid and
jpayne@69 583 * actual locale.
jpayne@69 584 * @stable ICU 2.8
jpayne@69 585 */
jpayne@69 586 Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
jpayne@69 587
jpayne@69 588 #ifndef U_HIDE_INTERNAL_API
jpayne@69 589 /** Get the locale for this break iterator object. You can choose between valid and actual locale.
jpayne@69 590 * @param type type of the locale we're looking for (valid or actual)
jpayne@69 591 * @param status error code for the operation
jpayne@69 592 * @return the locale
jpayne@69 593 * @internal
jpayne@69 594 */
jpayne@69 595 const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
jpayne@69 596 #endif /* U_HIDE_INTERNAL_API */
jpayne@69 597
jpayne@69 598 /**
jpayne@69 599 * Set the subject text string upon which the break iterator is operating
jpayne@69 600 * without changing any other aspect of the matching state.
jpayne@69 601 * The new and previous text strings must have the same content.
jpayne@69 602 *
jpayne@69 603 * This function is intended for use in environments where ICU is operating on
jpayne@69 604 * strings that may move around in memory. It provides a mechanism for notifying
jpayne@69 605 * ICU that the string has been relocated, and providing a new UText to access the
jpayne@69 606 * string in its new position.
jpayne@69 607 *
jpayne@69 608 * Note that the break iterator implementation never copies the underlying text
jpayne@69 609 * of a string being processed, but always operates directly on the original text
jpayne@69 610 * provided by the user. Refreshing simply drops the references to the old text
jpayne@69 611 * and replaces them with references to the new.
jpayne@69 612 *
jpayne@69 613 * Caution: this function is normally used only by very specialized,
jpayne@69 614 * system-level code. One example use case is with garbage collection that moves
jpayne@69 615 * the text in memory.
jpayne@69 616 *
jpayne@69 617 * @param input The new (moved) text string.
jpayne@69 618 * @param status Receives errors detected by this function.
jpayne@69 619 * @return *this
jpayne@69 620 *
jpayne@69 621 * @stable ICU 49
jpayne@69 622 */
jpayne@69 623 virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
jpayne@69 624
jpayne@69 625 private:
jpayne@69 626 static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status);
jpayne@69 627 static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
jpayne@69 628 static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
jpayne@69 629
jpayne@69 630 friend class ICUBreakIteratorFactory;
jpayne@69 631 friend class ICUBreakIteratorService;
jpayne@69 632
jpayne@69 633 protected:
jpayne@69 634 // Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API
jpayne@69 635 // or else the compiler will create public ones.
jpayne@69 636 /** @internal */
jpayne@69 637 BreakIterator();
jpayne@69 638 /** @internal */
jpayne@69 639 BreakIterator (const BreakIterator &other);
jpayne@69 640 #ifndef U_HIDE_INTERNAL_API
jpayne@69 641 /** @internal */
jpayne@69 642 BreakIterator (const Locale& valid, const Locale &actual);
jpayne@69 643 /** @internal. Assignment Operator, used by RuleBasedBreakIterator. */
jpayne@69 644 BreakIterator &operator = (const BreakIterator &other);
jpayne@69 645 #endif /* U_HIDE_INTERNAL_API */
jpayne@69 646
jpayne@69 647 private:
jpayne@69 648
jpayne@69 649 /** @internal (private) */
jpayne@69 650 char actualLocale[ULOC_FULLNAME_CAPACITY];
jpayne@69 651 char validLocale[ULOC_FULLNAME_CAPACITY];
jpayne@69 652 };
jpayne@69 653
jpayne@69 654 #ifndef U_HIDE_DEPRECATED_API
jpayne@69 655
jpayne@69 656 inline UBool BreakIterator::isBufferClone()
jpayne@69 657 {
jpayne@69 658 return FALSE;
jpayne@69 659 }
jpayne@69 660
jpayne@69 661 #endif /* U_HIDE_DEPRECATED_API */
jpayne@69 662
jpayne@69 663 U_NAMESPACE_END
jpayne@69 664
jpayne@69 665 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
jpayne@69 666
jpayne@69 667 #endif /* U_SHOW_CPLUSPLUS_API */
jpayne@69 668
jpayne@69 669 #endif // BRKITER_H
jpayne@69 670 //eof