annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/unicode/ucasemap.h @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 // © 2016 and later: Unicode, Inc. and others.
jpayne@69 2 // License & terms of use: http://www.unicode.org/copyright.html
jpayne@69 3 /*
jpayne@69 4 *******************************************************************************
jpayne@69 5 *
jpayne@69 6 * Copyright (C) 2005-2012, International Business Machines
jpayne@69 7 * Corporation and others. All Rights Reserved.
jpayne@69 8 *
jpayne@69 9 *******************************************************************************
jpayne@69 10 * file name: ucasemap.h
jpayne@69 11 * encoding: UTF-8
jpayne@69 12 * tab size: 8 (not used)
jpayne@69 13 * indentation:4
jpayne@69 14 *
jpayne@69 15 * created on: 2005may06
jpayne@69 16 * created by: Markus W. Scherer
jpayne@69 17 *
jpayne@69 18 * Case mapping service object and functions using it.
jpayne@69 19 */
jpayne@69 20
jpayne@69 21 #ifndef __UCASEMAP_H__
jpayne@69 22 #define __UCASEMAP_H__
jpayne@69 23
jpayne@69 24 #include "unicode/utypes.h"
jpayne@69 25 #include "unicode/localpointer.h"
jpayne@69 26 #include "unicode/stringoptions.h"
jpayne@69 27 #include "unicode/ustring.h"
jpayne@69 28
jpayne@69 29 /**
jpayne@69 30 * \file
jpayne@69 31 * \brief C API: Unicode case mapping functions using a UCaseMap service object.
jpayne@69 32 *
jpayne@69 33 * The service object takes care of memory allocations, data loading, and setup
jpayne@69 34 * for the attributes, as usual.
jpayne@69 35 *
jpayne@69 36 * Currently, the functionality provided here does not overlap with uchar.h
jpayne@69 37 * and ustring.h, except for ucasemap_toTitle().
jpayne@69 38 *
jpayne@69 39 * ucasemap_utf8XYZ() functions operate directly on UTF-8 strings.
jpayne@69 40 */
jpayne@69 41
jpayne@69 42 /**
jpayne@69 43 * UCaseMap is an opaque service object for newer ICU case mapping functions.
jpayne@69 44 * Older functions did not use a service object. Create it with ucasemap_open() and release it with ucasemap_close().
jpayne@69 45 * @stable ICU 3.4
jpayne@69 46 */
jpayne@69 47 struct UCaseMap;
jpayne@69 48 typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @stable ICU 3.4 */
jpayne@69 49
jpayne@69 50 /**
jpayne@69 51 * Open a UCaseMap service object for a locale and a set of options.
jpayne@69 52 * The locale ID and options are preprocessed so that functions using the
jpayne@69 53 * service object need not process them in each call.
jpayne@69 54 *
jpayne@69 55 * @param locale ICU locale ID, used for language-dependent
jpayne@69 56 * upper-/lower-/title-casing according to the Unicode standard.
jpayne@69 57 * Usual semantics: ""=root, NULL=default locale, etc.
jpayne@69 58 * @param options Options bit set, used for case folding and string comparisons.
jpayne@69 59 * Same flags as for u_foldCase(), u_strFoldCase(),
jpayne@69 60 * u_strCaseCompare(), etc.
jpayne@69 61 * Use 0 or U_FOLD_CASE_DEFAULT for default behavior.
jpayne@69 62 * @param pErrorCode Must be a valid pointer to an error code value,
jpayne@69 63 * which must not indicate a failure before the function call.
jpayne@69 64 * @return Pointer to a UCaseMap service object, if successful. Release it with ucasemap_close().
jpayne@69 65 *
jpayne@69 66 * @see U_FOLD_CASE_DEFAULT
jpayne@69 67 * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
jpayne@69 68 * @see U_TITLECASE_NO_LOWERCASE
jpayne@69 69 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
jpayne@69 70 * @stable ICU 3.4
jpayne@69 71 */
jpayne@69 72 U_STABLE UCaseMap * U_EXPORT2
jpayne@69 73 ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode);
jpayne@69 74
jpayne@69 75 /**
jpayne@69 76 * Close a UCaseMap service object. A break iterator adopted via ucasemap_setBreakIterator() is released as well.
jpayne@69 77 * @param csm Object to be closed.
jpayne@69 78 * @stable ICU 3.4
jpayne@69 79 */
jpayne@69 80 U_STABLE void U_EXPORT2
jpayne@69 81 ucasemap_close(UCaseMap *csm);
jpayne@69 82
jpayne@69 83 #if U_SHOW_CPLUSPLUS_API
jpayne@69 84
jpayne@69 85 U_NAMESPACE_BEGIN
jpayne@69 86
jpayne@69 87 /**
jpayne@69 88 * \class LocalUCaseMapPointer
jpayne@69 89 * "Smart pointer" class, closes a UCaseMap via ucasemap_close().
jpayne@69 90 * For most methods see the LocalPointerBase base class.
jpayne@69 91 *
jpayne@69 92 * @see LocalPointerBase
jpayne@69 93 * @see LocalPointer
jpayne@69 94 * @stable ICU 4.4
jpayne@69 95 */
jpayne@69 96 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
jpayne@69 97
jpayne@69 98 U_NAMESPACE_END
jpayne@69 99
jpayne@69 100 #endif // U_SHOW_CPLUSPLUS_API
jpayne@69 101
jpayne@69 102 /**
jpayne@69 103 * Get the locale ID that is used for language-dependent case mappings.
jpayne@69 104 * @param csm UCaseMap service object.
jpayne@69 105 * @return locale ID (see ucasemap_open() and ucasemap_setLocale())
jpayne@69 106 * @stable ICU 3.4
jpayne@69 107 */
jpayne@69 108 U_STABLE const char * U_EXPORT2
jpayne@69 109 ucasemap_getLocale(const UCaseMap *csm);
jpayne@69 110
jpayne@69 111 /**
jpayne@69 112 * Get the options bit set that is used for case folding and string comparisons.
jpayne@69 113 * @param csm UCaseMap service object.
jpayne@69 114 * @return options bit set (see ucasemap_open() and ucasemap_setOptions())
jpayne@69 115 * @stable ICU 3.4
jpayne@69 116 */
jpayne@69 117 U_STABLE uint32_t U_EXPORT2
jpayne@69 118 ucasemap_getOptions(const UCaseMap *csm);
jpayne@69 119
jpayne@69 120 /**
jpayne@69 121 * Set the locale ID that is used for language-dependent case mappings.
jpayne@69 122 * The corresponding getter is ucasemap_getLocale().
jpayne@69 123 * @param csm UCaseMap service object.
jpayne@69 124 * @param locale Locale ID, see ucasemap_open().
jpayne@69 125 * @param pErrorCode Must be a valid pointer to an error code value,
jpayne@69 126 * which must not indicate a failure before the function call.
jpayne@69 127 *
jpayne@69 128 * @see ucasemap_open
jpayne@69 129 * @stable ICU 3.4
jpayne@69 130 */
jpayne@69 131 U_STABLE void U_EXPORT2
jpayne@69 132 ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
jpayne@69 133
jpayne@69 134 /**
jpayne@69 135 * Set the options bit set that is used for case folding and string comparisons.
jpayne@69 136 * Titlecasing also takes these options into account (see ucasemap_toTitle()).
jpayne@69 137 * @param csm UCaseMap service object.
jpayne@69 138 * @param options Options bit set, see ucasemap_open().
jpayne@69 139 * @param pErrorCode Must be a valid pointer to an error code value,
jpayne@69 140 * which must not indicate a failure before the function call.
jpayne@69 141 *
jpayne@69 142 * @see ucasemap_open
jpayne@69 143 * @stable ICU 3.4
jpayne@69 144 */
jpayne@69 145 U_STABLE void U_EXPORT2
jpayne@69 146 ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
jpayne@69 147
jpayne@69 148 #if !UCONFIG_NO_BREAK_ITERATION
jpayne@69 149
jpayne@69 150 /**
jpayne@69 151 * Get the break iterator that is used for titlecasing.
jpayne@69 152 * Do not modify the returned break iterator.
jpayne@69 153 * @param csm UCaseMap service object.
jpayne@69 154 * @return titlecasing break iterator (see ucasemap_setBreakIterator())
jpayne@69 155 * @stable ICU 3.8
jpayne@69 156 */
jpayne@69 157 U_STABLE const UBreakIterator * U_EXPORT2
jpayne@69 158 ucasemap_getBreakIterator(const UCaseMap *csm);
jpayne@69 159
jpayne@69 160 /**
jpayne@69 161 * Set the break iterator that is used for titlecasing.
jpayne@69 162 * The UCaseMap service object releases a previously set break iterator
jpayne@69 163 * and "adopts" this new one, taking ownership of it.
jpayne@69 164 * It will be released in a subsequent call to ucasemap_setBreakIterator()
jpayne@69 165 * or ucasemap_close().
jpayne@69 166 *
jpayne@69 167 * Break iterator operations are not thread-safe. Therefore, titlecasing
jpayne@69 168 * functions use non-const UCaseMap objects. It is not possible to titlecase
jpayne@69 169 * strings concurrently using the same UCaseMap.
jpayne@69 170 *
jpayne@69 171 * @param csm UCaseMap service object.
jpayne@69 172 * @param iterToAdopt Break iterator to be adopted for titlecasing; the UCaseMap owns it afterwards, so the caller must not release it.
jpayne@69 173 * @param pErrorCode Must be a valid pointer to an error code value,
jpayne@69 174 * which must not indicate a failure before the function call.
jpayne@69 175 *
jpayne@69 176 * @see ucasemap_toTitle
jpayne@69 177 * @see ucasemap_utf8ToTitle
jpayne@69 178 * @stable ICU 3.8
jpayne@69 179 */
jpayne@69 180 U_STABLE void U_EXPORT2
jpayne@69 181 ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode);
jpayne@69 182
jpayne@69 183 /**
jpayne@69 184 * Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(),
jpayne@69 185 * except that it takes ucasemap_setOptions() into account and has performance
jpayne@69 186 * advantages from being able to use a UCaseMap object for multiple case mapping
jpayne@69 187 * operations, saving setup time.
jpayne@69 188 *
jpayne@69 189 * Casing is locale-dependent and context-sensitive.
jpayne@69 190 * Titlecasing uses a break iterator to find the first characters of words
jpayne@69 191 * that are to be titlecased. It titlecases those characters and lowercases
jpayne@69 192 * all others. (This can be modified with ucasemap_setOptions().)
jpayne@69 193 *
jpayne@69 194 * Note: This function takes a non-const UCaseMap pointer because it will
jpayne@69 195 * open a default break iterator if no break iterator was set yet,
jpayne@69 196 * and effectively call ucasemap_setBreakIterator();
jpayne@69 197 * also because the break iterator is stateful and will be modified during
jpayne@69 198 * the iteration.
jpayne@69 199 *
jpayne@69 200 * The titlecase break iterator can be provided to customize for arbitrary
jpayne@69 201 * styles, using rules and dictionaries beyond the standard iterators.
jpayne@69 202 * The standard titlecase iterator for the root locale implements the
jpayne@69 203 * algorithm of Unicode TR 21.
jpayne@69 204 *
jpayne@69 205 * This function uses only the setText(), first() and next() methods of the
jpayne@69 206 * provided break iterator.
jpayne@69 207 *
jpayne@69 208 * The result may be longer or shorter than the original.
jpayne@69 209 * The source string and the destination buffer must not overlap.
jpayne@69 210 *
jpayne@69 211 * @param csm UCaseMap service object. This pointer is non-const!
jpayne@69 212 * See the note above for details.
jpayne@69 213 * @param dest A buffer for the result string. The result will be NUL-terminated if
jpayne@69 214 * the buffer is large enough.
jpayne@69 215 * The contents are undefined in case of failure.
jpayne@69 216 * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
jpayne@69 217 * dest may be NULL and the function will only return the length of the result
jpayne@69 218 * without writing any of the result string.
jpayne@69 219 * @param src The original string.
jpayne@69 220 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
jpayne@69 221 * @param pErrorCode Must be a valid pointer to an error code value,
jpayne@69 222 * which must not indicate a failure before the function call.
jpayne@69 223 * @return The length of the result string, if successful or in case of a buffer overflow;
jpayne@69 224 * in case of an overflow the length will be greater than destCapacity.
jpayne@69 225 *
jpayne@69 226 * @see u_strToTitle
jpayne@69 227 * @stable ICU 3.8
jpayne@69 228 */
jpayne@69 229 U_STABLE int32_t U_EXPORT2
jpayne@69 230 ucasemap_toTitle(UCaseMap *csm,
jpayne@69 231 UChar *dest, int32_t destCapacity,
jpayne@69 232 const UChar *src, int32_t srcLength,
jpayne@69 233 UErrorCode *pErrorCode);
jpayne@69 234
jpayne@69 235 #endif // UCONFIG_NO_BREAK_ITERATION
jpayne@69 236
jpayne@69 237 /**
jpayne@69 238 * Lowercase the characters in a UTF-8 string.
jpayne@69 239 * Casing is locale-dependent and context-sensitive.
jpayne@69 240 * The result may be longer or shorter than the original.
jpayne@69 241 * The source string and the destination buffer must not overlap.
jpayne@69 242 *
jpayne@69 243 * @param csm UCaseMap service object.
jpayne@69 244 * @param dest A buffer for the result string. The result will be NUL-terminated if
jpayne@69 245 * the buffer is large enough.
jpayne@69 246 * The contents are undefined in case of failure.
jpayne@69 247 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
jpayne@69 248 * dest may be NULL and the function will only return the length of the result
jpayne@69 249 * without writing any of the result string.
jpayne@69 250 * @param src The original string.
jpayne@69 251 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
jpayne@69 252 * @param pErrorCode Must be a valid pointer to an error code value,
jpayne@69 253 * which must not indicate a failure before the function call.
jpayne@69 254 * @return The length of the result string, if successful or in case of a buffer overflow;
jpayne@69 255 * in case of an overflow the length will be greater than destCapacity.
jpayne@69 256 *
jpayne@69 257 * @see u_strToLower
jpayne@69 258 * @stable ICU 3.4
jpayne@69 259 */
jpayne@69 260 U_STABLE int32_t U_EXPORT2
jpayne@69 261 ucasemap_utf8ToLower(const UCaseMap *csm,
jpayne@69 262 char *dest, int32_t destCapacity,
jpayne@69 263 const char *src, int32_t srcLength,
jpayne@69 264 UErrorCode *pErrorCode);
jpayne@69 265
jpayne@69 266 /**
jpayne@69 267 * Uppercase the characters in a UTF-8 string.
jpayne@69 268 * Casing is locale-dependent and context-sensitive.
jpayne@69 269 * The result may be longer or shorter than the original.
jpayne@69 270 * The source string and the destination buffer must not overlap.
jpayne@69 271 *
jpayne@69 272 * @param csm UCaseMap service object.
jpayne@69 273 * @param dest A buffer for the result string. The result will be NUL-terminated if
jpayne@69 274 * the buffer is large enough.
jpayne@69 275 * The contents are undefined in case of failure.
jpayne@69 276 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
jpayne@69 277 * dest may be NULL and the function will only return the length of the result
jpayne@69 278 * without writing any of the result string.
jpayne@69 279 * @param src The original string.
jpayne@69 280 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
jpayne@69 281 * @param pErrorCode Must be a valid pointer to an error code value,
jpayne@69 282 * which must not indicate a failure before the function call.
jpayne@69 283 * @return The length of the result string, if successful or in case of a buffer overflow;
jpayne@69 284 * in case of an overflow the length will be greater than destCapacity.
jpayne@69 285 *
jpayne@69 286 * @see u_strToUpper
jpayne@69 287 * @stable ICU 3.4
jpayne@69 288 */
jpayne@69 289 U_STABLE int32_t U_EXPORT2
jpayne@69 290 ucasemap_utf8ToUpper(const UCaseMap *csm,
jpayne@69 291 char *dest, int32_t destCapacity,
jpayne@69 292 const char *src, int32_t srcLength,
jpayne@69 293 UErrorCode *pErrorCode);
jpayne@69 294
jpayne@69 295 #if !UCONFIG_NO_BREAK_ITERATION
jpayne@69 296
jpayne@69 297 /**
jpayne@69 298 * Titlecase a UTF-8 string.
jpayne@69 299 * Casing is locale-dependent and context-sensitive.
jpayne@69 300 * Titlecasing uses a break iterator to find the first characters of words
jpayne@69 301 * that are to be titlecased. It titlecases those characters and lowercases
jpayne@69 302 * all others. (This can be modified with ucasemap_setOptions().)
jpayne@69 303 *
jpayne@69 304 * Note: This function takes a non-const UCaseMap pointer because it will
jpayne@69 305 * open a default break iterator if no break iterator was set yet,
jpayne@69 306 * and effectively call ucasemap_setBreakIterator();
jpayne@69 307 * also because the break iterator is stateful and will be modified during
jpayne@69 308 * the iteration.
jpayne@69 309 *
jpayne@69 310 * The titlecase break iterator can be provided to customize for arbitrary
jpayne@69 311 * styles, using rules and dictionaries beyond the standard iterators.
jpayne@69 312 * The standard titlecase iterator for the root locale implements the
jpayne@69 313 * algorithm of Unicode TR 21.
jpayne@69 314 *
jpayne@69 315 * This function uses only the setUText(), first(), next() and close() methods of the
jpayne@69 316 * provided break iterator.
jpayne@69 317 *
jpayne@69 318 * The result may be longer or shorter than the original.
jpayne@69 319 * The source string and the destination buffer must not overlap.
jpayne@69 320 *
jpayne@69 321 * @param csm UCaseMap service object. This pointer is non-const!
jpayne@69 322 * See the note above for details.
jpayne@69 323 * @param dest A buffer for the result string. The result will be NUL-terminated if
jpayne@69 324 * the buffer is large enough.
jpayne@69 325 * The contents are undefined in case of failure.
jpayne@69 326 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
jpayne@69 327 * dest may be NULL and the function will only return the length of the result
jpayne@69 328 * without writing any of the result string.
jpayne@69 329 * @param src The original string.
jpayne@69 330 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
jpayne@69 331 * @param pErrorCode Must be a valid pointer to an error code value,
jpayne@69 332 * which must not indicate a failure before the function call.
jpayne@69 333 * @return The length of the result string, if successful or in case of a buffer overflow;
jpayne@69 334 * in case of an overflow the length will be greater than destCapacity.
jpayne@69 335 *
jpayne@69 336 * @see u_strToTitle
jpayne@69 337 * @see U_TITLECASE_NO_LOWERCASE
jpayne@69 338 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
jpayne@69 339 * @stable ICU 3.8
jpayne@69 340 */
jpayne@69 341 U_STABLE int32_t U_EXPORT2
jpayne@69 342 ucasemap_utf8ToTitle(UCaseMap *csm,
jpayne@69 343 char *dest, int32_t destCapacity,
jpayne@69 344 const char *src, int32_t srcLength,
jpayne@69 345 UErrorCode *pErrorCode);
jpayne@69 346
jpayne@69 347 #endif // UCONFIG_NO_BREAK_ITERATION
jpayne@69 348
jpayne@69 349 /**
jpayne@69 350 * Case-folds the characters in a UTF-8 string.
jpayne@69 351 *
jpayne@69 352 * Case-folding is locale-independent and not context-sensitive,
jpayne@69 353 * but there is an option for whether to include or exclude mappings for dotted I
jpayne@69 354 * and dotless i that are marked with 'T' in CaseFolding.txt.
jpayne@69 355 *
jpayne@69 356 * The result may be longer or shorter than the original.
jpayne@69 357 * The source string and the destination buffer must not overlap.
jpayne@69 358 *
jpayne@69 359 * @param csm UCaseMap service object.
jpayne@69 360 * @param dest A buffer for the result string. The result will be NUL-terminated if
jpayne@69 361 * the buffer is large enough.
jpayne@69 362 * The contents are undefined in case of failure.
jpayne@69 363 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
jpayne@69 364 * dest may be NULL and the function will only return the length of the result
jpayne@69 365 * without writing any of the result string.
jpayne@69 366 * @param src The original string.
jpayne@69 367 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
jpayne@69 368 * @param pErrorCode Must be a valid pointer to an error code value,
jpayne@69 369 * which must not indicate a failure before the function call.
jpayne@69 370 * @return The length of the result string, if successful or in case of a buffer overflow;
jpayne@69 371 * in case of an overflow the length will be greater than destCapacity.
jpayne@69 372 *
jpayne@69 373 * @see u_strFoldCase
jpayne@69 374 * @see ucasemap_setOptions
jpayne@69 375 * @see U_FOLD_CASE_DEFAULT
jpayne@69 376 * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
jpayne@69 377 * @stable ICU 3.8
jpayne@69 378 */
jpayne@69 379 U_STABLE int32_t U_EXPORT2
jpayne@69 380 ucasemap_utf8FoldCase(const UCaseMap *csm,
jpayne@69 381 char *dest, int32_t destCapacity,
jpayne@69 382 const char *src, int32_t srcLength,
jpayne@69 383 UErrorCode *pErrorCode);
jpayne@69 384
jpayne@69 385 #endif