annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/unicode/usearch.h @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 // © 2016 and later: Unicode, Inc. and others.
jpayne@69 2 // License & terms of use: http://www.unicode.org/copyright.html
jpayne@69 3 /*
jpayne@69 4 **********************************************************************
jpayne@69 5 * Copyright (C) 2001-2011,2014 IBM and others. All rights reserved.
jpayne@69 6 **********************************************************************
jpayne@69 7 * Date Name Description
jpayne@69 8 * 06/28/2001 synwee Creation.
jpayne@69 9 **********************************************************************
jpayne@69 10 */
jpayne@69 11 #ifndef USEARCH_H
jpayne@69 12 #define USEARCH_H
jpayne@69 13
jpayne@69 14 #include "unicode/utypes.h"
jpayne@69 15
jpayne@69 16 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
jpayne@69 17
jpayne@69 18 #include "unicode/localpointer.h"
jpayne@69 19 #include "unicode/ucol.h"
jpayne@69 20 #include "unicode/ucoleitr.h"
jpayne@69 21 #include "unicode/ubrk.h"
jpayne@69 22
jpayne@69 23 /**
jpayne@69 24 * \file
jpayne@69 25 * \brief C API: StringSearch
jpayne@69 26 *
jpayne@69 27 * C APIs for an engine that provides language-sensitive text searching based
jpayne@69 28 * on the comparison rules defined in a <tt>UCollator</tt> data struct,
jpayne@69 29 * see <tt>ucol.h</tt>. This ensures that language eccentricity can be
jpayne@69 30 * handled, e.g. for the German collator, characters &szlig; and SS will be matched
jpayne@69 31 * if case is chosen to be ignored.
jpayne@69 32 * See the <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
jpayne@69 33 * "ICU Collation Design Document"</a> for more information.
jpayne@69 34 * <p>
jpayne@69 35 * The implementation may use a linear search or a modified form of the Boyer-Moore
jpayne@69 36 * search; for more information on the latter see
jpayne@69 37 * <a href="http://icu-project.org/docs/papers/efficient_text_searching_in_java.html">
jpayne@69 38 * "Efficient Text Searching in Java"</a>, published in <i>Java Report</i>
jpayne@69 39 * in February, 1999.
jpayne@69 40 * <p>
jpayne@69 41 * There are 2 match options for selection:<br>
jpayne@69 42 * Let S' be the sub-string of a text string S between the offsets start and
jpayne@69 43 * end <start, end>.
jpayne@69 44 * <br>
jpayne@69 45 * A pattern string P matches a text string S at the offsets <start, end>
jpayne@69 46 * if
jpayne@69 47 * <pre>
jpayne@69 48 * option 1. Some canonical equivalent of P matches some canonical equivalent
jpayne@69 49 * of S'
jpayne@69 50 * option 2. P matches S' and if P starts or ends with a combining mark,
jpayne@69 51 * there exists no non-ignorable combining mark before or after S'
jpayne@69 52 * in S respectively.
jpayne@69 53 * </pre>
jpayne@69 54 * Option 2. will be the default.
jpayne@69 55 * <p>
jpayne@69 56 * This search has APIs similar to that of other text iteration mechanisms
jpayne@69 57 * such as the break iterators in <tt>ubrk.h</tt>. Using these
jpayne@69 58 * APIs, it is easy to scan through text looking for all occurrences of
jpayne@69 59 * a given pattern. This search iterator allows changing of direction by
jpayne@69 60 * calling a <tt>reset</tt> followed by a <tt>next</tt> or <tt>previous</tt>.
jpayne@69 61 * Though a direction change can occur without calling <tt>reset</tt> first,
jpayne@69 62 * this operation comes with some speed penalty.
jpayne@69 63 * Generally, match results in the forward direction will match the result
jpayne@69 64 * matches in the backwards direction in the reverse order.
jpayne@69 65 * <p>
jpayne@69 66 * <tt>usearch.h</tt> provides APIs to specify the starting position
jpayne@69 67 * within the text string to be searched, e.g. <tt>usearch_setOffset</tt>,
jpayne@69 68 * <tt>usearch_preceding</tt> and <tt>usearch_following</tt>. Since the
jpayne@69 69 * starting position will be set as it is specified, please take note that
jpayne@69 70 * there are some dangerous positions which the search may render incorrect
jpayne@69 71 * results:
jpayne@69 72 * <ul>
jpayne@69 73 * <li> The midst of a substring that requires normalization.
jpayne@69 74 * <li> If the following match is to be found, the position should not be the
jpayne@69 75 * second character which requires to be swapped with the preceding
jpayne@69 76 * character. Vice versa, if the preceding match is to be found,
jpayne@69 77 * position to search from should not be the first character which
jpayne@69 78 * requires to be swapped with the next character. E.g certain Thai and
jpayne@69 79 * Lao characters require swapping.
jpayne@69 80 * <li> If a following pattern match is to be found, any position within a
jpayne@69 81 * contracting sequence except the first will fail. Vice versa if a
jpayne@69 82 * preceding pattern match is to be found, an invalid starting point
jpayne@69 83 * would be any character within a contracting sequence except the last.
jpayne@69 84 * </ul>
jpayne@69 85 * <p>
jpayne@69 86 * A breakiterator can be used if only matches at logical breaks are desired.
jpayne@69 87 * Using a breakiterator will only give you results that exactly match the
jpayne@69 88 * boundaries given by the breakiterator. For instance the pattern "e" will
jpayne@69 89 * not be found in the string "\u00e9" if a character break iterator is used.
jpayne@69 90 * <p>
jpayne@69 91 * Options are provided to handle overlapping matches.
jpayne@69 92 * E.g. In English, overlapping matches produce the results 0 and 2
jpayne@69 93 * for the pattern "abab" in the text "ababab", whereas mutually
jpayne@69 94 * exclusive matches only produce the result of 0.
jpayne@69 95 * <p>
jpayne@69 96 * Options are also provided to implement "asymmetric search" as described in
jpayne@69 97 * <a href="http://www.unicode.org/reports/tr10/#Asymmetric_Search">
jpayne@69 98 * UTS #10 Unicode Collation Algorithm</a>, specifically the USearchAttribute
jpayne@69 99 * USEARCH_ELEMENT_COMPARISON and its values.
jpayne@69 100 * <p>
jpayne@69 101 * Though collator attributes will be taken into consideration while
jpayne@69 102 * performing matches, there are no APIs here for setting and getting the
jpayne@69 103 * attributes. These attributes can be set by getting the collator
jpayne@69 104 * from <tt>usearch_getCollator</tt> and using the APIs in <tt>ucol.h</tt>.
jpayne@69 105 * Lastly to update String Search to the new collator attributes,
jpayne@69 106 * usearch_reset() has to be called.
jpayne@69 107 * <p>
jpayne@69 108 * Restriction: <br>
jpayne@69 109 * Currently there are no composite characters that consist of a
jpayne@69 110 * character with combining class > 0 before a character with combining
jpayne@69 111 * class == 0. However, if such a character exists in the future, the
jpayne@69 112 * search mechanism does not guarantee the results for option 1.
jpayne@69 113 *
jpayne@69 114 * <p>
jpayne@69 115 * Example of use:<br>
jpayne@69 116 * <pre><code>
jpayne@69 117 * char *tgtstr = "The quick brown fox jumped over the lazy fox";
jpayne@69 118 * char *patstr = "fox";
jpayne@69 119 * UChar target[64];
jpayne@69 120 * UChar pattern[16];
jpayne@69 121 * UErrorCode status = U_ZERO_ERROR;
jpayne@69 122 * u_uastrcpy(target, tgtstr);
jpayne@69 123 * u_uastrcpy(pattern, patstr);
jpayne@69 124 *
jpayne@69 125 * UStringSearch *search = usearch_open(pattern, -1, target, -1, "en_US",
jpayne@69 126 * NULL, &status);
jpayne@69 127 * if (U_SUCCESS(status)) {
jpayne@69 128 * for (int pos = usearch_first(search, &status);
jpayne@69 129 * pos != USEARCH_DONE;
jpayne@69 130 * pos = usearch_next(search, &status))
jpayne@69 131 * {
jpayne@69 132 * printf("Found match at %d pos, length is %d\n", pos,
jpayne@69 133 * usearch_getMatchedLength(search));
jpayne@69 134 * }
jpayne@69 135 * }
jpayne@69 136 *
jpayne@69 137 * usearch_close(search);
jpayne@69 138 * </code></pre>
jpayne@69 139 * @stable ICU 2.4
jpayne@69 140 */
jpayne@69 141
jpayne@69 142 /**
jpayne@69 143 * DONE is returned by previous() and next() after all valid matches have
jpayne@69 144 * been returned, and by first() and last() if there are no matches at all.
jpayne@69 145 * @stable ICU 2.4
jpayne@69 146 */
jpayne@69 147 #define USEARCH_DONE -1
jpayne@69 148
jpayne@69 149 /**
jpayne@69 150 * Data structure for searching
jpayne@69 151 * @stable ICU 2.4
jpayne@69 152 */
jpayne@69 153 struct UStringSearch;
jpayne@69 154 /**
jpayne@69 155 * Data structure for searching
jpayne@69 156 * @stable ICU 2.4
jpayne@69 157 */
jpayne@69 158 typedef struct UStringSearch UStringSearch;
jpayne@69 159
jpayne@69 160 /**
jpayne@69 161 * @stable ICU 2.4
jpayne@69 162 */
jpayne@69 163 typedef enum {
jpayne@69 164 /**
jpayne@69 165 * Option for overlapping matches
jpayne@69 166 * @stable ICU 2.4
jpayne@69 167 */
jpayne@69 168 USEARCH_OVERLAP = 0,
jpayne@69 169 #ifndef U_HIDE_DEPRECATED_API
jpayne@69 170 /**
jpayne@69 171 * Option for canonical matches; option 1 in header documentation.
jpayne@69 172 * The default value will be USEARCH_OFF.
jpayne@69 173 * Note: Setting this option to USEARCH_ON currently has no effect on
jpayne@69 174 * search behavior, and this option is deprecated. Instead, to control
jpayne@69 175 * canonical match behavior, you must set UCOL_NORMALIZATION_MODE
jpayne@69 176 * appropriately (to UCOL_OFF or UCOL_ON) in the UCollator used by
jpayne@69 177 * the UStringSearch object.
jpayne@69 178 * @see usearch_openFromCollator
jpayne@69 179 * @see usearch_getCollator
jpayne@69 180 * @see usearch_setCollator
jpayne@69 181 * @see ucol_getAttribute
jpayne@69 182 * @deprecated ICU 53
jpayne@69 183 */
jpayne@69 184 USEARCH_CANONICAL_MATCH = 1,
jpayne@69 185 #endif /* U_HIDE_DEPRECATED_API */
jpayne@69 186 /**
jpayne@69 187 * Option to control how collation elements are compared.
jpayne@69 188 * The default value will be USEARCH_STANDARD_ELEMENT_COMPARISON.
jpayne@69 189 * @stable ICU 4.4
jpayne@69 190 */
jpayne@69 191 USEARCH_ELEMENT_COMPARISON = 2,
jpayne@69 192
jpayne@69 193 #ifndef U_HIDE_DEPRECATED_API
jpayne@69 194 /**
jpayne@69 195 * One more than the highest normal USearchAttribute value.
jpayne@69 196 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
jpayne@69 197 */
jpayne@69 198 USEARCH_ATTRIBUTE_COUNT = 3
jpayne@69 199 #endif /* U_HIDE_DEPRECATED_API */
jpayne@69 200 } USearchAttribute;
jpayne@69 201
jpayne@69 202 /**
jpayne@69 203 * @stable ICU 2.4
jpayne@69 204 */
jpayne@69 205 typedef enum {
jpayne@69 206 /**
jpayne@69 207 * Default value for any USearchAttribute
jpayne@69 208 * @stable ICU 2.4
jpayne@69 209 */
jpayne@69 210 USEARCH_DEFAULT = -1,
jpayne@69 211 /**
jpayne@69 212 * Value for USEARCH_OVERLAP and USEARCH_CANONICAL_MATCH
jpayne@69 213 * @stable ICU 2.4
jpayne@69 214 */
jpayne@69 215 USEARCH_OFF,
jpayne@69 216 /**
jpayne@69 217 * Value for USEARCH_OVERLAP and USEARCH_CANONICAL_MATCH
jpayne@69 218 * @stable ICU 2.4
jpayne@69 219 */
jpayne@69 220 USEARCH_ON,
jpayne@69 221 /**
jpayne@69 222 * Value (default) for USEARCH_ELEMENT_COMPARISON;
jpayne@69 223 * standard collation element comparison at the specified collator
jpayne@69 224 * strength.
jpayne@69 225 * @stable ICU 4.4
jpayne@69 226 */
jpayne@69 227 USEARCH_STANDARD_ELEMENT_COMPARISON,
jpayne@69 228 /**
jpayne@69 229 * Value for USEARCH_ELEMENT_COMPARISON;
jpayne@69 230 * collation element comparison is modified to effectively provide
jpayne@69 231 * behavior between the specified strength and strength - 1. Collation
jpayne@69 232 * elements in the pattern that have the base weight for the specified
jpayne@69 233 * strength are treated as "wildcards" that match an element with any
jpayne@69 234 * other weight at that collation level in the searched text. For
jpayne@69 235 * example, with a secondary-strength English collator, a plain 'e' in
jpayne@69 236 * the pattern will match a plain e or an e with any diacritic in the
jpayne@69 237 * searched text, but an e with diacritic in the pattern will only
jpayne@69 238 * match an e with the same diacritic in the searched text.
jpayne@69 239 *
jpayne@69 240 * This supports "asymmetric search" as described in
jpayne@69 241 * <a href="http://www.unicode.org/reports/tr10/#Asymmetric_Search">
jpayne@69 242 * UTS #10 Unicode Collation Algorithm</a>.
jpayne@69 243 *
jpayne@69 244 * @stable ICU 4.4
jpayne@69 245 */
jpayne@69 246 USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD,
jpayne@69 247 /**
jpayne@69 248 * Value for USEARCH_ELEMENT_COMPARISON.
jpayne@69 249 * collation element comparison is modified to effectively provide
jpayne@69 250 * behavior between the specified strength and strength - 1. Collation
jpayne@69 251 * elements in either the pattern or the searched text that have the
jpayne@69 252 * base weight for the specified strength are treated as "wildcards"
jpayne@69 253 * that match an element with any other weight at that collation level.
jpayne@69 254 * For example, with a secondary-strength English collator, a plain 'e'
jpayne@69 255 * in the pattern will match a plain e or an e with any diacritic in the
jpayne@69 256 * searched text, but an e with diacritic in the pattern will only
jpayne@69 257 * match an e with the same diacritic or a plain e in the searched text.
jpayne@69 258 *
jpayne@69 259 * This option is similar to "asymmetric search" as described in
jpayne@69 260 * [UTS #10 Unicode Collation Algorithm](http://www.unicode.org/reports/tr10/#Asymmetric_Search),
jpayne@69 261 * but also allows unmarked characters in the searched text to match
jpayne@69 262 * marked or unmarked versions of that character in the pattern.
jpayne@69 263 *
jpayne@69 264 * @stable ICU 4.4
jpayne@69 265 */
jpayne@69 266 USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD,
jpayne@69 267
jpayne@69 268 #ifndef U_HIDE_DEPRECATED_API
jpayne@69 269 /**
jpayne@69 270 * One more than the highest normal USearchAttributeValue value.
jpayne@69 271 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
jpayne@69 272 */
jpayne@69 273 USEARCH_ATTRIBUTE_VALUE_COUNT
jpayne@69 274 #endif /* U_HIDE_DEPRECATED_API */
jpayne@69 275 } USearchAttributeValue;
jpayne@69 276
jpayne@69 277 /* open and close ------------------------------------------------------ */
jpayne@69 278
jpayne@69 279 /**
jpayne@69 280 * Creating a search iterator data struct using the argument locale language
jpayne@69 281 * rule set. A collator will be created in the process, which will be owned by
jpayne@69 282 * this search and will be deleted in <tt>usearch_close</tt>.
jpayne@69 283 * @param pattern for matching
jpayne@69 284 * @param patternlength length of the pattern, -1 for null-termination
jpayne@69 285 * @param text text string
jpayne@69 286 * @param textlength length of the text string, -1 for null-termination
jpayne@69 287 * @param locale name of locale for the rules to be used
jpayne@69 288 * @param breakiter A BreakIterator that will be used to restrict the points
jpayne@69 289 * at which matches are detected. If a match is found, but
jpayne@69 290 * the match's start or end index is not a boundary as
jpayne@69 291 * determined by the <tt>BreakIterator</tt>, the match will
jpayne@69 292 * be rejected and another will be searched for.
jpayne@69 293 * If this parameter is <tt>NULL</tt>, no break detection is
jpayne@69 294 * attempted.
jpayne@69 295 * @param status for errors if it occurs. If pattern or text is NULL, or if
jpayne@69 296 * patternlength or textlength is 0 then an
jpayne@69 297 * U_ILLEGAL_ARGUMENT_ERROR is returned.
jpayne@69 298 * @return search iterator data structure, or NULL if there is an error.
jpayne@69 299 * @stable ICU 2.4
jpayne@69 300 */
jpayne@69 301 U_STABLE UStringSearch * U_EXPORT2 usearch_open(const UChar *pattern,
jpayne@69 302 int32_t patternlength,
jpayne@69 303 const UChar *text,
jpayne@69 304 int32_t textlength,
jpayne@69 305 const char *locale,
jpayne@69 306 UBreakIterator *breakiter,
jpayne@69 307 UErrorCode *status);
jpayne@69 308
jpayne@69 309 /**
jpayne@69 310 * Creating a search iterator data struct using the argument collator language
jpayne@69 311 * rule set. Note, user retains the ownership of this collator, thus the
jpayne@69 312 * responsibility of deletion lies with the user.
jpayne@69 313 * NOTE: string search cannot be instantiated from a collator that has
jpayne@69 314 * collate digits as numbers (CODAN) turned on.
jpayne@69 315 * @param pattern for matching
jpayne@69 316 * @param patternlength length of the pattern, -1 for null-termination
jpayne@69 317 * @param text text string
jpayne@69 318 * @param textlength length of the text string, -1 for null-termination
jpayne@69 319 * @param collator used for the language rules
jpayne@69 320 * @param breakiter A BreakIterator that will be used to restrict the points
jpayne@69 321 * at which matches are detected. If a match is found, but
jpayne@69 322 * the match's start or end index is not a boundary as
jpayne@69 323 * determined by the <tt>BreakIterator</tt>, the match will
jpayne@69 324 * be rejected and another will be searched for.
jpayne@69 325 * If this parameter is <tt>NULL</tt>, no break detection is
jpayne@69 326 * attempted.
jpayne@69 327 * @param status for errors if it occurs. If collator, pattern or text is NULL,
jpayne@69 328 * or if patternlength or textlength is 0 then an
jpayne@69 329 * U_ILLEGAL_ARGUMENT_ERROR is returned.
jpayne@69 330 * @return search iterator data structure, or NULL if there is an error.
jpayne@69 331 * @stable ICU 2.4
jpayne@69 332 */
jpayne@69 333 U_STABLE UStringSearch * U_EXPORT2 usearch_openFromCollator(
jpayne@69 334 const UChar *pattern,
jpayne@69 335 int32_t patternlength,
jpayne@69 336 const UChar *text,
jpayne@69 337 int32_t textlength,
jpayne@69 338 const UCollator *collator,
jpayne@69 339 UBreakIterator *breakiter,
jpayne@69 340 UErrorCode *status);
jpayne@69 341
jpayne@69 342 /**
jpayne@69 343 * Destroying and cleaning up the search iterator data struct.
jpayne@69 344 * If a collator is created in <tt>usearch_open</tt>, it will be destroyed here.
jpayne@69 345 * @param searchiter data struct to clean up
jpayne@69 346 * @stable ICU 2.4
jpayne@69 347 */
jpayne@69 348 U_STABLE void U_EXPORT2 usearch_close(UStringSearch *searchiter);
jpayne@69 349
jpayne@69 350 #if U_SHOW_CPLUSPLUS_API
jpayne@69 351
jpayne@69 352 U_NAMESPACE_BEGIN
jpayne@69 353
jpayne@69 354 /**
jpayne@69 355 * \class LocalUStringSearchPointer
jpayne@69 356 * "Smart pointer" class, closes a UStringSearch via usearch_close().
jpayne@69 357 * For most methods see the LocalPointerBase base class.
jpayne@69 358 *
jpayne@69 359 * @see LocalPointerBase
jpayne@69 360 * @see LocalPointer
jpayne@69 361 * @stable ICU 4.4
jpayne@69 362 */
jpayne@69 363 U_DEFINE_LOCAL_OPEN_POINTER(LocalUStringSearchPointer, UStringSearch, usearch_close);
jpayne@69 364
jpayne@69 365 U_NAMESPACE_END
jpayne@69 366
jpayne@69 367 #endif
jpayne@69 368
jpayne@69 369 /* get and set methods -------------------------------------------------- */
jpayne@69 370
jpayne@69 371 /**
jpayne@69 372 * Sets the current position in the text string which the next search will
jpayne@69 373 * start from. Clears previous states.
jpayne@69 374 * This method takes the argument index and sets the position in the text
jpayne@69 375 * string accordingly without checking if the index is pointing to a
jpayne@69 376 * valid starting point to begin searching.
jpayne@69 377 * Search positions that may render incorrect results are highlighted in the
jpayne@69 378 * header comments
jpayne@69 379 * @param strsrch search iterator data struct
jpayne@69 380 * @param position position to start next search from. If position is less
jpayne@69 381 * than or greater than the text range for searching,
jpayne@69 382 * an U_INDEX_OUTOFBOUNDS_ERROR will be returned
jpayne@69 383 * @param status error status if any.
jpayne@69 384 * @stable ICU 2.4
jpayne@69 385 */
jpayne@69 386 U_STABLE void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch,
jpayne@69 387 int32_t position,
jpayne@69 388 UErrorCode *status);
jpayne@69 389
jpayne@69 390 /**
jpayne@69 391 * Return the current index in the string text being searched.
jpayne@69 392 * If the iteration has gone past the end of the text (or past the beginning
jpayne@69 393 * for a backwards search), <tt>USEARCH_DONE</tt> is returned.
jpayne@69 394 * @param strsrch search iterator data struct
jpayne@69 395 * @see #USEARCH_DONE
jpayne@69 396 * @stable ICU 2.4
jpayne@69 397 */
jpayne@69 398 U_STABLE int32_t U_EXPORT2 usearch_getOffset(const UStringSearch *strsrch);
jpayne@69 399
jpayne@69 400 /**
jpayne@69 401 * Sets the text searching attributes located in the enum USearchAttribute
jpayne@69 402 * with values from the enum USearchAttributeValue.
jpayne@69 403 * <tt>USEARCH_DEFAULT</tt> can be used for all attributes for resetting.
jpayne@69 404 * @param strsrch search iterator data struct
jpayne@69 405 * @param attribute text attribute to be set
jpayne@69 406 * @param value text attribute value
jpayne@69 407 * @param status for errors if it occurs
jpayne@69 408 * @see #usearch_getAttribute
jpayne@69 409 * @stable ICU 2.4
jpayne@69 410 */
jpayne@69 411 U_STABLE void U_EXPORT2 usearch_setAttribute(UStringSearch *strsrch,
jpayne@69 412 USearchAttribute attribute,
jpayne@69 413 USearchAttributeValue value,
jpayne@69 414 UErrorCode *status);
jpayne@69 415
jpayne@69 416 /**
jpayne@69 417 * Gets the text searching attributes.
jpayne@69 418 * @param strsrch search iterator data struct
jpayne@69 419 * @param attribute text attribute to be retrieved
jpayne@69 420 * @return text attribute value
jpayne@69 421 * @see #usearch_setAttribute
jpayne@69 422 * @stable ICU 2.4
jpayne@69 423 */
jpayne@69 424 U_STABLE USearchAttributeValue U_EXPORT2 usearch_getAttribute(
jpayne@69 425 const UStringSearch *strsrch,
jpayne@69 426 USearchAttribute attribute);
jpayne@69 427
jpayne@69 428 /**
jpayne@69 429 * Returns the index to the match in the text string that was searched.
jpayne@69 430 * This call returns a valid result only after a successful call to
jpayne@69 431 * <tt>usearch_first</tt>, <tt>usearch_next</tt>, <tt>usearch_previous</tt>,
jpayne@69 432 * or <tt>usearch_last</tt>.
jpayne@69 433 * Just after construction, or after a searching method returns
jpayne@69 434 * <tt>USEARCH_DONE</tt>, this method will return <tt>USEARCH_DONE</tt>.
jpayne@69 435 * <p>
jpayne@69 436 * Use <tt>usearch_getMatchedLength</tt> to get the matched string length.
jpayne@69 437 * @param strsrch search iterator data struct
jpayne@69 438 * @return index to a substring within the text string that is being
jpayne@69 439 * searched.
jpayne@69 440 * @see #usearch_first
jpayne@69 441 * @see #usearch_next
jpayne@69 442 * @see #usearch_previous
jpayne@69 443 * @see #usearch_last
jpayne@69 444 * @see #USEARCH_DONE
jpayne@69 445 * @stable ICU 2.4
jpayne@69 446 */
jpayne@69 447 U_STABLE int32_t U_EXPORT2 usearch_getMatchedStart(
jpayne@69 448 const UStringSearch *strsrch);
jpayne@69 449
jpayne@69 450 /**
jpayne@69 451 * Returns the length of text in the string which matches the search pattern.
jpayne@69 452 * This call returns a valid result only after a successful call to
jpayne@69 453 * <tt>usearch_first</tt>, <tt>usearch_next</tt>, <tt>usearch_previous</tt>,
jpayne@69 454 * or <tt>usearch_last</tt>.
jpayne@69 455 * Just after construction, or after a searching method returns
jpayne@69 456 * <tt>USEARCH_DONE</tt>, this method will return 0.
jpayne@69 457 * @param strsrch search iterator data struct
jpayne@69 458 * @return The length of the match in the string text, or 0 if there is no
jpayne@69 459 * match currently.
jpayne@69 460 * @see #usearch_first
jpayne@69 461 * @see #usearch_next
jpayne@69 462 * @see #usearch_previous
jpayne@69 463 * @see #usearch_last
jpayne@69 464 * @see #USEARCH_DONE
jpayne@69 465 * @stable ICU 2.4
jpayne@69 466 */
jpayne@69 467 U_STABLE int32_t U_EXPORT2 usearch_getMatchedLength(
jpayne@69 468 const UStringSearch *strsrch);
jpayne@69 469
jpayne@69 470 /**
jpayne@69 471 * Returns the text that was matched by the most recent call to
jpayne@69 472 * <tt>usearch_first</tt>, <tt>usearch_next</tt>, <tt>usearch_previous</tt>,
jpayne@69 473 * or <tt>usearch_last</tt>.
jpayne@69 474 * If the iterator is not pointing at a valid match (e.g. just after
jpayne@69 475 * construction or after <tt>USEARCH_DONE</tt> has been returned), returns
jpayne@69 476 * an empty string. If result is not large enough to store the matched text,
jpayne@69 477 * result will be filled with the partial text and an U_BUFFER_OVERFLOW_ERROR
jpayne@69 478 * will be returned in status. result will be null-terminated whenever
jpayne@69 479 * possible. If the buffer fits the matched text exactly, null-termination
jpayne@69 480 * is not possible, and U_STRING_NOT_TERMINATED_WARNING is set in status.
jpayne@69 481 * Pre-flighting can be either done with length = 0 or the API
jpayne@69 482 * <tt>usearch_getMatchedLength</tt>.
jpayne@69 483 * @param strsrch search iterator data struct
jpayne@69 484 * @param result UChar buffer to store the matched string
jpayne@69 485 * @param resultCapacity length of the result buffer
jpayne@69 486 * @param status error returned if result is not large enough
jpayne@69 487 * @return exact length of the matched text, not counting the null-termination
jpayne@69 488 * @see #usearch_first
jpayne@69 489 * @see #usearch_next
jpayne@69 490 * @see #usearch_previous
jpayne@69 491 * @see #usearch_last
jpayne@69 492 * @see #USEARCH_DONE
jpayne@69 493 * @stable ICU 2.4
jpayne@69 494 */
jpayne@69 495 U_STABLE int32_t U_EXPORT2 usearch_getMatchedText(const UStringSearch *strsrch,
jpayne@69 496 UChar *result,
jpayne@69 497 int32_t resultCapacity,
jpayne@69 498 UErrorCode *status);
jpayne@69 499
jpayne@69 500 #if !UCONFIG_NO_BREAK_ITERATION
jpayne@69 501
jpayne@69 502 /**
jpayne@69 503 * Set the BreakIterator that will be used to restrict the points at which
jpayne@69 504 * matches are detected.
jpayne@69 505 * @param strsrch search iterator data struct
jpayne@69 506 * @param breakiter A BreakIterator that will be used to restrict the points
jpayne@69 507 * at which matches are detected. If a match is found, but
jpayne@69 508 * the match's start or end index is not a boundary as
jpayne@69 509 * determined by the <tt>BreakIterator</tt>, the match will
jpayne@69 510 * be rejected and another will be searched for.
jpayne@69 511 * If this parameter is <tt>NULL</tt>, no break detection is
jpayne@69 512 * attempted.
jpayne@69 513 * @param status for errors if it occurs
jpayne@69 514 * @see #usearch_getBreakIterator
jpayne@69 515 * @stable ICU 2.4
jpayne@69 516 */
jpayne@69 517 U_STABLE void U_EXPORT2 usearch_setBreakIterator(UStringSearch *strsrch,
jpayne@69 518 UBreakIterator *breakiter,
jpayne@69 519 UErrorCode *status);
jpayne@69 520
jpayne@69 521 /**
jpayne@69 522 * Returns the BreakIterator that is used to restrict the points at which
jpayne@69 523 * matches are detected. This will be the same object that was passed to the
jpayne@69 524 * constructor or to <tt>usearch_setBreakIterator</tt>. Note that
jpayne@69 525 * <tt>NULL</tt>
jpayne@69 526 * is a legal value; it means that break detection should not be attempted.
jpayne@69 527 * @param strsrch search iterator data struct
jpayne@69 528 * @return break iterator used
jpayne@69 529 * @see #usearch_setBreakIterator
jpayne@69 530 * @stable ICU 2.4
jpayne@69 531 */
jpayne@69 532 U_STABLE const UBreakIterator * U_EXPORT2 usearch_getBreakIterator(
jpayne@69 533 const UStringSearch *strsrch);
jpayne@69 534
jpayne@69 535 #endif
jpayne@69 536
jpayne@69 537 /**
jpayne@69 538 * Set the string text to be searched. Text iteration will hence begin at the
jpayne@69 539 * start of the text string. This method is useful if you want to re-use an
jpayne@69 540 * iterator to search for the same pattern within a different body of text.
jpayne@69 541 * @param strsrch search iterator data struct
jpayne@69 542 * @param text new string to look for match
jpayne@69 543 * @param textlength length of the new string, -1 for null-termination
jpayne@69 544 * @param status for errors if it occurs. If text is NULL, or textlength is 0
jpayne@69 545 * then an U_ILLEGAL_ARGUMENT_ERROR is returned with no change
jpayne@69 546 * done to strsrch.
jpayne@69 547 * @see #usearch_getText
jpayne@69 548 * @stable ICU 2.4
jpayne@69 549 */
jpayne@69 550 U_STABLE void U_EXPORT2 usearch_setText( UStringSearch *strsrch,
jpayne@69 551 const UChar *text,
jpayne@69 552 int32_t textlength,
jpayne@69 553 UErrorCode *status);
jpayne@69 554
jpayne@69 555 /**
jpayne@69 556 * Return the string text to be searched.
jpayne@69 557 * @param strsrch search iterator data struct
jpayne@69 558 * @param length returned string text length
jpayne@69 559 * @return string text
jpayne@69 560 * @see #usearch_setText
jpayne@69 561 * @stable ICU 2.4
jpayne@69 562 */
jpayne@69 563 U_STABLE const UChar * U_EXPORT2 usearch_getText(const UStringSearch *strsrch,
jpayne@69 564 int32_t *length);
jpayne@69 565
jpayne@69 566 /**
jpayne@69 567 * Gets the collator used for the language rules.
jpayne@69 568 * <p>
jpayne@69 569 * Deleting the returned <tt>UCollator</tt> before calling
jpayne@69 570 * <tt>usearch_close</tt> would cause the string search to fail.
jpayne@69 571 * <tt>usearch_close</tt> will delete the collator if this search owns it.
jpayne@69 572 * @param strsrch search iterator data struct
jpayne@69 573 * @return collator
jpayne@69 574 * @stable ICU 2.4
jpayne@69 575 */
jpayne@69 576 U_STABLE UCollator * U_EXPORT2 usearch_getCollator(
jpayne@69 577 const UStringSearch *strsrch);
jpayne@69 578
jpayne@69 579 /**
jpayne@69 580 * Sets the collator used for the language rules. User retains the ownership
jpayne@69 581 * of this collator, thus the responsibility of deletion lies with the user.
jpayne@69 582 * This method causes internal data such as Boyer-Moore shift tables to
jpayne@69 583 * be recalculated, but the iterator's position is unchanged.
jpayne@69 584 * @param strsrch search iterator data struct
jpayne@69 585 * @param collator to be used
jpayne@69 586 * @param status for errors if it occurs
jpayne@69 587 * @stable ICU 2.4
jpayne@69 588 */
jpayne@69 589 U_STABLE void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch,
jpayne@69 590 const UCollator *collator,
jpayne@69 591 UErrorCode *status);
jpayne@69 592
jpayne@69 593 /**
jpayne@69 594 * Sets the pattern used for matching.
jpayne@69 595 * Internal data like the Boyer Moore table will be recalculated, but the
jpayne@69 596 * iterator's position is unchanged.
jpayne@69 597 * @param strsrch search iterator data struct
jpayne@69 598 * @param pattern string
jpayne@69 599 * @param patternlength pattern length, -1 for null-terminated string
jpayne@69 600 * @param status for errors if it occurs. If pattern is NULL, or patternlength is 0
jpayne@69 601 * then an U_ILLEGAL_ARGUMENT_ERROR is returned with no change
jpayne@69 602 * done to strsrch.
jpayne@69 603 * @stable ICU 2.4
jpayne@69 604 */
jpayne@69 605 U_STABLE void U_EXPORT2 usearch_setPattern( UStringSearch *strsrch,
jpayne@69 606 const UChar *pattern,
jpayne@69 607 int32_t patternlength,
jpayne@69 608 UErrorCode *status);
jpayne@69 609
jpayne@69 610 /**
jpayne@69 611 * Gets the search pattern
jpayne@69 612 * @param strsrch search iterator data struct
jpayne@69 613 * @param length return length of the pattern, -1 indicates that the pattern
jpayne@69 614 * is null-terminated
jpayne@69 615 * @return pattern string
jpayne@69 616 * @stable ICU 2.4
jpayne@69 617 */
jpayne@69 618 U_STABLE const UChar * U_EXPORT2 usearch_getPattern(
jpayne@69 619 const UStringSearch *strsrch,
jpayne@69 620 int32_t *length);
jpayne@69 621
jpayne@69 622 /* methods ------------------------------------------------------------- */
jpayne@69 623
jpayne@69 624 /**
jpayne@69 625 * Returns the first index at which the string text matches the search
jpayne@69 626 * pattern.
jpayne@69 627 * The iterator is adjusted so that its current index (as returned by
jpayne@69 628 * <tt>usearch_getOffset</tt>) is the match position if one was found.
jpayne@69 629 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
jpayne@69 630 * the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
jpayne@69 631 * @param strsrch search iterator data struct
jpayne@69 632 * @param status for errors if it occurs
jpayne@69 633 * @return The character index of the first match, or
jpayne@69 634 * <tt>USEARCH_DONE</tt> if there are no matches.
jpayne@69 635 * @see #usearch_getOffset
jpayne@69 636 * @see #USEARCH_DONE
jpayne@69 637 * @stable ICU 2.4
jpayne@69 638 */
jpayne@69 639 U_STABLE int32_t U_EXPORT2 usearch_first(UStringSearch *strsrch,
jpayne@69 640 UErrorCode *status);
jpayne@69 641
jpayne@69 642 /**
jpayne@69 643 * Returns the first index equal to or greater than <tt>position</tt> at which
jpayne@69 644 * the string text
jpayne@69 645 * matches the search pattern. The iterator is adjusted so that its current
jpayne@69 646 * index (as returned by <tt>usearch_getOffset</tt>) is the match position if
jpayne@69 647 * one was found.
jpayne@69 648 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
jpayne@69 649 * the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
jpayne@69 650 * <p>
jpayne@69 651 * Search positions that may render incorrect results are highlighted in the
jpayne@69 652 * header comments. If position is less than or greater than the text range
jpayne@69 653 * for searching, an U_INDEX_OUTOFBOUNDS_ERROR will be returned.
jpayne@69 654 * @param strsrch search iterator data struct
jpayne@69 655 * @param position to start the search at
jpayne@69 656 * @param status for errors if it occurs
jpayne@69 657 * @return The character index of the first match following <tt>position</tt>,
jpayne@69 658 * or <tt>USEARCH_DONE</tt> if there are no matches.
jpayne@69 659 * @see #usearch_getOffset
jpayne@69 660 * @see #USEARCH_DONE
jpayne@69 661 * @stable ICU 2.4
jpayne@69 662 */
jpayne@69 663 U_STABLE int32_t U_EXPORT2 usearch_following(UStringSearch *strsrch,
jpayne@69 664 int32_t position,
jpayne@69 665 UErrorCode *status);
jpayne@69 666
jpayne@69 667 /**
jpayne@69 668 * Returns the last index in the target text at which it matches the search
jpayne@69 669 * pattern. The iterator is adjusted so that its current
jpayne@69 670 * index (as returned by <tt>usearch_getOffset</tt>) is the match position if
jpayne@69 671 * one was found.
jpayne@69 672 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
jpayne@69 673 * the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
jpayne@69 674 * @param strsrch search iterator data struct
jpayne@69 675 * @param status for errors if it occurs
jpayne@69 676 * @return The index of the last match, or <tt>USEARCH_DONE</tt> if there
jpayne@69 677 * are no matches.
jpayne@69 678 * @see #usearch_getOffset
jpayne@69 679 * @see #USEARCH_DONE
jpayne@69 680 * @stable ICU 2.4
jpayne@69 681 */
jpayne@69 682 U_STABLE int32_t U_EXPORT2 usearch_last(UStringSearch *strsrch,
jpayne@69 683 UErrorCode *status);
jpayne@69 684
jpayne@69 685 /**
jpayne@69 686 * Returns the first index less than <tt>position</tt> at which the string text
jpayne@69 687 * matches the search pattern. The iterator is adjusted so that its current
jpayne@69 688 * index (as returned by <tt>usearch_getOffset</tt>) is the match position if
jpayne@69 689 * one was found.
jpayne@69 690 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
jpayne@69 691 * the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
jpayne@69 692 * <p>
jpayne@69 693 * Search positions that may render incorrect results are highlighted in the
jpayne@69 694 * header comments. If position is less than or greater than the text range
jpayne@69 695 * for searching, an U_INDEX_OUTOFBOUNDS_ERROR will be returned.
jpayne@69 696 * <p>
jpayne@69 697 * When <tt>USEARCH_OVERLAP</tt> option is off, the last index of the
jpayne@69 698 * result match is always less than <tt>position</tt>.
jpayne@69 699 * When <tt>USEARCH_OVERLAP</tt> is on, the result match may span across
jpayne@69 700 * <tt>position</tt>.
jpayne@69 701 * @param strsrch search iterator data struct
jpayne@69 702 * @param position index position the search is to begin at
jpayne@69 703 * @param status for errors if it occurs
jpayne@69 704 * @return The character index of the first match preceding <tt>position</tt>,
jpayne@69 705 * or <tt>USEARCH_DONE</tt> if there are no matches.
jpayne@69 706 * @see #usearch_getOffset
jpayne@69 707 * @see #USEARCH_DONE
jpayne@69 708 * @stable ICU 2.4
jpayne@69 709 */
jpayne@69 710 U_STABLE int32_t U_EXPORT2 usearch_preceding(UStringSearch *strsrch,
jpayne@69 711 int32_t position,
jpayne@69 712 UErrorCode *status);
jpayne@69 713
jpayne@69 714 /**
jpayne@69 715 * Returns the index of the next point at which the string text matches the
jpayne@69 716 * search pattern, starting from the current position.
jpayne@69 717 * The iterator is adjusted so that its current
jpayne@69 718 * index (as returned by <tt>usearch_getOffset</tt>) is the match position if
jpayne@69 719 * one was found.
jpayne@69 720 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
jpayne@69 721 * the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
jpayne@69 722 * @param strsrch search iterator data struct
jpayne@69 723 * @param status for errors if it occurs
jpayne@69 724 * @return The index of the next match after the current position, or
jpayne@69 725 * <tt>USEARCH_DONE</tt> if there are no more matches.
jpayne@69 726 * @see #usearch_first
jpayne@69 727 * @see #usearch_getOffset
jpayne@69 728 * @see #USEARCH_DONE
jpayne@69 729 * @stable ICU 2.4
jpayne@69 730 */
jpayne@69 731 U_STABLE int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch,
jpayne@69 732 UErrorCode *status);
jpayne@69 733
jpayne@69 734 /**
jpayne@69 735 * Returns the index of the previous point at which the string text matches
jpayne@69 736 * the search pattern, starting at the current position.
jpayne@69 737 * The iterator is adjusted so that its current
jpayne@69 738 * index (as returned by <tt>usearch_getOffset</tt>) is the match position if
jpayne@69 739 * one was found.
jpayne@69 740 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
jpayne@69 741 * the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
jpayne@69 742 * @param strsrch search iterator data struct
jpayne@69 743 * @param status for errors if it occurs
jpayne@69 744 * @return The index of the previous match before the current position,
jpayne@69 745 * or <tt>USEARCH_DONE</tt> if there are no more matches.
jpayne@69 746 * @see #usearch_last
jpayne@69 747 * @see #usearch_getOffset
jpayne@69 748 * @see #USEARCH_DONE
jpayne@69 749 * @stable ICU 2.4
jpayne@69 750 */
jpayne@69 751 U_STABLE int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch,
jpayne@69 752 UErrorCode *status);
jpayne@69 753
jpayne@69 754 /**
jpayne@69 755 * Reset the iteration.
jpayne@69 756 * Search will begin at the start of the text string if a forward iteration
jpayne@69 757 * is initiated before a backwards iteration. Otherwise if a backwards
jpayne@69 758 * iteration is initiated before a forwards iteration, the search will begin
jpayne@69 759 * at the end of the text string.
jpayne@69 760 * @param strsrch search iterator data struct
jpayne@69 761 * @see #usearch_first
jpayne@69 762 * @stable ICU 2.4
jpayne@69 763 */
jpayne@69 764 U_STABLE void U_EXPORT2 usearch_reset(UStringSearch *strsrch);
jpayne@69 765
jpayne@69 766 #ifndef U_HIDE_INTERNAL_API
jpayne@69 767 /**
jpayne@69 768 * Simple forward search for the pattern, starting at a specified index,
jpayne@69 769 * and using a default set of search options.
jpayne@69 770 *
jpayne@69 771 * This is an experimental function, and is not an official part of the
jpayne@69 772 * ICU API.
jpayne@69 773 *
jpayne@69 774 * The collator options, such as UCOL_STRENGTH and UCOL_NORMALIZATION_MODE, are honored.
jpayne@69 775 *
jpayne@69 776 * The UStringSearch options USEARCH_CANONICAL_MATCH, USEARCH_OVERLAP and
jpayne@69 777 * any Break Iterator are ignored.
jpayne@69 778 *
jpayne@69 779 * Matches obey the following constraints:
jpayne@69 780 *
jpayne@69 781 * Characters at the start or end positions of a match that are ignorable
jpayne@69 782 * for collation are not included as part of the match, unless they
jpayne@69 783 * are part of a combining sequence, as described below.
jpayne@69 784 *
jpayne@69 785 * A match will not include a partial combining sequence. Combining
jpayne@69 786 * character sequences are considered to be inseparable units,
jpayne@69 787 * and either match the pattern completely, or are considered to not match
jpayne@69 788 * at all. Thus, for example, an A followed by a combining accent mark will
jpayne@69 789 * not be found when searching for a plain (unaccented) A (unless
jpayne@69 790 * the collation strength has been set to ignore all accents).
jpayne@69 791 *
jpayne@69 792 * When beginning a search, the initial starting position, startIdx,
jpayne@69 793 * is assumed to be an acceptable match boundary with respect to
jpayne@69 794 * combining characters. A combining sequence that spans across the
jpayne@69 795 * starting point will not suppress a match beginning at startIdx.
jpayne@69 796 *
jpayne@69 797 * Characters that expand to multiple collation elements
jpayne@69 798 * (German sharp-S becoming 'ss', or the composed forms of accented
jpayne@69 799 * characters, for example) also must match completely.
jpayne@69 800 * Searching for a single 's' in a string containing only a sharp-s will
jpayne@69 801 * find no match.
jpayne@69 802 *
jpayne@69 803 *
jpayne@69 804 * @param strsrch the UStringSearch struct, which references both
jpayne@69 805 * the text to be searched and the pattern being sought.
jpayne@69 806 * @param startIdx The index into the text to begin the search.
jpayne@69 807 * @param matchStart An out parameter, the starting index of the matched text.
jpayne@69 808 * This parameter may be NULL.
jpayne@69 809 * A value of -1 will be returned if no match was found.
jpayne@69 810 * @param matchLimit Out parameter, the index of the first position following the matched text.
jpayne@69 811 * The matchLimit will be at a suitable position for beginning a subsequent search
jpayne@69 812 * in the input text.
jpayne@69 813 * This parameter may be NULL.
jpayne@69 814 * A value of -1 will be returned if no match was found.
jpayne@69 815 *
jpayne@69 816 * @param status Report any errors. Note that no match found is not an error.
jpayne@69 817 * @return TRUE if a match was found, FALSE otherwise.
jpayne@69 818 *
jpayne@69 819 * @internal
jpayne@69 820 */
jpayne@69 821 U_INTERNAL UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
jpayne@69 822 int32_t startIdx,
jpayne@69 823 int32_t *matchStart,
jpayne@69 824 int32_t *matchLimit,
jpayne@69 825 UErrorCode *status);
jpayne@69 826
jpayne@69 827 /**
jpayne@69 828 * Simple backwards search for the pattern, starting at a specified index,
jpayne@69 829 * and using a default set of search options.
jpayne@69 830 *
jpayne@69 831 * This is an experimental function, and is not an official part of the
jpayne@69 832 * ICU API.
jpayne@69 833 *
jpayne@69 834 * The collator options, such as UCOL_STRENGTH and UCOL_NORMALIZATION_MODE, are honored.
jpayne@69 835 *
jpayne@69 836 * The UStringSearch options USEARCH_CANONICAL_MATCH, USEARCH_OVERLAP and
jpayne@69 837 * any Break Iterator are ignored.
jpayne@69 838 *
jpayne@69 839 * Matches obey the following constraints:
jpayne@69 840 *
jpayne@69 841 * Characters at the start or end positions of a match that are ignorable
jpayne@69 842 * for collation are not included as part of the match, unless they
jpayne@69 843 * are part of a combining sequence, as described below.
jpayne@69 844 *
jpayne@69 845 * A match will not include a partial combining sequence. Combining
jpayne@69 846 * character sequences are considered to be inseparable units,
jpayne@69 847 * and either match the pattern completely, or are considered to not match
jpayne@69 848 * at all. Thus, for example, an A followed by a combining accent mark will
jpayne@69 849 * not be found when searching for a plain (unaccented) A (unless
jpayne@69 850 * the collation strength has been set to ignore all accents).
jpayne@69 851 *
jpayne@69 852 * When beginning a search, the initial starting position, startIdx,
jpayne@69 853 * is assumed to be an acceptable match boundary with respect to
jpayne@69 854 * combining characters. A combining sequence that spans across the
jpayne@69 855 * starting point will not suppress a match beginning at startIdx.
jpayne@69 856 *
jpayne@69 857 * Characters that expand to multiple collation elements
jpayne@69 858 * (German sharp-S becoming 'ss', or the composed forms of accented
jpayne@69 859 * characters, for example) also must match completely.
jpayne@69 860 * Searching for a single 's' in a string containing only a sharp-s will
jpayne@69 861 * find no match.
jpayne@69 862 *
jpayne@69 863 *
jpayne@69 864 * @param strsrch the UStringSearch struct, which references both
jpayne@69 865 * the text to be searched and the pattern being sought.
jpayne@69 866 * @param startIdx The index into the text to begin the search.
jpayne@69 867 * @param matchStart An out parameter, the starting index of the matched text.
jpayne@69 868 * This parameter may be NULL.
jpayne@69 869 * A value of -1 will be returned if no match was found.
jpayne@69 870 * @param matchLimit Out parameter, the index of the first position following the matched text.
jpayne@69 871 * The matchLimit will be at a suitable position for beginning a subsequent search
jpayne@69 872 * in the input text.
jpayne@69 873 * This parameter may be NULL.
jpayne@69 874 * A value of -1 will be returned if no match was found.
jpayne@69 875 *
jpayne@69 876 * @param status Report any errors. Note that no match found is not an error.
jpayne@69 877 * @return TRUE if a match was found, FALSE otherwise.
jpayne@69 878 *
jpayne@69 879 * @internal
jpayne@69 880 */
jpayne@69 881 U_INTERNAL UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch,
jpayne@69 882 int32_t startIdx,
jpayne@69 883 int32_t *matchStart,
jpayne@69 884 int32_t *matchLimit,
jpayne@69 885 UErrorCode *status);
jpayne@69 886 #endif /* U_HIDE_INTERNAL_API */
jpayne@69 887
jpayne@69 888 #endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
jpayne@69 889
jpayne@69 890 #endif