jpayne@69
|
1 // © 2016 and later: Unicode, Inc. and others.
|
jpayne@69
|
2 // License & terms of use: http://www.unicode.org/copyright.html
|
jpayne@69
|
3 /*
|
jpayne@69
|
4 **********************************************************************
|
jpayne@69
|
5 * Copyright (C) 2001-2014 IBM and others. All rights reserved.
|
jpayne@69
|
6 **********************************************************************
|
jpayne@69
|
7 * Date Name Description
|
jpayne@69
|
8 * 03/22/2000 helena Creation.
|
jpayne@69
|
9 **********************************************************************
|
jpayne@69
|
10 */
|
jpayne@69
|
11
|
jpayne@69
|
12 #ifndef STSEARCH_H
|
jpayne@69
|
13 #define STSEARCH_H
|
jpayne@69
|
14
|
jpayne@69
|
15 #include "unicode/utypes.h"
|
jpayne@69
|
16
|
jpayne@69
|
17 #if U_SHOW_CPLUSPLUS_API
|
jpayne@69
|
18
|
jpayne@69
|
19 /**
|
jpayne@69
|
20 * \file
|
jpayne@69
|
21 * \brief C++ API: Service for searching text based on RuleBasedCollator.
|
jpayne@69
|
22 */
|
jpayne@69
|
23
|
jpayne@69
|
24 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
|
jpayne@69
|
25
|
jpayne@69
|
26 #include "unicode/tblcoll.h"
|
jpayne@69
|
27 #include "unicode/coleitr.h"
|
jpayne@69
|
28 #include "unicode/search.h"
|
jpayne@69
|
29
|
jpayne@69
|
30 U_NAMESPACE_BEGIN
|
jpayne@69
|
31
|
jpayne@69
|
32 /**
|
jpayne@69
|
33 *
|
jpayne@69
|
34 * <tt>StringSearch</tt> is a <tt>SearchIterator</tt> that provides
|
jpayne@69
|
35 * language-sensitive text searching based on the comparison rules defined
|
jpayne@69
|
36 * in a {@link RuleBasedCollator} object.
|
jpayne@69
|
37 * StringSearch ensures that language eccentricity can be
|
jpayne@69
|
38 * handled, e.g. for the German collator, characters ß and SS will be matched
|
jpayne@69
|
39 * if case is chosen to be ignored.
|
jpayne@69
|
40 * See the <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
|
jpayne@69
|
41 * "ICU Collation Design Document"</a> for more information.
|
jpayne@69
|
42 * <p>
|
jpayne@69
|
43 * There are 2 match options for selection:<br>
|
jpayne@69
|
44 * Let S' be the sub-string of a text string S between the offsets start and
|
jpayne@69
|
45 * end [start, end].
|
jpayne@69
|
46 * <br>
|
jpayne@69
|
47 * A pattern string P matches a text string S at the offsets [start, end]
|
jpayne@69
|
48 * if
|
jpayne@69
|
49 * <pre>
|
jpayne@69
|
50 * option 1. Some canonical equivalent of P matches some canonical equivalent
|
jpayne@69
|
51 * of S'
|
jpayne@69
|
52 * option 2. P matches S' and if P starts or ends with a combining mark,
|
jpayne@69
|
53 * there exists no non-ignorable combining mark before or after S'
|
jpayne@69
|
54 * in S respectively.
|
jpayne@69
|
55 * </pre>
|
jpayne@69
|
56 * Option 2. will be the default.
|
jpayne@69
|
57 * <p>
|
jpayne@69
|
58 * This search has APIs similar to that of other text iteration mechanisms
|
jpayne@69
|
59 * such as the break iterators in <tt>BreakIterator</tt>. Using these
|
jpayne@69
|
60 * APIs, it is easy to scan through text looking for all occurrences of
|
jpayne@69
|
61 * a given pattern. This search iterator allows changing of direction by
|
jpayne@69
|
62 * calling a <tt>reset</tt> followed by a <tt>next</tt> or <tt>previous</tt>.
|
jpayne@69
|
63 * Though a direction change can occur without calling <tt>reset</tt> first,
|
jpayne@69
|
64 * this operation comes with some speed penalty.
|
jpayne@69
|
65 * Match results in the forward direction will match the result matches in
|
jpayne@69
|
66 * the backwards direction in the reverse order.
|
jpayne@69
|
67 * <p>
|
jpayne@69
|
68 * <tt>SearchIterator</tt> provides APIs to specify the starting position
|
jpayne@69
|
69 * within the text string to be searched, e.g. <tt>setOffset</tt>,
|
jpayne@69
|
70 * <tt>preceding</tt> and <tt>following</tt>. Since the
|
jpayne@69
|
71 * starting position will be set as it is specified, please take note that
|
jpayne@69
|
72 * there are some danger points which the search may render incorrect
|
jpayne@69
|
73 * results:
|
jpayne@69
|
74 * <ul>
|
jpayne@69
|
75 * <li> The midst of a substring that requires normalization.
|
jpayne@69
|
76 * <li> If the following match is to be found, the position should not be the
|
jpayne@69
|
77 * second character which requires to be swapped with the preceding
|
jpayne@69
|
78 * character. Vice versa, if the preceding match is to be found,
|
jpayne@69
|
79 * position to search from should not be the first character which
|
jpayne@69
|
80 * requires to be swapped with the next character. E.g. certain Thai and
|
jpayne@69
|
81 * Lao characters require swapping.
|
jpayne@69
|
82 * <li> If a following pattern match is to be found, any position within a
|
jpayne@69
|
83 * contracting sequence except the first will fail. Vice versa if a
|
jpayne@69
|
84 * preceding pattern match is to be found, an invalid starting point
|
jpayne@69
|
85 * would be any character within a contracting sequence except the last.
|
jpayne@69
|
86 * </ul>
|
jpayne@69
|
87 * <p>
|
jpayne@69
|
88 * A <tt>BreakIterator</tt> can be used if only matches at logical breaks are desired.
|
jpayne@69
|
89 * Using a <tt>BreakIterator</tt> will only give you results that exactly match the
|
jpayne@69
|
90 * boundaries given by the breakiterator. For instance the pattern "e" will
|
jpayne@69
|
91 * not be found in the string "\u00e9" if a character break iterator is used.
|
jpayne@69
|
92 * <p>
|
jpayne@69
|
93 * Options are provided to handle overlapping matches.
|
jpayne@69
|
94 * E.g. In English, overlapping matches produce the results 0 and 2
|
jpayne@69
|
95 * for the pattern "abab" in the text "ababab", whereas mutually
|
jpayne@69
|
96 * exclusive matches only produce the result of 0.
|
jpayne@69
|
97 * <p>
|
jpayne@69
|
98 * Though collator attributes will be taken into consideration while
|
jpayne@69
|
99 * performing matches, there are no APIs here for setting and getting the
|
jpayne@69
|
100 * attributes. These attributes can be set by getting the collator
|
jpayne@69
|
101 * from <tt>getCollator</tt> and using the APIs in <tt>coll.h</tt>.
|
jpayne@69
|
102 * Lastly to update <tt>StringSearch</tt> to the new collator attributes,
|
jpayne@69
|
103 * <tt>reset</tt> has to be called.
|
jpayne@69
|
104 * <p>
|
jpayne@69
|
105 * Restriction: <br>
|
jpayne@69
|
106 * Currently there are no composite characters that consist of a
|
jpayne@69
|
107 * character with combining class > 0 before a character with combining
|
jpayne@69
|
108 * class == 0. However, if such a character exists in the future,
|
jpayne@69
|
109 * <tt>StringSearch</tt> does not guarantee the results for option 1.
|
jpayne@69
|
110 * <p>
|
jpayne@69
|
111 * Consult the <tt>SearchIterator</tt> documentation for information on
|
jpayne@69
|
112 * and examples of how to use instances of this class to implement text
|
jpayne@69
|
113 * searching.
|
jpayne@69
|
114 * <pre><code>
|
jpayne@69
|
115 * UnicodeString target("The quick brown fox jumps over the lazy dog.");
|
jpayne@69
|
116 * UnicodeString pattern("fox");
|
jpayne@69
|
117 *
|
jpayne@69
|
118 * UErrorCode error = U_ZERO_ERROR;
|
jpayne@69
|
119 * StringSearch iter(pattern, target, Locale::getUS(), NULL, error);
|
jpayne@69
|
120 * for (int pos = iter.first(error);
|
jpayne@69
|
121 * pos != USEARCH_DONE;
|
jpayne@69
|
122 * pos = iter.next(error))
|
jpayne@69
|
123 * {
|
jpayne@69
|
124 * printf("Found match at %d pos, length is %d\n", pos, iter.getMatchedLength());
|
jpayne@69
|
125 * }
|
jpayne@69
|
126 * </code></pre>
|
jpayne@69
|
127 * <p>
|
jpayne@69
|
128 * Note, <tt>StringSearch</tt> is not to be subclassed.
|
jpayne@69
|
129 * </p>
|
jpayne@69
|
130 * @see SearchIterator
|
jpayne@69
|
131 * @see RuleBasedCollator
|
jpayne@69
|
132 * @since ICU 2.0
|
jpayne@69
|
133 */
|
jpayne@69
|
134
|
jpayne@69
|
135 class U_I18N_API StringSearch U_FINAL : public SearchIterator
|
jpayne@69
|
136 {
|
jpayne@69
|
137 public:
|
jpayne@69
|
138
|
jpayne@69
|
139 // public constructors and destructors --------------------------------
|
jpayne@69
|
140
|
jpayne@69
|
141 /**
|
jpayne@69
|
142 * Creating a <tt>StringSearch</tt> instance using the argument locale
|
jpayne@69
|
143 * language rule set. A collator will be created in the process, which
|
jpayne@69
|
144 * will be owned by this instance and will be deleted during
|
jpayne@69
|
145 * destruction
|
jpayne@69
|
146 * @param pattern The text for which this object will search.
|
jpayne@69
|
147 * @param text The text in which to search for the pattern.
|
jpayne@69
|
148 * @param locale A locale which defines the language-sensitive
|
jpayne@69
|
149 * comparison rules used to determine whether text in the
|
jpayne@69
|
150 * pattern and target matches.
|
jpayne@69
|
151 * @param breakiter A <tt>BreakIterator</tt> object used to constrain
|
jpayne@69
|
152 * the matches that are found. Matches whose start and end
|
jpayne@69
|
153 * indices in the target text are not boundaries as
|
jpayne@69
|
154 * determined by the <tt>BreakIterator</tt> are
|
jpayne@69
|
155 * ignored. If this behavior is not desired,
|
jpayne@69
|
156 * <tt>NULL</tt> can be passed in instead.
|
jpayne@69
|
157 * @param status for errors if any. If pattern or text is NULL, or if
|
jpayne@69
|
158 * either the length of pattern or text is 0 then an
|
jpayne@69
|
159 * U_ILLEGAL_ARGUMENT_ERROR is returned.
|
jpayne@69
|
160 * @stable ICU 2.0
|
jpayne@69
|
161 */
|
jpayne@69
|
162 StringSearch(const UnicodeString &pattern, const UnicodeString &text,
|
jpayne@69
|
163 const Locale &locale,
|
jpayne@69
|
164 BreakIterator *breakiter,
|
jpayne@69
|
165 UErrorCode &status);
|
jpayne@69
|
166
|
jpayne@69
|
167 /**
|
jpayne@69
|
168 * Creating a <tt>StringSearch</tt> instance using the argument collator
|
jpayne@69
|
169 * language rule set. Note, user retains the ownership of this collator,
|
jpayne@69
|
170 * it does not get destroyed during this instance's destruction.
|
jpayne@69
|
171 * @param pattern The text for which this object will search.
|
jpayne@69
|
172 * @param text The text in which to search for the pattern.
|
jpayne@69
|
173 * @param coll A <tt>RuleBasedCollator</tt> object which defines
|
jpayne@69
|
174 * the language-sensitive comparison rules used to
|
jpayne@69
|
175 * determine whether text in the pattern and target
|
jpayne@69
|
176 * matches. User is responsible for the clearing of this
|
jpayne@69
|
177 * object.
|
jpayne@69
|
178 * @param breakiter A <tt>BreakIterator</tt> object used to constrain
|
jpayne@69
|
179 * the matches that are found. Matches whose start and end
|
jpayne@69
|
180 * indices in the target text are not boundaries as
|
jpayne@69
|
181 * determined by the <tt>BreakIterator</tt> are
|
jpayne@69
|
182 * ignored. If this behavior is not desired,
|
jpayne@69
|
183 * <tt>NULL</tt> can be passed in instead.
|
jpayne@69
|
184 * @param status for errors if any. If either the length of pattern or
|
jpayne@69
|
185 * text is 0 then an U_ILLEGAL_ARGUMENT_ERROR is returned.
|
jpayne@69
|
186 * @stable ICU 2.0
|
jpayne@69
|
187 */
|
jpayne@69
|
188 StringSearch(const UnicodeString &pattern,
|
jpayne@69
|
189 const UnicodeString &text,
|
jpayne@69
|
190 RuleBasedCollator *coll,
|
jpayne@69
|
191 BreakIterator *breakiter,
|
jpayne@69
|
192 UErrorCode &status);
|
jpayne@69
|
193
|
jpayne@69
|
194 /**
|
jpayne@69
|
195 * Creating a <tt>StringSearch</tt> instance using the argument locale
|
jpayne@69
|
196 * language rule set. A collator will be created in the process, which
|
jpayne@69
|
197 * will be owned by this instance and will be deleted during
|
jpayne@69
|
198 * destruction
|
jpayne@69
|
199 * <p>
|
jpayne@69
|
200 * Note: No parsing of the text within the <tt>CharacterIterator</tt>
|
jpayne@69
|
201 * will be done during searching for this version. The block of text
|
jpayne@69
|
202 * in <tt>CharacterIterator</tt> will be used as it is.
|
jpayne@69
|
203 * @param pattern The text for which this object will search.
|
jpayne@69
|
204 * @param text The text iterator in which to search for the pattern.
|
jpayne@69
|
205 * @param locale A locale which defines the language-sensitive
|
jpayne@69
|
206 * comparison rules used to determine whether text in the
|
jpayne@69
|
207 * pattern and target matches. User is responsible for
|
jpayne@69
|
208 * the clearing of this object.
|
jpayne@69
|
209 * @param breakiter A <tt>BreakIterator</tt> object used to constrain
|
jpayne@69
|
210 * the matches that are found. Matches whose start and end
|
jpayne@69
|
211 * indices in the target text are not boundaries as
|
jpayne@69
|
212 * determined by the <tt>BreakIterator</tt> are
|
jpayne@69
|
213 * ignored. If this behavior is not desired,
|
jpayne@69
|
214 * <tt>NULL</tt> can be passed in instead.
|
jpayne@69
|
215 * @param status for errors if any. If either the length of pattern or
|
jpayne@69
|
216 * text is 0 then an U_ILLEGAL_ARGUMENT_ERROR is returned.
|
jpayne@69
|
217 * @stable ICU 2.0
|
jpayne@69
|
218 */
|
jpayne@69
|
219 StringSearch(const UnicodeString &pattern, CharacterIterator &text,
|
jpayne@69
|
220 const Locale &locale,
|
jpayne@69
|
221 BreakIterator *breakiter,
|
jpayne@69
|
222 UErrorCode &status);
|
jpayne@69
|
223
|
jpayne@69
|
224 /**
|
jpayne@69
|
225 * Creating a <tt>StringSearch</tt> instance using the argument collator
|
jpayne@69
|
226 * language rule set. Note, user retains the ownership of this collator,
|
jpayne@69
|
227 * it does not get destroyed during this instance's destruction.
|
jpayne@69
|
228 * <p>
|
jpayne@69
|
229 * Note: No parsing of the text within the <tt>CharacterIterator</tt>
|
jpayne@69
|
230 * will be done during searching for this version. The block of text
|
jpayne@69
|
231 * in <tt>CharacterIterator</tt> will be used as it is.
|
jpayne@69
|
232 * @param pattern The text for which this object will search.
|
jpayne@69
|
233 * @param text The text in which to search for the pattern.
|
jpayne@69
|
234 * @param coll A <tt>RuleBasedCollator</tt> object which defines
|
jpayne@69
|
235 * the language-sensitive comparison rules used to
|
jpayne@69
|
236 * determine whether text in the pattern and target
|
jpayne@69
|
237 * matches. User is responsible for the clearing of this
|
jpayne@69
|
238 * object.
|
jpayne@69
|
239 * @param breakiter A <tt>BreakIterator</tt> object used to constrain
|
jpayne@69
|
240 * the matches that are found. Matches whose start and end
|
jpayne@69
|
241 * indices in the target text are not boundaries as
|
jpayne@69
|
242 * determined by the <tt>BreakIterator</tt> are
|
jpayne@69
|
243 * ignored. If this behavior is not desired,
|
jpayne@69
|
244 * <tt>NULL</tt> can be passed in instead.
|
jpayne@69
|
245 * @param status for errors if any. If either the length of pattern or
|
jpayne@69
|
246 * text is 0 then an U_ILLEGAL_ARGUMENT_ERROR is returned.
|
jpayne@69
|
247 * @stable ICU 2.0
|
jpayne@69
|
248 */
|
jpayne@69
|
249 StringSearch(const UnicodeString &pattern, CharacterIterator &text,
|
jpayne@69
|
250 RuleBasedCollator *coll,
|
jpayne@69
|
251 BreakIterator *breakiter,
|
jpayne@69
|
252 UErrorCode &status);
|
jpayne@69
|
253
|
jpayne@69
|
254 /**
|
jpayne@69
|
255 * Copy constructor that creates a StringSearch instance with the same
|
jpayne@69
|
256 * behavior, and iterating over the same text.
|
jpayne@69
|
257 * @param that StringSearch instance to be copied.
|
jpayne@69
|
258 * @stable ICU 2.0
|
jpayne@69
|
259 */
|
jpayne@69
|
260 StringSearch(const StringSearch &that);
|
jpayne@69
|
261
|
jpayne@69
|
262 /**
|
jpayne@69
|
263 * Destructor. Cleans up the search iterator data struct.
|
jpayne@69
|
264 * If a collator is created in the constructor, it will be destroyed here.
|
jpayne@69
|
265 * @stable ICU 2.0
|
jpayne@69
|
266 */
|
jpayne@69
|
267 virtual ~StringSearch(void);
|
jpayne@69
|
268
|
jpayne@69
|
269 /**
|
jpayne@69
|
270 * Clone this object.
|
jpayne@69
|
271 * Clones can be used concurrently in multiple threads.
|
jpayne@69
|
272 * If an error occurs, then NULL is returned.
|
jpayne@69
|
273 * The caller must delete the clone.
|
jpayne@69
|
274 *
|
jpayne@69
|
275 * @return a clone of this object
|
jpayne@69
|
276 *
|
jpayne@69
|
277 * @see getDynamicClassID
|
jpayne@69
|
278 * @stable ICU 2.8
|
jpayne@69
|
279 */
|
jpayne@69
|
280 StringSearch *clone() const;
|
jpayne@69
|
281
|
jpayne@69
|
282 // operator overloading ---------------------------------------------
|
jpayne@69
|
283
|
jpayne@69
|
284 /**
|
jpayne@69
|
285 * Assignment operator. Sets this iterator to have the same behavior,
|
jpayne@69
|
286 * and iterate over the same text, as the one passed in.
|
jpayne@69
|
287 * @param that instance to be copied.
|
jpayne@69
|
288 * @stable ICU 2.0
|
jpayne@69
|
289 */
|
jpayne@69
|
290 StringSearch & operator=(const StringSearch &that);
|
jpayne@69
|
291
|
jpayne@69
|
292 /**
|
jpayne@69
|
293 * Equality operator.
|
jpayne@69
|
294 * @param that instance to be compared.
|
jpayne@69
|
295 * @return TRUE if both instances have the same attributes,
|
jpayne@69
|
296 * breakiterators, collators and iterate over the same text
|
jpayne@69
|
297 * while looking for the same pattern.
|
jpayne@69
|
298 * @stable ICU 2.0
|
jpayne@69
|
299 */
|
jpayne@69
|
300 virtual UBool operator==(const SearchIterator &that) const;
|
jpayne@69
|
301
|
jpayne@69
|
302 // public get and set methods ----------------------------------------
|
jpayne@69
|
303
|
jpayne@69
|
304 /**
|
jpayne@69
|
305 * Sets the index to point to the given position, and clears any state
|
jpayne@69
|
306 * that's affected.
|
jpayne@69
|
307 * <p>
|
jpayne@69
|
308 * This method takes the argument index and sets the position in the text
|
jpayne@69
|
309 * string accordingly without checking if the index is pointing to a
|
jpayne@69
|
310 * valid starting point to begin searching.
|
jpayne@69
|
311 * @param position within the text to be set. If position is less
|
jpayne@69
|
312 * than or greater than the text range for searching,
|
jpayne@69
|
313 * an U_INDEX_OUTOFBOUNDS_ERROR will be returned
|
jpayne@69
|
314 * @param status for errors if it occurs
|
jpayne@69
|
315 * @stable ICU 2.0
|
jpayne@69
|
316 */
|
jpayne@69
|
317 virtual void setOffset(int32_t position, UErrorCode &status);
|
jpayne@69
|
318
|
jpayne@69
|
319 /**
|
jpayne@69
|
320 * Return the current index in the text being searched.
|
jpayne@69
|
321 * If the iteration has gone past the end of the text
|
jpayne@69
|
322 * (or past the beginning for a backwards search), USEARCH_DONE
|
jpayne@69
|
323 * is returned.
|
jpayne@69
|
324 * @return current index in the text being searched.
|
jpayne@69
|
325 * @stable ICU 2.0
|
jpayne@69
|
326 */
|
jpayne@69
|
327 virtual int32_t getOffset(void) const;
|
jpayne@69
|
328
|
jpayne@69
|
329 /**
|
jpayne@69
|
330 * Set the target text to be searched.
|
jpayne@69
|
331 * Text iteration will hence begin at the start of the text string.
|
jpayne@69
|
332 * This method is
|
jpayne@69
|
333 * useful if you want to re-use an iterator to search for the same
|
jpayne@69
|
334 * pattern within a different body of text.
|
jpayne@69
|
335 * @param text text string to be searched
|
jpayne@69
|
336 * @param status for errors if any. If the text length is 0 then an
|
jpayne@69
|
337 * U_ILLEGAL_ARGUMENT_ERROR is returned.
|
jpayne@69
|
338 * @stable ICU 2.0
|
jpayne@69
|
339 */
|
jpayne@69
|
340 virtual void setText(const UnicodeString &text, UErrorCode &status);
|
jpayne@69
|
341
|
jpayne@69
|
342 /**
|
jpayne@69
|
343 * Set the target text to be searched.
|
jpayne@69
|
344 * Text iteration will hence begin at the start of the text string.
|
jpayne@69
|
345 * This method is
|
jpayne@69
|
346 * useful if you want to re-use an iterator to search for the same
|
jpayne@69
|
347 * pattern within a different body of text.
|
jpayne@69
|
348 * Note: No parsing of the text within the <tt>CharacterIterator</tt>
|
jpayne@69
|
349 * will be done during searching for this version. The block of text
|
jpayne@69
|
350 * in <tt>CharacterIterator</tt> will be used as it is.
|
jpayne@69
|
351 * @param text text string to be searched
|
jpayne@69
|
352 * @param status for errors if any. If the text length is 0 then an
|
jpayne@69
|
353 * U_ILLEGAL_ARGUMENT_ERROR is returned.
|
jpayne@69
|
354 * @stable ICU 2.0
|
jpayne@69
|
355 */
|
jpayne@69
|
356 virtual void setText(CharacterIterator &text, UErrorCode &status);
|
jpayne@69
|
357
|
jpayne@69
|
358 /**
|
jpayne@69
|
359 * Gets the collator used for the language rules.
|
jpayne@69
|
360 * <p>
|
jpayne@69
|
361 * Caller may modify but <b>must not</b> delete the <tt>RuleBasedCollator</tt>!
|
jpayne@69
|
362 * Modifications to this collator will affect the original collator passed in to
|
jpayne@69
|
363 * the <tt>StringSearch</tt> constructor or to setCollator, if any.
|
jpayne@69
|
364 * @return collator used for string search
|
jpayne@69
|
365 * @stable ICU 2.0
|
jpayne@69
|
366 */
|
jpayne@69
|
367 RuleBasedCollator * getCollator() const;
|
jpayne@69
|
368
|
jpayne@69
|
369 /**
|
jpayne@69
|
370 * Sets the collator used for the language rules. User retains the
|
jpayne@69
|
371 * ownership of this collator, thus the responsibility of deletion lies
|
jpayne@69
|
372 * with the user. The iterator's position will not be changed by this method.
|
jpayne@69
|
373 * @param coll collator
|
jpayne@69
|
374 * @param status for errors if any
|
jpayne@69
|
375 * @stable ICU 2.0
|
jpayne@69
|
376 */
|
jpayne@69
|
377 void setCollator(RuleBasedCollator *coll, UErrorCode &status);
|
jpayne@69
|
378
|
jpayne@69
|
379 /**
|
jpayne@69
|
380 * Sets the pattern used for matching.
|
jpayne@69
|
381 * The iterator's position will not be changed by this method.
|
jpayne@69
|
382 * @param pattern search pattern to be found
|
jpayne@69
|
383 * @param status for errors if any. If the pattern length is 0 then an
|
jpayne@69
|
384 * U_ILLEGAL_ARGUMENT_ERROR is returned.
|
jpayne@69
|
385 * @stable ICU 2.0
|
jpayne@69
|
386 */
|
jpayne@69
|
387 void setPattern(const UnicodeString &pattern, UErrorCode &status);
|
jpayne@69
|
388
|
jpayne@69
|
389 /**
|
jpayne@69
|
390 * Gets the search pattern.
|
jpayne@69
|
391 * @return pattern used for matching
|
jpayne@69
|
392 * @stable ICU 2.0
|
jpayne@69
|
393 */
|
jpayne@69
|
394 const UnicodeString & getPattern() const;
|
jpayne@69
|
395
|
jpayne@69
|
396 // public methods ----------------------------------------------------
|
jpayne@69
|
397
|
jpayne@69
|
398 /**
|
jpayne@69
|
399 * Reset the iteration.
|
jpayne@69
|
400 * Search will begin at the start of the text string if a forward
|
jpayne@69
|
401 * iteration is initiated before a backwards iteration. Otherwise if
|
jpayne@69
|
402 * a backwards iteration is initiated before a forwards iteration, the
|
jpayne@69
|
403 * search will begin at the end of the text string.
|
jpayne@69
|
404 * @stable ICU 2.0
|
jpayne@69
|
405 */
|
jpayne@69
|
406 virtual void reset();
|
jpayne@69
|
407
|
jpayne@69
|
408 /**
|
jpayne@69
|
409 * Returns a copy of StringSearch with the same behavior, and
|
jpayne@69
|
410 * iterating over the same text, as this one. Note that all data will be
|
jpayne@69
|
411 * replicated, except for the user-specified collator and the
|
jpayne@69
|
412 * breakiterator.
|
jpayne@69
|
413 * @return cloned object
|
jpayne@69
|
414 * @stable ICU 2.0
|
jpayne@69
|
415 */
|
jpayne@69
|
416 virtual StringSearch * safeClone() const;
|
jpayne@69
|
417
|
jpayne@69
|
418 /**
|
jpayne@69
|
419 * ICU "poor man's RTTI", returns a UClassID for the actual class.
|
jpayne@69
|
420 *
|
jpayne@69
|
421 * @stable ICU 2.2
|
jpayne@69
|
422 */
|
jpayne@69
|
423 virtual UClassID getDynamicClassID() const;
|
jpayne@69
|
424
|
jpayne@69
|
425 /**
|
jpayne@69
|
426 * ICU "poor man's RTTI", returns a UClassID for this class.
|
jpayne@69
|
427 *
|
jpayne@69
|
428 * @stable ICU 2.2
|
jpayne@69
|
429 */
|
jpayne@69
|
430 static UClassID U_EXPORT2 getStaticClassID();
|
jpayne@69
|
431
|
jpayne@69
|
432 protected:
|
jpayne@69
|
433
|
jpayne@69
|
434 // protected method -------------------------------------------------
|
jpayne@69
|
435
|
jpayne@69
|
436 /**
|
jpayne@69
|
437 * Search forward for matching text, starting at a given location.
|
jpayne@69
|
438 * Clients should not call this method directly; instead they should
|
jpayne@69
|
439 * call {@link SearchIterator#next }.
|
jpayne@69
|
440 * <p>
|
jpayne@69
|
441 * If a match is found, this method returns the index at which the match
|
jpayne@69
|
442 * starts and calls {@link SearchIterator#setMatchLength } with the number
|
jpayne@69
|
443 * of characters in the target text that make up the match. If no match
|
jpayne@69
|
444 * is found, the method returns <tt>USEARCH_DONE</tt>.
|
jpayne@69
|
445 * <p>
|
jpayne@69
|
446 * The <tt>StringSearch</tt> is adjusted so that its current index
|
jpayne@69
|
447 * (as returned by {@link #getOffset }) is the match position if one was
|
jpayne@69
|
448 * found.
|
jpayne@69
|
449 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
|
jpayne@69
|
450 * the <tt>StringSearch</tt> will be adjusted to the index USEARCH_DONE.
|
jpayne@69
|
451 * @param position The index in the target text at which the search
|
jpayne@69
|
452 * starts
|
jpayne@69
|
453 * @param status for errors if any occurs
|
jpayne@69
|
454 * @return The index at which the matched text in the target starts, or
|
jpayne@69
|
455 * USEARCH_DONE if no match was found.
|
jpayne@69
|
456 * @stable ICU 2.0
|
jpayne@69
|
457 */
|
jpayne@69
|
458 virtual int32_t handleNext(int32_t position, UErrorCode &status);
|
jpayne@69
|
459
|
jpayne@69
|
460 /**
|
jpayne@69
|
461 * Search backward for matching text, starting at a given location.
|
jpayne@69
|
462 * Clients should not call this method directly; instead they should call
|
jpayne@69
|
463 * {@link SearchIterator#previous }.
|
jpayne@69
|
464 * <p>
|
jpayne@69
|
465 * If a match is found, this method returns the index at which the match
|
jpayne@69
|
466 * starts and calls {@link SearchIterator#setMatchLength } with the number
|
jpayne@69
|
467 * of characters in the target text that make up the match. If no match
|
jpayne@69
|
468 * is found, the method returns <tt>USEARCH_DONE</tt>.
|
jpayne@69
|
469 * <p>
|
jpayne@69
|
470 * The <tt>StringSearch</tt> is adjusted so that its current index
|
jpayne@69
|
471 * (as returned by {@link #getOffset }) is the match position if one was
|
jpayne@69
|
472 * found.
|
jpayne@69
|
473 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
|
jpayne@69
|
474 * the <tt>StringSearch</tt> will be adjusted to the index USEARCH_DONE.
|
jpayne@69
|
475 * @param position The index in the target text at which the search
|
jpayne@69
|
476 * starts.
|
jpayne@69
|
477 * @param status for errors if any occurs
|
jpayne@69
|
478 * @return The index at which the matched text in the target starts, or
|
jpayne@69
|
479 * USEARCH_DONE if no match was found.
|
jpayne@69
|
480 * @stable ICU 2.0
|
jpayne@69
|
481 */
|
jpayne@69
|
482 virtual int32_t handlePrev(int32_t position, UErrorCode &status);
|
jpayne@69
|
483
|
jpayne@69
|
484 private :
|
jpayne@69
|
485 StringSearch(); // default constructor not implemented
|
jpayne@69
|
486
|
jpayne@69
|
487 // private data members ----------------------------------------------
|
jpayne@69
|
488
|
jpayne@69
|
489 /**
|
jpayne@69
|
490 * Pattern text
|
jpayne@69
|
491 * @stable ICU 2.0
|
jpayne@69
|
492 */
|
jpayne@69
|
493 UnicodeString m_pattern_;
|
jpayne@69
|
494 /**
|
jpayne@69
|
495 * String search struct data
|
jpayne@69
|
496 * @stable ICU 2.0
|
jpayne@69
|
497 */
|
jpayne@69
|
498 UStringSearch *m_strsrch_;
|
jpayne@69
|
499
|
jpayne@69
|
500 };
|
jpayne@69
|
501
|
jpayne@69
|
502 U_NAMESPACE_END
|
jpayne@69
|
503
|
jpayne@69
|
504 #endif /* #if !UCONFIG_NO_COLLATION */
|
jpayne@69
|
505
|
jpayne@69
|
506 #endif /* U_SHOW_CPLUSPLUS_API */
|
jpayne@69
|
507
|
jpayne@69
|
508 #endif
|
jpayne@69
|
509
|