jpayne@69
|
1 // © 2016 and later: Unicode, Inc. and others.
|
jpayne@69
|
2 // License & terms of use: http://www.unicode.org/copyright.html
|
jpayne@69
|
3 /*
|
jpayne@69
|
4 **********************************************************************
|
jpayne@69
|
5 * Copyright (C) 2001-2011 IBM and others. All rights reserved.
|
jpayne@69
|
6 **********************************************************************
|
jpayne@69
|
7 * Date Name Description
|
jpayne@69
|
8 * 03/22/2000 helena Creation.
|
jpayne@69
|
9 **********************************************************************
|
jpayne@69
|
10 */
|
jpayne@69
|
11
|
jpayne@69
|
12 #ifndef SEARCH_H
|
jpayne@69
|
13 #define SEARCH_H
|
jpayne@69
|
14
|
jpayne@69
|
15 #include "unicode/utypes.h"
|
jpayne@69
|
16
|
jpayne@69
|
17 #if U_SHOW_CPLUSPLUS_API
|
jpayne@69
|
18
|
jpayne@69
|
19 /**
|
jpayne@69
|
20 * \file
|
jpayne@69
|
21 * \brief C++ API: SearchIterator object.
|
jpayne@69
|
22 */
|
jpayne@69
|
23
|
jpayne@69
|
24 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
|
jpayne@69
|
25
|
jpayne@69
|
26 #include "unicode/uobject.h"
|
jpayne@69
|
27 #include "unicode/unistr.h"
|
jpayne@69
|
28 #include "unicode/chariter.h"
|
jpayne@69
|
29 #include "unicode/brkiter.h"
|
jpayne@69
|
30 #include "unicode/usearch.h"
|
jpayne@69
|
31
|
jpayne@69
|
32 /** Declares the C search data struct; it is only declared, not defined, in this header.
|
jpayne@69
|
33 * @stable ICU 2.0
|
jpayne@69
|
34 */
|
jpayne@69
|
35 struct USearch;
|
jpayne@69
|
36 /** Alias that lets the struct be named as plain <tt>USearch</tt>, without the struct keyword.
|
jpayne@69
|
37 * @stable ICU 2.0
|
jpayne@69
|
38 */
|
jpayne@69
|
39 typedef struct USearch USearch;
|
jpayne@69
|
40
|
jpayne@69
|
41 U_NAMESPACE_BEGIN
|
jpayne@69
|
42
|
jpayne@69
|
43 /**
|
jpayne@69
|
44 *
|
jpayne@69
|
45 * <tt>SearchIterator</tt> is an abstract base class that provides
|
jpayne@69
|
46 * methods to search for a pattern within a text string. Instances of
|
jpayne@69
|
47 * <tt>SearchIterator</tt> maintain a current position and scan over the
|
jpayne@69
|
48 * target text, returning the indices at which the pattern is matched and the length
|
jpayne@69
|
49 * of each match.
|
jpayne@69
|
50 * <p>
|
jpayne@69
|
51 * <tt>SearchIterator</tt> defines a protocol for text searching.
|
jpayne@69
|
52 * Subclasses provide concrete implementations of various search algorithms.
|
jpayne@69
|
53 * For example, <tt>StringSearch</tt> implements language-sensitive pattern
|
jpayne@69
|
54 * matching based on the comparison rules defined in a
|
jpayne@69
|
55 * <tt>RuleBasedCollator</tt> object.
|
jpayne@69
|
56 * <p>
|
jpayne@69
|
57 * Other options for searching include using a BreakIterator to restrict
|
jpayne@69
|
58 * the points at which matches are detected.
|
jpayne@69
|
59 * <p>
|
jpayne@69
|
60 * <tt>SearchIterator</tt> provides an API that is similar to that of
|
jpayne@69
|
61 * other text iteration classes such as <tt>BreakIterator</tt>. Using
|
jpayne@69
|
62 * this class, it is easy to scan through text looking for all occurrences of
|
jpayne@69
|
63 * a given pattern. The following example uses a <tt>StringSearch</tt>
|
jpayne@69
|
64 * object to find all instances of "fox" in the target string. Any other
|
jpayne@69
|
65 * subclass of <tt>SearchIterator</tt> can be used in an identical
|
jpayne@69
|
66 * manner.
|
jpayne@69
|
67 * <pre><code>
|
jpayne@69
|
68 * UnicodeString target("The quick brown fox jumped over the lazy fox");
|
jpayne@69
|
69 * UnicodeString pattern("fox");
|
jpayne@69
|
70 *
|
jpayne@69
|
71 * SearchIterator *iter = new StringSearch(pattern, target);
|
jpayne@69
|
72 * UErrorCode error = U_ZERO_ERROR;
|
jpayne@69
|
73 * for (int pos = iter->first(error); pos != USEARCH_DONE;
|
jpayne@69
|
74 * pos = iter->next(error)) {
|
jpayne@69
|
75 * printf("Found match at %d pos, length is %d\n", pos, iter->getMatchedLength());
|
jpayne@69
|
76 * }
|
jpayne@69
|
77 * </code></pre>
|
jpayne@69
|
78 *
|
jpayne@69
|
79 * @see StringSearch
|
jpayne@69
|
80 * @see RuleBasedCollator
|
jpayne@69
|
81 */
|
jpayne@69
|
82 class U_I18N_API SearchIterator : public UObject {
|
jpayne@69
|
83
|
jpayne@69
|
84 public:
|
jpayne@69
|
85
|
jpayne@69
|
86 // public constructors and destructors -------------------------------
|
jpayne@69
|
87
|
jpayne@69
|
88 /**
|
jpayne@69
|
89 * Copy constructor that creates a SearchIterator instance with the same
|
jpayne@69
|
90 * behavior, and iterating over the same text.
|
jpayne@69
|
91 * @param other the SearchIterator instance to be copied.
|
jpayne@69
|
92 * @stable ICU 2.0
|
jpayne@69
|
93 */
|
jpayne@69
|
94 SearchIterator(const SearchIterator &other);
|
jpayne@69
|
95
|
jpayne@69
|
96 /**
|
jpayne@69
|
97 * Destructor. Cleans up the search iterator data struct.
|
jpayne@69
|
98 * @stable ICU 2.0
|
jpayne@69
|
99 */
|
jpayne@69
|
100 virtual ~SearchIterator();
|
jpayne@69
|
101
|
jpayne@69
|
102 // public get and set methods ----------------------------------------
|
jpayne@69
|
103
|
jpayne@69
|
104 /**
|
jpayne@69
|
105 * Sets the index to point to the given position, and clears any state
|
jpayne@69
|
106 * that's affected.
|
jpayne@69
|
107 * <p>
|
jpayne@69
|
108 * This method takes the argument index and sets the position in the text
|
jpayne@69
|
109 * string accordingly without checking if the index is pointing to a
|
jpayne@69
|
110 * valid starting point to begin searching.
|
jpayne@69
|
111 * @param position within the text to be set. If position is less
|
jpayne@69
|
112 * than or greater than the text range for searching,
|
jpayne@69
|
113 * an U_INDEX_OUTOFBOUNDS_ERROR will be returned
|
jpayne@69
|
114 * @param status for errors if it occurs
|
jpayne@69
|
115 * @stable ICU 2.0
|
jpayne@69
|
116 */
|
jpayne@69
|
117 virtual void setOffset(int32_t position, UErrorCode &status) = 0;
|
jpayne@69
|
118
|
jpayne@69
|
119 /**
|
jpayne@69
|
120 * Return the current index in the text being searched.
|
jpayne@69
|
121 * If the iteration has gone past the end of the text
|
jpayne@69
|
122 * (or past the beginning for a backwards search), USEARCH_DONE
|
jpayne@69
|
123 * is returned.
|
jpayne@69
|
124 * @return current index in the text being searched.
|
jpayne@69
|
125 * @stable ICU 2.0
|
jpayne@69
|
126 */
|
jpayne@69
|
127 virtual int32_t getOffset(void) const = 0;
|
jpayne@69
|
128
|
jpayne@69
|
129 /**
|
jpayne@69
|
130 * Sets the text searching attributes located in the enum
|
jpayne@69
|
131 * USearchAttribute with values from the enum USearchAttributeValue.
|
jpayne@69
|
132 * USEARCH_DEFAULT can be used for all attributes for resetting.
|
jpayne@69
|
133 * @param attribute text attribute (enum USearchAttribute) to be set
|
jpayne@69
|
134 * @param value text attribute value
|
jpayne@69
|
135 * @param status for errors if it occurs
|
jpayne@69
|
136 * @stable ICU 2.0
|
jpayne@69
|
137 */
|
jpayne@69
|
138 void setAttribute(USearchAttribute attribute,
|
jpayne@69
|
139 USearchAttributeValue value,
|
jpayne@69
|
140 UErrorCode &status);
|
jpayne@69
|
141
|
jpayne@69
|
142 /**
|
jpayne@69
|
143 * Gets the text searching attributes
|
jpayne@69
|
144 * @param attribute text attribute (enum USearchAttribute) to be retrieved
|
jpayne@69
|
145 * @return text attribute value
|
jpayne@69
|
146 * @stable ICU 2.0
|
jpayne@69
|
147 */
|
jpayne@69
|
148 USearchAttributeValue getAttribute(USearchAttribute attribute) const;
|
jpayne@69
|
149
|
jpayne@69
|
150 /**
|
jpayne@69
|
151 * Returns the index to the match in the text string that was searched.
|
jpayne@69
|
152 * This call returns a valid result only after a successful call to
|
jpayne@69
|
153 * <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>.
|
jpayne@69
|
154 * Just after construction, or after a searching method returns
|
jpayne@69
|
155 * <tt>USEARCH_DONE</tt>, this method will return <tt>USEARCH_DONE</tt>.
|
jpayne@69
|
156 * <p>
|
jpayne@69
|
157 * Use getMatchedLength to get the matched string length.
|
jpayne@69
|
158 * @return index of a substring within the text string that is being
|
jpayne@69
|
159 * searched.
|
jpayne@69
|
160 * @see #first
|
jpayne@69
|
161 * @see #next
|
jpayne@69
|
162 * @see #previous
|
jpayne@69
|
163 * @see #last
|
jpayne@69
|
164 * @stable ICU 2.0
|
jpayne@69
|
165 */
|
jpayne@69
|
166 int32_t getMatchedStart(void) const;
|
jpayne@69
|
167
|
jpayne@69
|
168 /**
|
jpayne@69
|
169 * Returns the length of text in the string which matches the search
|
jpayne@69
|
170 * pattern. This call returns a valid result only after a successful call
|
jpayne@69
|
171 * to <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>.
|
jpayne@69
|
172 * Just after construction, or after a searching method returns
|
jpayne@69
|
173 * <tt>USEARCH_DONE</tt>, this method will return 0.
|
jpayne@69
|
174 * @return The length of the match in the target text, or 0 if there
|
jpayne@69
|
175 * is no match currently.
|
jpayne@69
|
176 * @see #first
|
jpayne@69
|
177 * @see #next
|
jpayne@69
|
178 * @see #previous
|
jpayne@69
|
179 * @see #last
|
jpayne@69
|
180 * @stable ICU 2.0
|
jpayne@69
|
181 */
|
jpayne@69
|
182 int32_t getMatchedLength(void) const;
|
jpayne@69
|
183
|
jpayne@69
|
184 /**
|
jpayne@69
|
185 * Returns the text that was matched by the most recent call to
|
jpayne@69
|
186 * <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>.
|
jpayne@69
|
187 * If the iterator is not pointing at a valid match (e.g. just after
|
jpayne@69
|
188 * construction or after <tt>USEARCH_DONE</tt> has been returned),
|
jpayne@69
|
189 * returns an empty string.
|
jpayne@69
|
190 * @param result stores the matched string or an empty string if a match
|
jpayne@69
|
191 * is not found.
|
jpayne@69
|
192 * @see #first
|
jpayne@69
|
193 * @see #next
|
jpayne@69
|
194 * @see #previous
|
jpayne@69
|
195 * @see #last
|
jpayne@69
|
196 * @stable ICU 2.0
|
jpayne@69
|
197 */
|
jpayne@69
|
198 void getMatchedText(UnicodeString &result) const;
|
jpayne@69
|
199
|
jpayne@69
|
200 /**
|
jpayne@69
|
201 * Set the BreakIterator that will be used to restrict the points
|
jpayne@69
|
202 * at which matches are detected. The user is responsible for deleting
|
jpayne@69
|
203 * the breakiterator.
|
jpayne@69
|
204 * @param breakiter A BreakIterator that will be used to restrict the
|
jpayne@69
|
205 * points at which matches are detected. If a match is
|
jpayne@69
|
206 * found, but the match's start or end index is not a
|
jpayne@69
|
207 * boundary as determined by the <tt>BreakIterator</tt>,
|
jpayne@69
|
208 * the match will be rejected and another will be searched
|
jpayne@69
|
209 * for. If this parameter is <tt>NULL</tt>, no break
|
jpayne@69
|
210 * detection is attempted.
|
jpayne@69
|
211 * @param status for errors if it occurs
|
jpayne@69
|
212 * @see BreakIterator
|
jpayne@69
|
213 * @stable ICU 2.0
|
jpayne@69
|
214 */
|
jpayne@69
|
215 void setBreakIterator(BreakIterator *breakiter, UErrorCode &status);
|
jpayne@69
|
216
|
jpayne@69
|
217 /**
|
jpayne@69
|
218 * Returns the BreakIterator that is used to restrict the points at
|
jpayne@69
|
219 * which matches are detected. This will be the same object that was
|
jpayne@69
|
220 * passed to the constructor or to <tt>setBreakIterator</tt>.
|
jpayne@69
|
221 * Note that <tt>NULL</tt> is a legal value; it means that break
|
jpayne@69
|
222 * detection should not be attempted.
|
jpayne@69
|
223 * @return BreakIterator used to restrict matchings.
|
jpayne@69
|
224 * @see #setBreakIterator
|
jpayne@69
|
225 * @stable ICU 2.0
|
jpayne@69
|
226 */
|
jpayne@69
|
227 const BreakIterator * getBreakIterator(void) const;
|
jpayne@69
|
228
|
jpayne@69
|
229 /**
|
jpayne@69
|
230 * Set the string text to be searched. Text iteration will hence begin at
|
jpayne@69
|
231 * the start of the text string. This method is useful if you want to
|
jpayne@69
|
232 * re-use an iterator to search for the same pattern within a different
|
jpayne@69
|
233 * body of text. The user is responsible for deleting the text.
|
jpayne@69
|
234 * @param text string to be searched.
|
jpayne@69
|
235 * @param status for errors. If the text length is 0,
|
jpayne@69
|
236 * an U_ILLEGAL_ARGUMENT_ERROR is returned.
|
jpayne@69
|
237 * @stable ICU 2.0
|
jpayne@69
|
238 */
|
jpayne@69
|
239 virtual void setText(const UnicodeString &text, UErrorCode &status);
|
jpayne@69
|
240
|
jpayne@69
|
241 /**
|
jpayne@69
|
242 * Set the string text to be searched. Text iteration will hence begin at
|
jpayne@69
|
243 * the start of the text string. This method is useful if you want to
|
jpayne@69
|
244 * re-use an iterator to search for the same pattern within a different
|
jpayne@69
|
245 * body of text.
|
jpayne@69
|
246 * <p>
|
jpayne@69
|
247 * Note: No parsing of the text within the <tt>CharacterIterator</tt>
|
jpayne@69
|
248 * will be done during searching for this version. The block of text
|
jpayne@69
|
249 * in <tt>CharacterIterator</tt> will be used as it is.
|
jpayne@69
|
250 * The user is responsible for deleting the text.
|
jpayne@69
|
251 * @param text string iterator to be searched.
|
jpayne@69
|
252 * @param status for errors if any. If the text length is 0 then an
|
jpayne@69
|
253 * U_ILLEGAL_ARGUMENT_ERROR is returned.
|
jpayne@69
|
254 * @stable ICU 2.0
|
jpayne@69
|
255 */
|
jpayne@69
|
256 virtual void setText(CharacterIterator &text, UErrorCode &status);
|
jpayne@69
|
257
|
jpayne@69
|
258 /**
|
jpayne@69
|
259 * Return the string text to be searched.
|
jpayne@69
|
260 * @return text string to be searched.
|
jpayne@69
|
261 * @stable ICU 2.0
|
jpayne@69
|
262 */
|
jpayne@69
|
263 const UnicodeString & getText(void) const;
|
jpayne@69
|
264
|
jpayne@69
|
265 // operator overloading ----------------------------------------------
|
jpayne@69
|
266
|
jpayne@69
|
267 /**
|
jpayne@69
|
268 * Equality operator.
|
jpayne@69
|
269 * @param that SearchIterator instance to be compared.
|
jpayne@69
|
270 * @return TRUE if both SearchIterators are of the same class, have the
|
jpayne@69
|
271 * same behavior, iterate over the same text and have the same
|
jpayne@69
|
272 * attributes. FALSE otherwise.
|
jpayne@69
|
273 * @stable ICU 2.0
|
jpayne@69
|
274 */
|
jpayne@69
|
275 virtual UBool operator==(const SearchIterator &that) const;
|
jpayne@69
|
276
|
jpayne@69
|
277 /**
|
jpayne@69
|
278 * Not-equal operator.
|
jpayne@69
|
279 * @param that SearchIterator instance to be compared.
|
jpayne@69
|
280 * @return FALSE if operator== returns TRUE, and vice versa.
|
jpayne@69
|
281 * @stable ICU 2.0
|
jpayne@69
|
282 */
|
jpayne@69
|
283 UBool operator!=(const SearchIterator &that) const;
|
jpayne@69
|
284
|
jpayne@69
|
285 // public methods ----------------------------------------------------
|
jpayne@69
|
286
|
jpayne@69
|
287 /**
|
jpayne@69
|
288 * Returns a copy of SearchIterator with the same behavior, and
|
jpayne@69
|
289 * iterating over the same text, as this one. Note that all data will be
|
jpayne@69
|
290 * replicated, except for the text string to be searched.
|
jpayne@69
|
291 * @return cloned object
|
jpayne@69
|
292 * @stable ICU 2.0
|
jpayne@69
|
293 */
|
jpayne@69
|
294 virtual SearchIterator* safeClone(void) const = 0;
|
jpayne@69
|
295
|
jpayne@69
|
296 /**
|
jpayne@69
|
297 * Returns the first index at which the string text matches the search
|
jpayne@69
|
298 * pattern. The iterator is adjusted so that its current index (as
|
jpayne@69
|
299 * returned by <tt>getOffset</tt>) is the match position if one
|
jpayne@69
|
300 * was found.
|
jpayne@69
|
301 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
|
jpayne@69
|
302 * the iterator will be adjusted to the index USEARCH_DONE.
|
jpayne@69
|
303 * @param status for errors if it occurs
|
jpayne@69
|
304 * @return The character index of the first match, or
|
jpayne@69
|
305 * <tt>USEARCH_DONE</tt> if there are no matches.
|
jpayne@69
|
306 * @see #getOffset
|
jpayne@69
|
307 * @stable ICU 2.0
|
jpayne@69
|
308 */
|
jpayne@69
|
309 int32_t first(UErrorCode &status);
|
jpayne@69
|
310
|
jpayne@69
|
311 /**
|
jpayne@69
|
312 * Returns the first index equal to or greater than <tt>position</tt> at which the
|
jpayne@69
|
313 * string text matches the search pattern. The iterator is adjusted so
|
jpayne@69
|
314 * that its current index (as returned by <tt>getOffset</tt>) is the
|
jpayne@69
|
315 * match position if one was found.
|
jpayne@69
|
316 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and the
|
jpayne@69
|
317 * iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
|
jpayne@69
|
318 * @param position where search is to start from. If position is less
|
jpayne@69
|
319 * than or greater than the text range for searching,
|
jpayne@69
|
320 * an U_INDEX_OUTOFBOUNDS_ERROR will be returned
|
jpayne@69
|
321 * @param status for errors if it occurs
|
jpayne@69
|
322 * @return The character index of the first match following
|
jpayne@69
|
323 * <tt>position</tt>, or <tt>USEARCH_DONE</tt> if there are no
|
jpayne@69
|
324 * matches.
|
jpayne@69
|
325 * @see #getOffset
|
jpayne@69
|
326 * @stable ICU 2.0
|
jpayne@69
|
327 */
|
jpayne@69
|
328 int32_t following(int32_t position, UErrorCode &status);
|
jpayne@69
|
329
|
jpayne@69
|
330 /**
|
jpayne@69
|
331 * Returns the last index in the target text at which it matches the
|
jpayne@69
|
332 * search pattern. The iterator is adjusted so that its current index
|
jpayne@69
|
333 * (as returned by <tt>getOffset</tt>) is the match position if one was
|
jpayne@69
|
334 * found.
|
jpayne@69
|
335 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
|
jpayne@69
|
336 * the iterator will be adjusted to the index USEARCH_DONE.
|
jpayne@69
|
337 * @param status for errors if it occurs
|
jpayne@69
|
338 * @return The index of the last match, or <tt>USEARCH_DONE</tt> if
|
jpayne@69
|
339 * there are no matches.
|
jpayne@69
|
340 * @see #getOffset
|
jpayne@69
|
341 * @stable ICU 2.0
|
jpayne@69
|
342 */
|
jpayne@69
|
343 int32_t last(UErrorCode &status);
|
jpayne@69
|
344
|
jpayne@69
|
345 /**
|
jpayne@69
|
346 * Returns the first index less than <tt>position</tt> at which the string
|
jpayne@69
|
347 * text matches the search pattern. The iterator is adjusted so that its
|
jpayne@69
|
348 * current index (as returned by <tt>getOffset</tt>) is the match
|
jpayne@69
|
349 * position if one was found. If a match is not found,
|
jpayne@69
|
350 * <tt>USEARCH_DONE</tt> will be returned and the iterator will be
|
jpayne@69
|
351 * adjusted to the index USEARCH_DONE.
|
jpayne@69
|
352 * <p>
|
jpayne@69
|
353 * When <tt>USEARCH_OVERLAP</tt> option is off, the last index of the
|
jpayne@69
|
354 * result match is always less than <tt>position</tt>.
|
jpayne@69
|
355 * When <tt>USEARCH_OVERLAP</tt> is on, the result match may span across
|
jpayne@69
|
356 * <tt>position</tt>.
|
jpayne@69
|
357 *
|
jpayne@69
|
358 * @param position where search is to start from. If position is less
|
jpayne@69
|
359 * than or greater than the text range for searching,
|
jpayne@69
|
360 * an U_INDEX_OUTOFBOUNDS_ERROR will be returned
|
jpayne@69
|
361 * @param status for errors if it occurs
|
jpayne@69
|
362 * @return The character index of the first match preceding
|
jpayne@69
|
363 * <tt>position</tt>, or <tt>USEARCH_DONE</tt> if there are
|
jpayne@69
|
364 * no matches.
|
jpayne@69
|
365 * @see #getOffset
|
jpayne@69
|
366 * @stable ICU 2.0
|
jpayne@69
|
367 */
|
jpayne@69
|
368 int32_t preceding(int32_t position, UErrorCode &status);
|
jpayne@69
|
369
|
jpayne@69
|
370 /**
|
jpayne@69
|
371 * Returns the index of the next point at which the text matches the
|
jpayne@69
|
372 * search pattern, starting from the current position.
|
jpayne@69
|
373 * The iterator is adjusted so that its current index (as returned by
|
jpayne@69
|
374 * <tt>getOffset</tt>) is the match position if one was found.
|
jpayne@69
|
375 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
|
jpayne@69
|
376 * the iterator will be adjusted to a position after the end of the text
|
jpayne@69
|
377 * string.
|
jpayne@69
|
378 * @param status for errors if it occurs
|
jpayne@69
|
379 * @return The index of the next match after the current position,
|
jpayne@69
|
380 * or <tt>USEARCH_DONE</tt> if there are no more matches.
|
jpayne@69
|
381 * @see #getOffset
|
jpayne@69
|
382 * @stable ICU 2.0
|
jpayne@69
|
383 */
|
jpayne@69
|
384 int32_t next(UErrorCode &status);
|
jpayne@69
|
385
|
jpayne@69
|
386 /**
|
jpayne@69
|
387 * Returns the index of the previous point at which the string text
|
jpayne@69
|
388 * matches the search pattern, starting at the current position.
|
jpayne@69
|
389 * The iterator is adjusted so that its current index (as returned by
|
jpayne@69
|
390 * <tt>getOffset</tt>) is the match position if one was found.
|
jpayne@69
|
391 * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
|
jpayne@69
|
392 * the iterator will be adjusted to the index USEARCH_DONE.
|
jpayne@69
|
393 * @param status for errors if it occurs
|
jpayne@69
|
394 * @return The index of the previous match before the current position,
|
jpayne@69
|
395 * or <tt>USEARCH_DONE</tt> if there are no more matches.
|
jpayne@69
|
396 * @see #getOffset
|
jpayne@69
|
397 * @stable ICU 2.0
|
jpayne@69
|
398 */
|
jpayne@69
|
399 int32_t previous(UErrorCode &status);
|
jpayne@69
|
400
|
jpayne@69
|
401 /**
|
jpayne@69
|
402 * Resets the iteration.
|
jpayne@69
|
403 * Search will begin at the start of the text string if a forward
|
jpayne@69
|
404 * iteration is initiated before a backwards iteration. Otherwise if a
|
jpayne@69
|
405 * backwards iteration is initiated before a forwards iteration, the
|
jpayne@69
|
406 * search will begin at the end of the text string.
|
jpayne@69
|
407 * @stable ICU 2.0
|
jpayne@69
|
408 */
|
jpayne@69
|
409 virtual void reset();
|
jpayne@69
|
410
|
jpayne@69
|
411 protected:
|
jpayne@69
|
412 // protected data members ---------------------------------------------
|
jpayne@69
|
413
|
jpayne@69
|
414 /**
|
jpayne@69
|
415 * C search data struct
|
jpayne@69
|
416 * @stable ICU 2.0
|
jpayne@69
|
417 */
|
jpayne@69
|
418 USearch *m_search_;
|
jpayne@69
|
419
|
jpayne@69
|
420 /**
|
jpayne@69
|
421 * Break iterator.
|
jpayne@69
|
422 * Currently the C++ breakiterator does not have getRules etc to reproduce
|
jpayne@69
|
423 * another in C. Hence we keep the original around and do the verification
|
jpayne@69
|
424 * at the end of the match. The user is responsible for deleting this
|
jpayne@69
|
425 * break iterator.
|
jpayne@69
|
426 * @stable ICU 2.0
|
jpayne@69
|
427 */
|
jpayne@69
|
428 BreakIterator *m_breakiterator_;
|
jpayne@69
|
429
|
jpayne@69
|
430 /**
|
jpayne@69
|
431 * Unicode string version of the search text
|
jpayne@69
|
432 * @stable ICU 2.0
|
jpayne@69
|
433 */
|
jpayne@69
|
434 UnicodeString m_text_;
|
jpayne@69
|
435
|
jpayne@69
|
436 // protected constructors and destructors -----------------------------
|
jpayne@69
|
437
|
jpayne@69
|
438 /**
|
jpayne@69
|
439 * Default constructor.
|
jpayne@69
|
440 * Initializes data to the default values.
|
jpayne@69
|
441 * @stable ICU 2.0
|
jpayne@69
|
442 */
|
jpayne@69
|
443 SearchIterator();
|
jpayne@69
|
444
|
jpayne@69
|
445 /**
|
jpayne@69
|
446 * Constructor for use by subclasses.
|
jpayne@69
|
447 * @param text The target text to be searched.
|
jpayne@69
|
448 * @param breakiter A {@link BreakIterator} that is used to restrict the
|
jpayne@69
|
449 * points at which matches are detected. If
|
jpayne@69
|
450 * <tt>handleNext</tt> or <tt>handlePrev</tt> finds a
|
jpayne@69
|
451 * match, but the match's start or end index is not a
|
jpayne@69
|
452 * boundary as determined by the <tt>BreakIterator</tt>,
|
jpayne@69
|
453 * the match is rejected and <tt>handleNext</tt> or
|
jpayne@69
|
454 * <tt>handlePrev</tt> is called again. If this parameter
|
jpayne@69
|
455 * is <tt>NULL</tt>, no break detection is attempted.
|
jpayne@69
|
456 * @see #handleNext
|
jpayne@69
|
457 * @see #handlePrev
|
jpayne@69
|
458 * @stable ICU 2.0
|
jpayne@69
|
459 */
|
jpayne@69
|
460 SearchIterator(const UnicodeString &text,
|
jpayne@69
|
461 BreakIterator *breakiter = NULL);
|
jpayne@69
|
462
|
jpayne@69
|
463 /**
|
jpayne@69
|
464 * Constructor for use by subclasses.
|
jpayne@69
|
465 * <p>
|
jpayne@69
|
466 * Note: No parsing of the text within the <tt>CharacterIterator</tt>
|
jpayne@69
|
467 * will be done during searching for this version. The block of text
|
jpayne@69
|
468 * in <tt>CharacterIterator</tt> will be used as it is.
|
jpayne@69
|
469 * @param text The target text to be searched.
|
jpayne@69
|
470 * @param breakiter A {@link BreakIterator} that is used to restrict the
|
jpayne@69
|
471 * points at which matches are detected. If
|
jpayne@69
|
472 * <tt>handleNext</tt> or <tt>handlePrev</tt> finds a
|
jpayne@69
|
473 * match, but the match's start or end index is not a
|
jpayne@69
|
474 * boundary as determined by the <tt>BreakIterator</tt>,
|
jpayne@69
|
475 * the match is rejected and <tt>handleNext</tt> or
|
jpayne@69
|
476 * <tt>handlePrev</tt> is called again. If this parameter
|
jpayne@69
|
477 * is <tt>NULL</tt>, no break detection is attempted.
|
jpayne@69
|
478 * @see #handleNext
|
jpayne@69
|
479 * @see #handlePrev
|
jpayne@69
|
480 * @stable ICU 2.0
|
jpayne@69
|
481 */
|
jpayne@69
|
482 SearchIterator(CharacterIterator &text, BreakIterator *breakiter = NULL);
|
jpayne@69
|
483
|
jpayne@69
|
484 // protected methods --------------------------------------------------
|
jpayne@69
|
485
|
jpayne@69
|
486 /**
|
jpayne@69
|
487 * Assignment operator. Sets this iterator to have the same behavior,
|
jpayne@69
|
488 * and iterate over the same text, as the one passed in.
|
jpayne@69
|
489 * @param that instance to be copied.
|
jpayne@69
|
490 * @stable ICU 2.0
|
jpayne@69
|
491 */
|
jpayne@69
|
492 SearchIterator & operator=(const SearchIterator &that);
|
jpayne@69
|
493
|
jpayne@69
|
494 /**
|
jpayne@69
|
495 * Abstract method which subclasses override to provide the mechanism
|
jpayne@69
|
496 * for finding the next match in the target text. This allows different
|
jpayne@69
|
497 * subclasses to provide different search algorithms.
|
jpayne@69
|
498 * <p>
|
jpayne@69
|
499 * If a match is found, the implementation should return the index at
|
jpayne@69
|
500 * which the match starts and should call
|
jpayne@69
|
501 * <tt>setMatchLength</tt> with the number of characters
|
jpayne@69
|
502 * in the target text that make up the match. If no match is found, the
|
jpayne@69
|
503 * method should return USEARCH_DONE.
|
jpayne@69
|
504 * <p>
|
jpayne@69
|
505 * @param position The index in the target text at which the search
|
jpayne@69
|
506 * should start.
|
jpayne@69
|
507 * @param status for error codes if it occurs.
|
jpayne@69
|
508 * @return index at which the match starts, else if match is not found
|
jpayne@69
|
509 * USEARCH_DONE is returned
|
jpayne@69
|
510 * @see #setMatchLength
|
jpayne@69
|
511 * @stable ICU 2.0
|
jpayne@69
|
512 */
|
jpayne@69
|
513 virtual int32_t handleNext(int32_t position, UErrorCode &status)
|
jpayne@69
|
514 = 0;
|
jpayne@69
|
515
|
jpayne@69
|
516 /**
|
jpayne@69
|
517 * Abstract method which subclasses override to provide the mechanism for
|
jpayne@69
|
518 * finding the previous match in the target text. This allows different
|
jpayne@69
|
519 * subclasses to provide different search algorithms.
|
jpayne@69
|
520 * <p>
|
jpayne@69
|
521 * If a match is found, the implementation should return the index at
|
jpayne@69
|
522 * which the match starts and should call
|
jpayne@69
|
523 * <tt>setMatchLength</tt> with the number of characters
|
jpayne@69
|
524 * in the target text that make up the match. If no match is found, the
|
jpayne@69
|
525 * method should return USEARCH_DONE.
|
jpayne@69
|
526 * <p>
|
jpayne@69
|
527 * @param position The index in the target text at which the search
|
jpayne@69
|
528 * should start.
|
jpayne@69
|
529 * @param status for error codes if it occurs.
|
jpayne@69
|
530 * @return index at which the match starts, else if match is not found
|
jpayne@69
|
531 * USEARCH_DONE is returned
|
jpayne@69
|
532 * @see #setMatchLength
|
jpayne@69
|
533 * @stable ICU 2.0
|
jpayne@69
|
534 */
|
jpayne@69
|
535 virtual int32_t handlePrev(int32_t position, UErrorCode &status)
|
jpayne@69
|
536 = 0;
|
jpayne@69
|
537
|
jpayne@69
|
538 /**
|
jpayne@69
|
539 * Sets the length of the currently matched string in the text string to
|
jpayne@69
|
540 * be searched.
|
jpayne@69
|
541 * Subclasses' <tt>handleNext</tt> and <tt>handlePrev</tt>
|
jpayne@69
|
542 * methods should call this when they find a match in the target text.
|
jpayne@69
|
543 * @param length length of the matched text.
|
jpayne@69
|
544 * @see #handleNext
|
jpayne@69
|
545 * @see #handlePrev
|
jpayne@69
|
546 * @stable ICU 2.0
|
jpayne@69
|
547 */
|
jpayne@69
|
548 virtual void setMatchLength(int32_t length);
|
jpayne@69
|
549
|
jpayne@69
|
550 /**
|
jpayne@69
|
551 * Sets the offset of the currently matched string in the text string to
|
jpayne@69
|
552 * be searched.
|
jpayne@69
|
553 * Subclasses' <tt>handleNext</tt> and <tt>handlePrev</tt>
|
jpayne@69
|
554 * methods should call this when they find a match in the target text.
|
jpayne@69
|
555 * @param position start offset of the matched text.
|
jpayne@69
|
556 * @see #handleNext
|
jpayne@69
|
557 * @see #handlePrev
|
jpayne@69
|
558 * @stable ICU 2.0
|
jpayne@69
|
559 */
|
jpayne@69
|
560 virtual void setMatchStart(int32_t position);
|
jpayne@69
|
561
|
jpayne@69
|
562 /**
|
jpayne@69
|
563 * Sets match not found.
|
jpayne@69
|
564 * @stable ICU 2.0
|
jpayne@69
|
565 */
|
jpayne@69
|
566 void setMatchNotFound();
|
jpayne@69
|
567 };
|
jpayne@69
|
568
|
jpayne@69
|
569 inline UBool SearchIterator::operator!=(const SearchIterator &that) const
|
jpayne@69
|
570 { // Inequality is the exact negation of this object's operator==.
|
jpayne@69
|
571 return !(this->operator==(that));
|
jpayne@69
|
572 }
|
jpayne@69
|
573 U_NAMESPACE_END
|
jpayne@69
|
574
|
jpayne@69
|
575 #endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
|
jpayne@69
|
576
|
jpayne@69
|
577 #endif /* U_SHOW_CPLUSPLUS_API */
|
jpayne@69
|
578
|
jpayne@69
|
579 #endif
|
jpayne@69
|
580
|