Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/unicode/unimatch.h @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 // © 2016 and later: Unicode, Inc. and others. | |
2 // License & terms of use: http://www.unicode.org/copyright.html | |
3 /* | |
4 * Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved. | |
5 ********************************************************************** | |
6 * Date Name Description | |
7 * 07/18/01 aliu Creation. | |
8 ********************************************************************** | |
9 */ | |
10 #ifndef UNIMATCH_H | |
11 #define UNIMATCH_H | |
12 | |
13 #include "unicode/utypes.h" | |
14 | |
15 /** | |
16 * \file | |
17 * \brief C++ API: Unicode Matcher | |
18 */ | |
19 | |
20 #if U_SHOW_CPLUSPLUS_API | |
21 | |
22 U_NAMESPACE_BEGIN | |
23 | |
24 class Replaceable; | |
25 class UnicodeString; | |
26 class UnicodeSet; | |
27 | |
28 /** | |
29 * Constants returned by <code>UnicodeMatcher::matches()</code> | |
30 * indicating the degree of match. Declared in the order mismatch, partial match, complete match, so the enumerators take the implicit values 0, 1 and 2. | |
31 * @stable ICU 2.4 | |
32 */ | |
33 enum UMatchDegree { | |
34 /** | |
35 * Constant returned by <code>matches()</code> indicating a | |
36 * mismatch between the text and this matcher. The text contains | |
37 * a character which does not match, or the text does not contain | |
38 * all desired characters for a non-incremental match. | |
39 * @stable ICU 2.4 | |
40 */ | |
41 U_MISMATCH, | |
42 | |
43 /** | |
44 * Constant returned by <code>matches()</code> indicating a | |
45 * partial match between the text and this matcher. This value is | |
46 * only returned for incremental match operations. All characters | |
47 * of the text match, but more characters are required for a | |
48 * complete match. Alternatively, for variable-length matchers, | |
49 * all characters of the text match, and if more characters were | |
50 * supplied at limit, they might also match. | |
51 * @stable ICU 2.4 | |
52 */ | |
53 U_PARTIAL_MATCH, | |
54 | |
55 /** | |
56 * Constant returned by <code>matches()</code> indicating a | |
57 * complete match between the text and this matcher. For an | |
58 * incremental variable-length match, this value is returned if | |
59 * the given text matches, and it is known that additional | |
60 * characters would not alter the extent of the match. | |
61 * @stable ICU 2.4 | |
62 */ | |
63 U_MATCH | |
64 }; | |
65 | |
66 /** | |
67 * <code>UnicodeMatcher</code> defines a protocol for objects that can | |
68 * match a range of characters in a Replaceable string. | |
69 * @stable ICU 2.4 | |
70 */ | |
71 class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ { | |
72 | |
73 public: | |
74 /** | |
75 * Destructor. | |
76 * @stable ICU 2.4 | |
77 */ | |
78 virtual ~UnicodeMatcher(); | |
79 | |
80 /** | |
81 * Return a UMatchDegree value indicating the degree of match for | |
82 * the given text at the given offset. Zero, one, or more | |
83 * characters may be matched. | |
84 * | |
85 * Matching in the forward direction is indicated by limit > | |
86 * offset. Characters from offset forwards to limit-1 will be | |
87 * considered for matching. | |
88 * | |
89 * Matching in the reverse direction is indicated by limit < | |
90 * offset. Characters from offset backwards to limit+1 will be | |
91 * considered for matching. | |
92 * | |
93 * If limit == offset then the only match possible is a zero | |
94 * character match (which subclasses may implement if desired). | |
95 * | |
96 * As a side effect, advance the offset parameter to the limit of | |
97 * the matched substring. In the forward direction, this will be | |
98 * the index of the last matched character plus one. In the | |
99 * reverse direction, this will be the index of the last matched | |
100 * character minus one. | |
101 * | |
102 * <p>Note: This method is not const because some classes may | |
103 * modify their state as the result of a match. | |
104 * | |
105 * @param text the text to be matched | |
106 * @param offset on input, the index into text at which to begin | |
107 * matching. On output, the limit of the matched text. The | |
108 * number of matched characters is the output value of offset | |
109 * minus the input value. Offset should always point to the | |
110 * HIGH SURROGATE (leading code unit) of a pair of surrogates, | |
111 * both on entry and upon return. | |
112 * @param limit the limit index of text to be matched. Greater | |
113 * than offset for a forward direction match, less than offset for | |
114 * a backward direction match. The last character to be | |
115 * considered for matching will be text.charAt(limit-1) in the | |
116 * forward direction or text.charAt(limit+1) in the backward | |
117 * direction. | |
118 * @param incremental if TRUE, then assume further characters may | |
119 * be inserted at limit and check for partial matching. Otherwise | |
120 * assume the text as given is complete. | |
121 * @return a match degree value indicating a full match, a partial | |
122 * match, or a mismatch. If incremental is FALSE then | |
123 * U_PARTIAL_MATCH should never be returned. | |
124 * @stable ICU 2.4 | |
125 */ | |
126 virtual UMatchDegree matches(const Replaceable& text, | |
127 int32_t& offset, | |
128 int32_t limit, | |
129 UBool incremental) = 0; | |
130 | |
131 /** | |
132 * Returns a string representation of this matcher. If the result of | |
133 * calling this function is passed to the appropriate parser, it | |
134 * will produce another matcher that is equal to this one. | |
135 * @param result the string to receive the pattern. Previous | |
136 * contents will be deleted. | |
137 * @param escapeUnprintable if TRUE then convert unprintable | |
138 * characters to their hex escape representations, \\uxxxx or | |
139 * \\Uxxxxxxxx. Unprintable characters are those other than | |
140 * U+000A, U+0020..U+007E. | |
141 * @stable ICU 2.4 | |
142 */ | |
143 virtual UnicodeString& toPattern(UnicodeString& result, | |
144 UBool escapeUnprintable = FALSE) const = 0; | |
145 | |
146 /** | |
147 * Returns TRUE if this matcher will match a character c, where | |
148 * (c & 0xFF) == v, at offset, in the forward direction (with limit > | |
149 * offset). This is used by <tt>RuleBasedTransliterator</tt> for indexing. | |
150 * @param v the low-byte value that (c & 0xFF) of a candidate character c is compared against | |
151 * @stable ICU 2.4 | |
152 */ | |
153 virtual UBool matchesIndexValue(uint8_t v) const = 0; | |
154 | |
155 /** | |
156 * Union the set of all characters that may be matched by this object | |
157 * into the given set. | |
158 * @param toUnionTo the set into which to union the source characters | |
159 * @stable ICU 2.4 | |
160 */ | |
161 virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0; | |
162 }; | |
163 | |
164 U_NAMESPACE_END | |
165 | |
166 #endif /* U_SHOW_CPLUSPLUS_API */ | |
167 | |
168 #endif |