jpayne@69
|
1 // © 2016 and later: Unicode, Inc. and others.
|
jpayne@69
|
2 // License & terms of use: http://www.unicode.org/copyright.html
|
jpayne@69
|
3 /*
|
jpayne@69
|
4 * Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved.
|
jpayne@69
|
5 **********************************************************************
|
jpayne@69
|
6 * Date Name Description
|
jpayne@69
|
7 * 07/18/01 aliu Creation.
|
jpayne@69
|
8 **********************************************************************
|
jpayne@69
|
9 */
|
jpayne@69
|
10 #ifndef UNIMATCH_H
|
jpayne@69
|
11 #define UNIMATCH_H
|
jpayne@69
|
12
|
jpayne@69
|
13 #include "unicode/utypes.h"
|
jpayne@69
|
14
|
jpayne@69
|
15 /**
|
jpayne@69
|
16 * \file
|
jpayne@69
|
17 * \brief C++ API: Unicode Matcher
|
jpayne@69
|
18 */
|
jpayne@69
|
19
|
jpayne@69
|
20 #if U_SHOW_CPLUSPLUS_API
|
jpayne@69
|
21
|
jpayne@69
|
22 U_NAMESPACE_BEGIN
|
jpayne@69
|
23
|
jpayne@69
|
24 class Replaceable;
|
jpayne@69
|
25 class UnicodeString;
|
jpayne@69
|
26 class UnicodeSet;
|
jpayne@69
|
27
|
jpayne@69
|
/**
 * Result codes produced by <code>UnicodeMatcher::matches()</code>,
 * describing how well the examined text matched.
 * @stable ICU 2.4
 */
enum UMatchDegree {
    /**
     * Returned by <code>matches()</code> when the text and the matcher
     * do not agree: either the text holds a character that fails to
     * match, or (for a non-incremental match) the text lacks some of
     * the characters the matcher requires.
     * @stable ICU 2.4
     */
    U_MISMATCH = 0,

    /**
     * Returned by <code>matches()</code> when the text matches only
     * partially. This code is produced solely by incremental match
     * operations. Every character of the text matched, yet further
     * characters are needed before the match is complete. For
     * variable-length matchers it can also mean that every character
     * matched and that extra characters supplied at limit might match
     * as well.
     * @stable ICU 2.4
     */
    U_PARTIAL_MATCH = 1,

    /**
     * Returned by <code>matches()</code> when the text matches the
     * matcher completely. In an incremental variable-length match this
     * code is produced when the given text matches and it is known that
     * appending more characters would not change the extent of the
     * match.
     * @stable ICU 2.4
     */
    U_MATCH = 2
};
|
jpayne@69
|
65
|
jpayne@69
|
66 /**
|
jpayne@69
|
67 * <code>UnicodeMatcher</code> defines a protocol for objects that can
|
jpayne@69
|
68 * match a range of characters in a Replaceable string.
|
jpayne@69
|
69 * @stable ICU 2.4
|
jpayne@69
|
70 */
|
jpayne@69
|
71 class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ {
|
jpayne@69
|
72
|
jpayne@69
|
73 public:
|
jpayne@69
|
74 /**
|
jpayne@69
|
75 * Destructor.
|
jpayne@69
|
76 * @stable ICU 2.4
|
jpayne@69
|
77 */
|
jpayne@69
|
78 virtual ~UnicodeMatcher();
|
jpayne@69
|
79
|
jpayne@69
|
80 /**
|
jpayne@69
|
81 * Return a UMatchDegree value indicating the degree of match for
|
jpayne@69
|
82 * the given text at the given offset. Zero, one, or more
|
jpayne@69
|
83 * characters may be matched.
|
jpayne@69
|
84 *
|
jpayne@69
|
85 * Matching in the forward direction is indicated by limit >
|
jpayne@69
|
86 * offset. Characters from offset forwards to limit-1 will be
|
jpayne@69
|
87 * considered for matching.
|
jpayne@69
|
88 *
|
jpayne@69
|
89 * Matching in the reverse direction is indicated by limit <
|
jpayne@69
|
90 * offset. Characters from offset backwards to limit+1 will be
|
jpayne@69
|
91 * considered for matching.
|
jpayne@69
|
92 *
|
jpayne@69
|
93 * If limit == offset then the only match possible is a zero
|
jpayne@69
|
94 * character match (which subclasses may implement if desired).
|
jpayne@69
|
95 *
|
jpayne@69
|
96 * As a side effect, advance the offset parameter to the limit of
|
jpayne@69
|
97 * the matched substring. In the forward direction, this will be
|
jpayne@69
|
98 * the index of the last matched character plus one. In the
|
jpayne@69
|
99 * reverse direction, this will be the index of the last matched
|
jpayne@69
|
100 * character minus one.
|
jpayne@69
|
101 *
|
jpayne@69
|
102 * <p>Note: This method is not const because some classes may
|
jpayne@69
|
103 * modify their state as the result of a match.
|
jpayne@69
|
104 *
|
jpayne@69
|
105 * @param text the text to be matched
|
jpayne@69
|
106 * @param offset on input, the index into text at which to begin
|
jpayne@69
|
107 * matching. On output, the limit of the matched text. The
|
jpayne@69
|
108 * number of matched characters is the output value of offset
|
jpayne@69
|
109 * minus the input value. Offset should always point to the
|
jpayne@69
|
110 * HIGH SURROGATE (leading code unit) of a pair of surrogates,
|
jpayne@69
|
111 * both on entry and upon return.
|
jpayne@69
|
112 * @param limit the limit index of text to be matched. Greater
|
jpayne@69
|
113 * than offset for a forward direction match, less than offset for
|
jpayne@69
|
114 * a backward direction match. The last character to be
|
jpayne@69
|
115 * considered for matching will be text.charAt(limit-1) in the
|
jpayne@69
|
116 * forward direction or text.charAt(limit+1) in the backward
|
jpayne@69
|
117 * direction.
|
jpayne@69
|
118 * @param incremental if TRUE, then assume further characters may
|
jpayne@69
|
119 * be inserted at limit and check for partial matching. Otherwise
|
jpayne@69
|
120 * assume the text as given is complete.
|
jpayne@69
|
121 * @return a match degree value indicating a full match, a partial
|
jpayne@69
|
122 * match, or a mismatch. If incremental is FALSE then
|
jpayne@69
|
123 * U_PARTIAL_MATCH should never be returned.
|
jpayne@69
|
124 * @stable ICU 2.4
|
jpayne@69
|
125 */
|
jpayne@69
|
126 virtual UMatchDegree matches(const Replaceable& text,
|
jpayne@69
|
127 int32_t& offset,
|
jpayne@69
|
128 int32_t limit,
|
jpayne@69
|
129 UBool incremental) = 0;
|
jpayne@69
|
130
|
jpayne@69
|
131 /**
|
jpayne@69
|
132 * Returns a string representation of this matcher. If the result of
|
jpayne@69
|
133 * calling this function is passed to the appropriate parser, it
|
jpayne@69
|
134 * will produce another matcher that is equal to this one.
|
jpayne@69
|
135 * @param result the string to receive the pattern. Previous
|
jpayne@69
|
136 * contents will be deleted.
|
jpayne@69
|
137 * @param escapeUnprintable if TRUE then convert unprintable
|
jpayne@69
|
138 * character to their hex escape representations, \\uxxxx or
|
jpayne@69
|
139 * \\Uxxxxxxxx. Unprintable characters are those other than
|
jpayne@69
|
140 * U+000A, U+0020..U+007E.
|
jpayne@69
|
141 * @stable ICU 2.4
|
jpayne@69
|
142 */
|
jpayne@69
|
143 virtual UnicodeString& toPattern(UnicodeString& result,
|
jpayne@69
|
144 UBool escapeUnprintable = FALSE) const = 0;
|
jpayne@69
|
145
|
jpayne@69
|
146 /**
|
jpayne@69
|
147 * Returns TRUE if this matcher will match a character c, where c
|
jpayne@69
|
148 * & 0xFF == v, at offset, in the forward direction (with limit >
|
jpayne@69
|
149 * offset). This is used by <tt>RuleBasedTransliterator</tt> for
|
jpayne@69
|
150 * indexing.
|
jpayne@69
|
151 * @stable ICU 2.4
|
jpayne@69
|
152 */
|
jpayne@69
|
153 virtual UBool matchesIndexValue(uint8_t v) const = 0;
|
jpayne@69
|
154
|
jpayne@69
|
155 /**
|
jpayne@69
|
156 * Union the set of all characters that may be matched by this object
|
jpayne@69
|
157 * into the given set.
|
jpayne@69
|
158 * @param toUnionTo the set into which to union the source characters
|
jpayne@69
|
159 * @stable ICU 2.4
|
jpayne@69
|
160 */
|
jpayne@69
|
161 virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0;
|
jpayne@69
|
162 };
|
jpayne@69
|
163
|
jpayne@69
|
164 U_NAMESPACE_END
|
jpayne@69
|
165
|
jpayne@69
|
166 #endif /* U_SHOW_CPLUSPLUS_API */
|
jpayne@69
|
167
|
jpayne@69
|
168 #endif
|