Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/pcre_scanner.h @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 // Copyright (c) 2005, Google Inc. | |
2 // All rights reserved. | |
3 // | |
4 // Redistribution and use in source and binary forms, with or without | |
5 // modification, are permitted provided that the following conditions are | |
6 // met: | |
7 // | |
8 // * Redistributions of source code must retain the above copyright | |
9 // notice, this list of conditions and the following disclaimer. | |
10 // * Redistributions in binary form must reproduce the above | |
11 // copyright notice, this list of conditions and the following disclaimer | |
12 // in the documentation and/or other materials provided with the | |
13 // distribution. | |
14 // * Neither the name of Google Inc. nor the names of its | |
15 // contributors may be used to endorse or promote products derived from | |
16 // this software without specific prior written permission. | |
17 // | |
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 // | |
30 // Author: Sanjay Ghemawat | |
31 // | |
32 // Regular-expression based scanner for parsing an input stream. | |
33 // | |
34 // Example 1: parse a sequence of "var = number" entries from input: | |
35 // | |
36 // Scanner scanner(input); | |
37 // string var; | |
38 // int number; | |
39 // scanner.SetSkipExpression("\\s+"); // Skip any white space we encounter | |
40 // while (scanner.Consume("(\\w+) = (\\d+)", &var, &number)) { | |
41 // ...; | |
42 // } | |
43 | |
44 #ifndef _PCRE_SCANNER_H | |
45 #define _PCRE_SCANNER_H | |
46 | |
47 #include <assert.h> | |
48 #include <string> | |
49 #include <vector> | |
50 | |
51 #include <pcrecpp.h> | |
52 #include <pcre_stringpiece.h> | |
53 | |
54 namespace pcrecpp { | |
55 | |
56 class PCRECPP_EXP_DEFN Scanner { | |
57 public: | |
58 Scanner(); | |
// Construct a scanner over "input", as in the usage example in the | |
// header comment of this file. | |
59 explicit Scanner(const std::string& input); | |
60 ~Scanner(); | |
61 | |
62 // Return current line number. The returned line-number is | |
63 // one-based. I.e. it returns 1 + the number of consumed newlines. | |
64 // | |
65 // Note: this method may be slow. It may take time proportional to | |
66 // the size of the input. | |
67 int LineNumber() const; | |
68 | |
69 // Return the byte offset within the input data at which the | |
70 // scanner is currently positioned. | |
71 int Offset() const; | |
72 | |
73 // Return true iff the start of the remaining input matches "re" | |
74 bool LookingAt(const RE& re) const; | |
75 | |
76 // Return true iff all of the following are true | |
77 // a. the start of the remaining input matches "re", | |
78 // b. if any arguments are supplied, matched sub-patterns can be | |
79 // parsed and stored into the arguments. | |
80 // If it returns true, it skips over the matched input and any | |
81 // following input that matches the "skip" regular expression. | |
82 bool Consume(const RE& re, | |
83 const Arg& arg0 = RE::no_arg, | |
84 const Arg& arg1 = RE::no_arg, | |
85 const Arg& arg2 = RE::no_arg | |
86 // TODO: Allow more arguments? | |
87 ); | |
88 | |
89 // Set the "skip" regular expression. If after consuming some data, | |
90 // a prefix of the input matches this RE, it is automatically | |
91 // skipped. For example, a programming language scanner would use | |
92 // a skip RE that matches white space and comments. | |
93 // | |
94 // scanner.SetSkipExpression("\\s+|//.*|/[*](.|\n)*?[*]/"); | |
95 // | |
96 // Skipping repeats as long as it succeeds. We used to let people do | |
97 // this by writing "(...)*" in the regular expression, but that added | |
98 // up to lots of recursive calls within the pcre library, so now we | |
99 // control repetition explicitly via the function call API. | |
100 // | |
101 // You can pass NULL for "re" if you do not want any data to be skipped. | |
102 void Skip(const char* re); // DEPRECATED; does *not* repeat | |
103 void SetSkipExpression(const char* re); | |
104 | |
105 // Temporarily pause "skip"ing. This | |
106 // Skip("Foo"); code ; DisableSkip(); code; EnableSkip() | |
107 // is similar to | |
108 // Skip("Foo"); code ; Skip(NULL); code ; Skip("Foo"); | |
109 // but avoids creating/deleting new RE objects. | |
110 void DisableSkip(); | |
111 | |
112 // Reenable previously paused skipping. Any prefix of the input | |
113 // that matches the skip pattern is immediately dropped. | |
114 void EnableSkip(); | |
115 | |
116 /***** Special wrappers around SetSkipExpression() for some common idioms *****/ | |
117 | |
118 // Arranges to skip whitespace, C comments, C++ comments. | |
119 // The overall RE is a disjunction of the following REs: | |
120 // \\s whitespace | |
121 // //.*\n C++ comment | |
122 // /[*](.|\n)*?[*]/ C comment (x*? means minimal repetitions of x) | |
123 // We get repetition via the semantics of SetSkipExpression, not by using * | |
124 void SkipCXXComments() { | |
125 SetSkipExpression("\\s|//.*\n|/[*](?:\n|.)*?[*]/"); | |
126 } | |
127 | |
// Set the save_comments_ flag, which controls whether skipped text | |
// is saved (see save_comments_ and comments_ below). | |
128 void set_save_comments(bool comments) { | |
129 save_comments_ = comments; | |
130 } | |
131 | |
// Return the current value of the save_comments_ flag. | |
132 bool save_comments() { | |
133 return save_comments_; | |
134 } | |
135 | |
136 // Append to vector ranges the comments found in the | |
137 // byte range [start,end] (inclusive) of the input data. | |
138 // Only comments that were extracted entirely within that | |
139 // range are returned: no range splitting of atomically-extracted | |
140 // comments is performed. | |
141 void GetComments(int start, int end, std::vector<StringPiece> *ranges); | |
142 | |
143 // Append to vector ranges the comments added | |
144 // since the last time this was called. This | |
145 // functionality is provided for efficiency when | |
146 // interleaving scanning with parsing. | |
147 void GetNextComments(std::vector<StringPiece> *ranges); | |
148 | |
149 private: | |
150 std::string data_; // All the input data | |
151 StringPiece input_; // Unprocessed input | |
152 RE* skip_; // If non-NULL, RE for skipping input | |
153 bool should_skip_; // If true, use skip_ | |
154 bool skip_repeat_; // If true, repeat skip_ as long as it works | |
155 bool save_comments_; // If true, save the text matched by the skip expression (presumably into comments_ below) | |
156 | |
157 // the skipped comments | |
158 // TODO: later consider requiring that the StringPieces be added | |
159 // in order by their start position | |
160 std::vector<StringPiece> *comments_; | |
161 | |
162 // the offset into comments_ that has been returned by GetNextComments | |
163 int comments_offset_; | |
164 | |
165 // helper function to consume *skip_ and honour | |
166 // save_comments_ | |
167 void ConsumeSkip(); | |
168 }; | |
169 | |
170 } // namespace pcrecpp | |
171 | |
172 #endif /* _PCRE_SCANNER_H */ |