annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/pcre_scanner.h @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 // Copyright (c) 2005, Google Inc.
jpayne@69 2 // All rights reserved.
jpayne@69 3 //
jpayne@69 4 // Redistribution and use in source and binary forms, with or without
jpayne@69 5 // modification, are permitted provided that the following conditions are
jpayne@69 6 // met:
jpayne@69 7 //
jpayne@69 8 // * Redistributions of source code must retain the above copyright
jpayne@69 9 // notice, this list of conditions and the following disclaimer.
jpayne@69 10 // * Redistributions in binary form must reproduce the above
jpayne@69 11 // copyright notice, this list of conditions and the following disclaimer
jpayne@69 12 // in the documentation and/or other materials provided with the
jpayne@69 13 // distribution.
jpayne@69 14 // * Neither the name of Google Inc. nor the names of its
jpayne@69 15 // contributors may be used to endorse or promote products derived from
jpayne@69 16 // this software without specific prior written permission.
jpayne@69 17 //
jpayne@69 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
jpayne@69 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
jpayne@69 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
jpayne@69 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
jpayne@69 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
jpayne@69 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
jpayne@69 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
jpayne@69 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
jpayne@69 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
jpayne@69 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
jpayne@69 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
jpayne@69 29 //
jpayne@69 30 // Author: Sanjay Ghemawat
jpayne@69 31 //
jpayne@69 32 // Regular-expression based scanner for parsing an input stream.
jpayne@69 33 //
jpayne@69 34 // Example 1: parse a sequence of "var = number" entries from input:
jpayne@69 35 //
jpayne@69 36 // Scanner scanner(input);
jpayne@69 37 // string var;
jpayne@69 38 // int number;
jpayne@69 39 // scanner.SetSkipExpression("\\s+"); // Skip any white space we encounter
jpayne@69 40 // while (scanner.Consume("(\\w+) = (\\d+)", &var, &number)) {
jpayne@69 41 // ...;
jpayne@69 42 // }
jpayne@69 43
jpayne@69 44 #ifndef _PCRE_SCANNER_H
jpayne@69 45 #define _PCRE_SCANNER_H
jpayne@69 46
jpayne@69 47 #include <assert.h>
jpayne@69 48 #include <string>
jpayne@69 49 #include <vector>
jpayne@69 50
jpayne@69 51 #include <pcrecpp.h>
jpayne@69 52 #include <pcre_stringpiece.h>
jpayne@69 53
jpayne@69 54 namespace pcrecpp {
jpayne@69 55
jpayne@69 56 class PCRECPP_EXP_DEFN Scanner {
jpayne@69 57 public:
jpayne@69 58 Scanner();
jpayne@69 59 explicit Scanner(const std::string& input);
jpayne@69 60 ~Scanner();
jpayne@69 61
jpayne@69 62 // Return current line number. The returned line-number is
jpayne@69 63 // one-based. I.e. it returns 1 + the number of consumed newlines.
jpayne@69 64 //
jpayne@69 65 // Note: this method may be slow. It may take time proportional to
jpayne@69 66 // the size of the input.
jpayne@69 67 int LineNumber() const;
jpayne@69 68
jpayne@69 69 // Return the byte-offset that the scanner is looking in the
jpayne@69 70 // input data;
jpayne@69 71 int Offset() const;
jpayne@69 72
jpayne@69 73 // Return true iff the start of the remaining input matches "re"
jpayne@69 74 bool LookingAt(const RE& re) const;
jpayne@69 75
jpayne@69 76 // Return true iff all of the following are true
jpayne@69 77 // a. the start of the remaining input matches "re",
jpayne@69 78 // b. if any arguments are supplied, matched sub-patterns can be
jpayne@69 79 // parsed and stored into the arguments.
jpayne@69 80 // If it returns true, it skips over the matched input and any
jpayne@69 81 // following input that matches the "skip" regular expression.
jpayne@69 82 bool Consume(const RE& re,
jpayne@69 83 const Arg& arg0 = RE::no_arg,
jpayne@69 84 const Arg& arg1 = RE::no_arg,
jpayne@69 85 const Arg& arg2 = RE::no_arg
jpayne@69 86 // TODO: Allow more arguments?
jpayne@69 87 );
jpayne@69 88
jpayne@69 89 // Set the "skip" regular expression. If after consuming some data,
jpayne@69 90 // a prefix of the input matches this RE, it is automatically
jpayne@69 91 // skipped. For example, a programming language scanner would use
jpayne@69 92 // a skip RE that matches white space and comments.
jpayne@69 93 //
jpayne@69 94 // scanner.SetSkipExpression("\\s+|//.*|/[*](.|\n)*?[*]/");
jpayne@69 95 //
jpayne@69 96 // Skipping repeats as long as it succeeds. We used to let people do
jpayne@69 97 // this by writing "(...)*" in the regular expression, but that added
jpayne@69 98 // up to lots of recursive calls within the pcre library, so now we
jpayne@69 99 // control repetition explicitly via the function call API.
jpayne@69 100 //
jpayne@69 101 // You can pass NULL for "re" if you do not want any data to be skipped.
jpayne@69 102 void Skip(const char* re); // DEPRECATED; does *not* repeat
jpayne@69 103 void SetSkipExpression(const char* re);
jpayne@69 104
jpayne@69 105 // Temporarily pause "skip"ing. This
jpayne@69 106 // Skip("Foo"); code ; DisableSkip(); code; EnableSkip()
jpayne@69 107 // is similar to
jpayne@69 108 // Skip("Foo"); code ; Skip(NULL); code ; Skip("Foo");
jpayne@69 109 // but avoids creating/deleting new RE objects.
jpayne@69 110 void DisableSkip();
jpayne@69 111
jpayne@69 112 // Reenable previously paused skipping. Any prefix of the input
jpayne@69 113 // that matches the skip pattern is immediately dropped.
jpayne@69 114 void EnableSkip();
jpayne@69 115
jpayne@69 116 /***** Special wrappers around SetSkip() for some common idioms *****/
jpayne@69 117
jpayne@69 118 // Arranges to skip whitespace, C comments, C++ comments.
jpayne@69 119 // The overall RE is a disjunction of the following REs:
jpayne@69 120 // \\s whitespace
jpayne@69 121 // //.*\n C++ comment
jpayne@69 122 // /[*](.|\n)*?[*]/ C comment (x*? means minimal repetitions of x)
jpayne@69 123 // We get repetition via the semantics of SetSkipExpression, not by using *
jpayne@69 124 void SkipCXXComments() {
jpayne@69 125 SetSkipExpression("\\s|//.*\n|/[*](?:\n|.)*?[*]/");
jpayne@69 126 }
jpayne@69 127
jpayne@69 128 void set_save_comments(bool comments) {
jpayne@69 129 save_comments_ = comments;
jpayne@69 130 }
jpayne@69 131
jpayne@69 132 bool save_comments() {
jpayne@69 133 return save_comments_;
jpayne@69 134 }
jpayne@69 135
jpayne@69 136 // Append to vector ranges the comments found in the
jpayne@69 137 // byte range [start,end] (inclusive) of the input data.
jpayne@69 138 // Only comments that were extracted entirely within that
jpayne@69 139 // range are returned: no range splitting of atomically-extracted
jpayne@69 140 // comments is performed.
jpayne@69 141 void GetComments(int start, int end, std::vector<StringPiece> *ranges);
jpayne@69 142
jpayne@69 143 // Append to vector ranges the comments added
jpayne@69 144 // since the last time this was called. This
jpayne@69 145 // functionality is provided for efficiency when
jpayne@69 146 // interleaving scanning with parsing.
jpayne@69 147 void GetNextComments(std::vector<StringPiece> *ranges);
jpayne@69 148
jpayne@69 149 private:
jpayne@69 150 std::string data_; // All the input data
jpayne@69 151 StringPiece input_; // Unprocessed input
jpayne@69 152 RE* skip_; // If non-NULL, RE for skipping input
jpayne@69 153 bool should_skip_; // If true, use skip_
jpayne@69 154 bool skip_repeat_; // If true, repeat skip_ as long as it works
jpayne@69 155 bool save_comments_; // If true, aggregate the skip expression
jpayne@69 156
jpayne@69 157 // the skipped comments
jpayne@69 158 // TODO: later consider requiring that the StringPieces be added
jpayne@69 159 // in order by their start position
jpayne@69 160 std::vector<StringPiece> *comments_;
jpayne@69 161
jpayne@69 162 // the offset into comments_ that has been returned by GetNextComments
jpayne@69 163 int comments_offset_;
jpayne@69 164
jpayne@69 165 // helper function to consume *skip_ and honour
jpayne@69 166 // save_comments_
jpayne@69 167 void ConsumeSkip();
jpayne@69 168 };
jpayne@69 169
jpayne@69 170 } // namespace pcrecpp
jpayne@69 171
jpayne@69 172 #endif /* _PCRE_SCANNER_H */