"""Provide advanced parsing abilities for ParenMatch and other extensions.

HyperParser uses PyParser.  PyParser mostly gives information on the
proper indentation of code.  HyperParser gives additional information on
the structure of code.
"""
jpayne@69
|
7 from keyword import iskeyword
|
jpayne@69
|
8 import string
|
jpayne@69
|
9
|
jpayne@69
|
10 from idlelib import pyparse
|
jpayne@69
|
11
|
jpayne@69
|
# All ASCII characters that may appear anywhere in an identifier.
_ASCII_ID_CHARS = frozenset(string.ascii_letters + string.digits + "_")
# All ASCII characters that may be the first character of an identifier.
_ASCII_ID_FIRST_CHARS = frozenset(string.ascii_letters + "_")

# Lookup table, indexed by code point 0..127: is this 7-bit ASCII
# character valid inside a Python identifier?
_IS_ASCII_ID_CHAR = [(chr(x) in _ASCII_ID_CHARS) for x in range(128)]
# Lookup table, indexed by code point 0..127: is this 7-bit ASCII
# character valid as the first character of a Python identifier?
_IS_ASCII_ID_FIRST_CHAR = [
    (chr(x) in _ASCII_ID_FIRST_CHARS) for x in range(128)
]
|
jpayne@69
|
23
|
jpayne@69
|
24
|
jpayne@69
|
class HyperParser:
    """Give structural information about the code around a text index.

    After construction the instance holds:

    rawtext        -- the analyzed source text (parser code without the
                      trailing space and newline added for parsing)
    stopatindex    -- text-widget index corresponding to the end of rawtext
    bracketing     -- result of the parser's get_last_stmt_bracketing();
                      each entry is indexed as (position in rawtext, level)
    isopener       -- for each bracketing entry, whether its level is
                      higher than that of the previous entry
    indexinrawtext -- offset in rawtext of the current index
    indexbracket   -- position in bracketing to which the index belongs
    """

    def __init__(self, editwin, index):
        """To initialize, analyze the surroundings of the given index.

        editwin must provide the attributes read here: text, indentwidth,
        tabwidth, prompt_last_line, num_context_lines and
        _build_char_in_string_func.  index is an index of editwin.text.
        """
        self.editwin = editwin
        self.text = text = editwin.text

        parser = pyparse.Parser(editwin.indentwidth, editwin.tabwidth)

        # A resolved index looks like "line.column"; keep the line number.
        lno = int(float(text.index(index)))

        if not editwin.prompt_last_line:
            # Try each context size in turn until the parser finds a good
            # place to start parsing, or until we start at line 1.
            for context in editwin.num_context_lines:
                startat = max(lno - context, 1)
                startatindex = repr(startat) + ".0"
                stopatindex = "%d.end" % lno
                # We add the newline because PyParse requires a newline
                # at end. We add a space so that index won't be at end
                # of line, so that its status will be the same as the
                # char before it, if should.
                parser.set_code(text.get(startatindex, stopatindex) + ' \n')
                bod = parser.find_good_parse_start(
                    editwin._build_char_in_string_func(startatindex))
                if bod is not None or startat == 1:
                    break
            parser.set_lo(bod or 0)
        else:
            # Start right after the closest "console"-tagged range that
            # precedes index, or at the top of the text if there is none.
            r = text.tag_prevrange("console", index)
            startatindex = r[1] if r else "1.0"
            stopatindex = "%d.end" % lno
            # We add the newline because PyParse requires it. We add a
            # space so that index won't be at end of line, so that its
            # status will be the same as the char before it, if should.
            parser.set_code(text.get(startatindex, stopatindex) + ' \n')
            parser.set_lo(0)

        # We want what the parser has, minus the last newline and space.
        self.rawtext = parser.code[:-2]
        # Parser.code apparently preserves the statement we are in, so
        # that stopatindex can be used to synchronize the string with
        # the text box indices.
        self.stopatindex = stopatindex
        self.bracketing = bracketing = parser.get_last_stmt_bracketing()
        # Find which entries of bracketing are openers, i.e. raise the
        # level relative to the previous entry. These always correspond
        # to a character of rawtext.
        self.isopener = [i > 0 and entry[1] > bracketing[i - 1][1]
                         for i, entry in enumerate(bracketing)]

        self.set_index(index)

    def set_index(self, index):
        """Set the index to which the functions relate.

        The index must be in the same statement.

        Raises ValueError if index lies before the start of rawtext.
        """
        # The distance from index to stopatindex, measured in the widget,
        # is subtracted from the length of rawtext to get the offset.
        indexinrawtext = (len(self.rawtext) -
                          len(self.text.get(index, self.stopatindex)))
        if indexinrawtext < 0:
            raise ValueError("Index %s precedes the analyzed statement"
                             % index)
        self.indexinrawtext = indexinrawtext

        # Find the rightmost bracket to which index belongs.
        bracketing = self.bracketing
        last = len(bracketing) - 1
        bracket = 0
        while bracket < last and bracketing[bracket + 1][0] < indexinrawtext:
            bracket += 1
        # An entry that starts exactly at the index and is not an opener
        # also counts as containing the index.
        if (bracket < last and
                bracketing[bracket + 1][0] == indexinrawtext and
                not self.isopener[bracket + 1]):
            bracket += 1
        self.indexbracket = bracket

    def is_in_string(self):
        """Is the index given to the HyperParser in a string?"""
        # The bracket to which we belong should be an opener.
        # If it's an opener, it has to have a character.
        return (self.isopener[self.indexbracket] and
                self.rawtext[self.bracketing[self.indexbracket][0]]
                in ('"', "'"))

    def is_in_code(self):
        """Is the index given to the HyperParser in normal code?"""
        # Not in code only when the enclosing entry is an opener whose
        # opening character starts a comment or a string.
        return (not self.isopener[self.indexbracket] or
                self.rawtext[self.bracketing[self.indexbracket][0]]
                not in ('#', '"', "'"))

    def get_surrounding_brackets(self, openers='([{', mustclose=False):
        """Return bracket indexes or None.

        If the index given to the HyperParser is surrounded by a
        bracket defined in openers (or at least has one before it),
        return the indices of the opening bracket and the closing
        bracket (or the end of line, whichever comes first).

        If it is not surrounded by brackets, or the end of line comes
        before the closing bracket and mustclose is True, returns None.
        """
        rawtext = self.rawtext
        bracketing = self.bracketing

        # Walk backwards to the closest enclosing opener that is one of
        # the requested characters, tracking the lowest level seen.
        bracketinglevel = bracketing[self.indexbracket][1]
        before = self.indexbracket
        while (not self.isopener[before] or
               rawtext[bracketing[before][0]] not in openers or
               bracketing[before][1] > bracketinglevel):
            before -= 1
            if before < 0:
                return None
            bracketinglevel = min(bracketinglevel, bracketing[before][1])

        # Walk forwards to the first entry below that level.
        after = self.indexbracket + 1
        while (after < len(bracketing) and
               bracketing[after][1] >= bracketinglevel):
            after += 1

        # Offsets in rawtext are converted to widget indices by counting
        # characters back from stopatindex.
        beforeindex = self.text.index(
            "%s-%dc" % (self.stopatindex,
                        len(rawtext) - bracketing[before][0]))
        if (after >= len(bracketing) or
                bracketing[after][0] > len(rawtext)):
            # No closing bracket inside the analyzed text.
            if mustclose:
                return None
            afterindex = self.stopatindex
        else:
            # We are after a real char, so it is a ')' and we give the
            # index before it.
            afterindex = self.text.index(
                "%s-%dc" % (self.stopatindex,
                            len(rawtext) - (bracketing[after][0] - 1)))

        return beforeindex, afterindex

    # the set of built-in identifiers which are also keywords,
    # i.e. keyword.iskeyword() returns True for them
    _ID_KEYWORDS = frozenset({"True", "False", "None"})

    @classmethod
    def _eat_identifier(cls, str, limit, pos):
        """Given a string and pos, return the number of chars in the
        identifier which ends at pos, or 0 if there is no such one.

        The search never goes back past limit.  Keywords are not
        considered identifiers, except for True, False and None.
        """
        is_ascii_id_char = _IS_ASCII_ID_CHAR

        # Start at the end (pos) and work backwards.
        i = pos

        # Go backwards as long as the characters are valid ASCII
        # identifier characters. This is an optimization, since it
        # is faster in the common case where most of the characters
        # are ASCII.
        while i > limit and (
                ord(str[i - 1]) < 128 and
                is_ascii_id_char[ord(str[i - 1])]
        ):
            i -= 1

        if i > limit and ord(str[i - 1]) >= 128:
            # The above loop ended due to reaching a non-ASCII
            # character: continue going backwards using the most generic
            # test for whether a string contains only valid identifier
            # characters.  The 'a' prefix makes the test independent of
            # whether the first character may start an identifier.
            while i - 4 >= limit and ('a' + str[i - 4:pos]).isidentifier():
                i -= 4
            if i - 2 >= limit and ('a' + str[i - 2:pos]).isidentifier():
                i -= 2
            if i - 1 >= limit and ('a' + str[i - 1:pos]).isidentifier():
                i -= 1

            # The identifier candidate starts here. If it isn't a valid
            # identifier, don't eat anything. At this point that is only
            # possible if the first character isn't a valid first
            # character for an identifier.
            if not str[i:pos].isidentifier():
                return 0
        elif i < pos:
            # All characters in str[i:pos] are valid ASCII identifier
            # characters, so it is enough to check that the first is
            # valid as the first character of an identifier.
            if not _IS_ASCII_ID_FIRST_CHAR[ord(str[i])]:
                return 0

        # All keywords are valid identifiers, but should not be
        # considered identifiers here, except for True, False and None.
        candidate = str[i:pos]
        if (i < pos and iskeyword(candidate) and
                candidate not in cls._ID_KEYWORDS):
            return 0

        return pos - i

    # This string includes all chars that may be in a white space
    _whitespace_chars = " \t\n\\"

    def get_expression(self):
        """Return a string with the Python expression which ends at the
        given index, which is empty if there is no real one.

        Raises ValueError if the index is not in normal code.
        """
        if not self.is_in_code():
            raise ValueError("get_expression should only be called "
                             "if index is inside a code.")

        rawtext = self.rawtext
        bracketing = self.bracketing

        brck_index = self.indexbracket
        brck_limit = bracketing[brck_index][0]
        pos = self.indexinrawtext

        last_identifier_pos = pos
        # True while an identifier or bracket (not a dot) is expected next.
        postdot_phase = True

        while True:
            # Eat whitespaces, comments, and if postdot_phase is False - a dot
            while True:
                if (pos > brck_limit and
                        rawtext[pos - 1] in self._whitespace_chars):
                    # Eat a whitespace
                    pos -= 1
                elif (not postdot_phase and
                      pos > brck_limit and rawtext[pos - 1] == '.'):
                    # Eat a dot
                    pos -= 1
                    postdot_phase = True
                # The next line will fail if we are *inside* a comment,
                # but we shouldn't be.
                elif (pos == brck_limit and brck_index > 0 and
                      rawtext[bracketing[brck_index - 1][0]] == '#'):
                    # Eat a comment
                    brck_index -= 2
                    brck_limit = bracketing[brck_index][0]
                    pos = bracketing[brck_index + 1][0]
                else:
                    # If we didn't eat anything, quit.
                    break

            if not postdot_phase:
                # We didn't find a dot, so the expression ends at the
                # last identifier pos.
                break

            ret = self._eat_identifier(rawtext, brck_limit, pos)
            if ret:
                # There is an identifier to eat
                pos = pos - ret
                last_identifier_pos = pos
                # Now, to continue the search, we must find a dot.
                postdot_phase = False
                # (the loop continues now)

            elif pos == brck_limit:
                # We are at a bracketing limit. If it is a closing
                # bracket, eat the bracket, otherwise, stop the search.
                level = bracketing[brck_index][1]
                while (brck_index > 0 and
                       bracketing[brck_index - 1][1] > level):
                    brck_index -= 1
                if bracketing[brck_index][0] == brck_limit:
                    # We were not at the end of a closing bracket
                    break
                pos = bracketing[brck_index][0]
                brck_index -= 1
                brck_limit = bracketing[brck_index][0]
                last_identifier_pos = pos
                # [] and () may be used after an identifier, so for those
                # we continue (postdot_phase is True, so we don't allow a
                # dot). We can't continue after other types of brackets.
                if rawtext[pos] not in "([":
                    if rawtext[pos] in "'\"":
                        # Scan a string prefix
                        while pos > 0 and rawtext[pos - 1] in "rRbBuU":
                            pos -= 1
                        last_identifier_pos = pos
                    break

            else:
                # We've found an operator or something.
                break

        return rawtext[last_identifier_pos:self.indexinrawtext]
|
jpayne@69
|
308
|
jpayne@69
|
309
|
jpayne@69
|
# When run as a script, run this module's unit tests verbosely.
if __name__ == '__main__':
    from unittest import main
    main('idlelib.idle_test.test_hyperparser', verbosity=2)
|