Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/idlelib/hyperparser.py @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 """Provide advanced parsing abilities for ParenMatch and other extensions. | |
2 | |
3 HyperParser uses PyParser. PyParser mostly gives information on the | |
4 proper indentation of code. HyperParser gives additional information on | |
5 the structure of code. | |
6 """ | |
7 from keyword import iskeyword | |
8 import string | |
9 | |
10 from idlelib import pyparse | |
11 | |
# all ASCII chars that may be in an identifier
_ASCII_ID_CHARS = frozenset(string.ascii_letters + string.digits + "_")
# all ASCII chars that may be the first char of an identifier
_ASCII_ID_FIRST_CHARS = frozenset(string.ascii_letters + "_")

# lookup table for whether 7-bit ASCII chars are valid in a Python identifier
_IS_ASCII_ID_CHAR = [(chr(x) in _ASCII_ID_CHARS) for x in range(128)]
# lookup table for whether 7-bit ASCII chars are valid as the first
# char in a Python identifier
_IS_ASCII_ID_FIRST_CHAR = \
    [(chr(x) in _ASCII_ID_FIRST_CHARS) for x in range(128)]


class HyperParser:
    """Answer structural questions about the code around a text index.

    An instance analyzes the statement containing a given index of an
    editor window's text widget and can then report whether the index
    is inside a string or normal code, which brackets surround it, and
    which expression ends at it.
    """

    def __init__(self, editwin, index):
        """To initialize, analyze the surroundings of the given index.

        editwin must provide the attributes used below: text,
        indentwidth, tabwidth, prompt_last_line, num_context_lines and
        _build_char_in_string_func().  index is a text widget index.
        """

        self.editwin = editwin
        self.text = text = editwin.text

        parser = pyparse.Parser(editwin.indentwidth, editwin.tabwidth)

        # text.index() gives "line.column"; keep only the line number.
        lno = int(float(text.index(index)))
        # Both branches below analyze the text up to the end of that line.
        stopatindex = "%d.end" % lno

        if not editwin.prompt_last_line:
            # Try progressively larger amounts of preceding context
            # until the parser finds a good place to start parsing.
            for context in editwin.num_context_lines:
                startat = max(lno - context, 1)
                startatindex = repr(startat) + ".0"
                # We add the newline because PyParse requires a newline
                # at end. We add a space so that index won't be at end
                # of line, so that its status will be the same as the
                # char before it, if should.
                parser.set_code(text.get(startatindex, stopatindex)+' \n')
                bod = parser.find_good_parse_start(
                          editwin._build_char_in_string_func(startatindex))
                if bod is not None or startat == 1:
                    break
            parser.set_lo(bod or 0)
        else:
            # Start right after the previous "console"-tagged range, or
            # at the start of the text if there is none.
            r = text.tag_prevrange("console", index)
            if r:
                startatindex = r[1]
            else:
                startatindex = "1.0"
            # We add the newline because PyParse requires it. We add a
            # space so that index won't be at end of line, so that its
            # status will be the same as the char before it, if should.
            parser.set_code(text.get(startatindex, stopatindex)+' \n')
            parser.set_lo(0)

        # We want what the parser has, minus the last newline and space.
        self.rawtext = parser.code[:-2]
        # Parser.code apparently preserves the statement we are in, so
        # that stopatindex can be used to synchronize the string with
        # the text box indices.
        self.stopatindex = stopatindex
        self.bracketing = parser.get_last_stmt_bracketing()
        # find which pairs of bracketing are openers. These always
        # correspond to a character of rawtext.
        self.isopener = [i>0 and self.bracketing[i][1] >
                         self.bracketing[i-1][1]
                         for i in range(len(self.bracketing))]

        self.set_index(index)

    def set_index(self, index):
        """Set the index to which the functions relate.

        The index must be in the same statement.

        Raises ValueError if the text between index and the end of the
        analyzed text is longer than the analyzed text itself, i.e. the
        index lies before it.
        """
        # rawtext ends at stopatindex, so the offset of index within
        # rawtext is measured back from the end.
        indexinrawtext = (len(self.rawtext) -
                          len(self.text.get(index, self.stopatindex)))
        if indexinrawtext < 0:
            raise ValueError("Index %s precedes the analyzed statement"
                             % index)
        self.indexinrawtext = indexinrawtext
        # find the rightmost bracket to which index belongs
        self.indexbracket = 0
        while (self.indexbracket < len(self.bracketing)-1 and
               self.bracketing[self.indexbracket+1][0] < self.indexinrawtext):
            self.indexbracket += 1
        # An entry that starts exactly at the index and is not an
        # opener also counts as the one the index belongs to.
        if (self.indexbracket < len(self.bracketing)-1 and
            self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and
           not self.isopener[self.indexbracket+1]):
            self.indexbracket += 1

    def is_in_string(self):
        """Is the index given to the HyperParser in a string?"""
        # The bracket to which we belong should be an opener.
        # If it's an opener, it has to have a character.
        return (self.isopener[self.indexbracket] and
                self.rawtext[self.bracketing[self.indexbracket][0]]
                in ('"', "'"))

    def is_in_code(self):
        """Is the index given to the HyperParser in normal code?"""
        # Anything that was not opened by a comment or string character
        # counts as code.
        return (not self.isopener[self.indexbracket] or
                self.rawtext[self.bracketing[self.indexbracket][0]]
                not in ('#', '"', "'"))

    def get_surrounding_brackets(self, openers='([{', mustclose=False):
        """Return bracket indexes or None.

        If the index given to the HyperParser is surrounded by a
        bracket defined in openers (or at least has one before it),
        return the indices of the opening bracket and the closing
        bracket (or the end of line, whichever comes first).

        If it is not surrounded by brackets, or the end of line comes
        before the closing bracket and mustclose is True, returns None.
        """

        bracketinglevel = self.bracketing[self.indexbracket][1]
        # Walk backwards to the nearest opener from `openers` that is
        # not nested deeper than the level seen so far.
        before = self.indexbracket
        while (not self.isopener[before] or
              self.rawtext[self.bracketing[before][0]] not in openers or
              self.bracketing[before][1] > bracketinglevel):
            before -= 1
            if before < 0:
                return None
            bracketinglevel = min(bracketinglevel, self.bracketing[before][1])
        # Walk forwards to the first entry below that level.
        after = self.indexbracket + 1
        while (after < len(self.bracketing) and
              self.bracketing[after][1] >= bracketinglevel):
            after += 1

        # Convert rawtext offsets to text widget indices by counting
        # characters back from stopatindex.
        beforeindex = self.text.index("%s-%dc" %
            (self.stopatindex, len(self.rawtext)-self.bracketing[before][0]))
        if (after >= len(self.bracketing) or
           self.bracketing[after][0] > len(self.rawtext)):
            if mustclose:
                return None
            afterindex = self.stopatindex
        else:
            # We are after a real char, so it is a ')' and we give the
            # index before it.
            afterindex = self.text.index(
                "%s-%dc" % (self.stopatindex,
                 len(self.rawtext)-(self.bracketing[after][0]-1)))

        return beforeindex, afterindex

    # the set of built-in identifiers which are also keywords,
    # i.e. keyword.iskeyword() returns True for them
    _ID_KEYWORDS = frozenset({"True", "False", "None"})

    @classmethod
    def _eat_identifier(cls, str, limit, pos):
        """Given a string and pos, return the number of chars in the
        identifier which ends at pos, or 0 if there is no such one.

        Characters before limit are never examined.  Keywords are not
        considered identifiers, except for True, False and None.
        """
        # NOTE: the parameter name `str` shadows the builtin inside this
        # method; it is kept so the signature stays unchanged.
        is_ascii_id_char = _IS_ASCII_ID_CHAR

        # Start at the end (pos) and work backwards.
        i = pos

        # Go backwards as long as the characters are valid ASCII
        # identifier characters. This is an optimization, since it
        # is faster in the common case where most of the characters
        # are ASCII.
        while i > limit and (
                ord(str[i - 1]) < 128 and
                is_ascii_id_char[ord(str[i - 1])]
        ):
            i -= 1

        # If the above loop ended due to reaching a non-ASCII
        # character, continue going backwards using the most generic
        # test for whether a string contains only valid identifier
        # characters.
        if i > limit and ord(str[i - 1]) >= 128:
            # Prefixing 'a' makes the test accept any run of valid
            # identifier characters, even one that could not start an
            # identifier.  Step by 4, then 2, then 1 characters.
            while i - 4 >= limit and ('a' + str[i - 4:pos]).isidentifier():
                i -= 4
            if i - 2 >= limit and ('a' + str[i - 2:pos]).isidentifier():
                i -= 2
            if i - 1 >= limit and ('a' + str[i - 1:pos]).isidentifier():
                i -= 1

            # The identifier candidate starts here. If it isn't a valid
            # identifier, don't eat anything. At this point that is only
            # possible if the first character isn't a valid first
            # character for an identifier.
            if not str[i:pos].isidentifier():
                return 0
        elif i < pos:
            # All characters in str[i:pos] are valid ASCII identifier
            # characters, so it is enough to check that the first is
            # valid as the first character of an identifier.
            if not _IS_ASCII_ID_FIRST_CHAR[ord(str[i])]:
                return 0

        # All keywords are valid identifiers, but should not be
        # considered identifiers here, except for True, False and None.
        if i < pos and (
                iskeyword(str[i:pos]) and
                str[i:pos] not in cls._ID_KEYWORDS
        ):
            return 0

        return pos - i

    # This string includes all chars that may be in a white space
    _whitespace_chars = " \t\n\\"

    def get_expression(self):
        """Return a string with the Python expression which ends at the
        given index, which is empty if there is no real one.

        Raises ValueError if the index is not in normal code (see
        is_in_code()).
        """
        if not self.is_in_code():
            raise ValueError("get_expression should only be called "
                             "if index is inside a code.")

        rawtext = self.rawtext
        bracketing = self.bracketing

        brck_index = self.indexbracket
        brck_limit = bracketing[brck_index][0]
        pos = self.indexinrawtext

        # The expression is scanned backwards from the index;
        # last_identifier_pos tracks where it currently starts.
        last_identifier_pos = pos
        postdot_phase = True

        while True:
            # Eat whitespaces, comments, and if postdot_phase is False - a dot
            while True:
                if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars:
                    # Eat a whitespace
                    pos -= 1
                elif (not postdot_phase and
                      pos > brck_limit and rawtext[pos-1] == '.'):
                    # Eat a dot
                    pos -= 1
                    postdot_phase = True
                # The next line will fail if we are *inside* a comment,
                # but we shouldn't be.
                elif (pos == brck_limit and brck_index > 0 and
                      rawtext[bracketing[brck_index-1][0]] == '#'):
                    # Eat a comment
                    brck_index -= 2
                    brck_limit = bracketing[brck_index][0]
                    pos = bracketing[brck_index+1][0]
                else:
                    # If we didn't eat anything, quit.
                    break

            if not postdot_phase:
                # We didn't find a dot, so the expression end at the
                # last identifier pos.
                break

            ret = self._eat_identifier(rawtext, brck_limit, pos)
            if ret:
                # There is an identifier to eat
                pos = pos - ret
                last_identifier_pos = pos
                # Now, to continue the search, we must find a dot.
                postdot_phase = False
                # (the loop continues now)

            elif pos == brck_limit:
                # We are at a bracketing limit. If it is a closing
                # bracket, eat the bracket, otherwise, stop the search.
                level = bracketing[brck_index][1]
                while brck_index > 0 and bracketing[brck_index-1][1] > level:
                    brck_index -= 1
                if bracketing[brck_index][0] == brck_limit:
                    # We were not at the end of a closing bracket
                    break
                pos = bracketing[brck_index][0]
                brck_index -= 1
                brck_limit = bracketing[brck_index][0]
                last_identifier_pos = pos
                if rawtext[pos] in "([":
                    # [] and () may be used after an identifier, so we
                    # continue. postdot_phase is True, so we don't allow a dot.
                    pass
                else:
                    # We can't continue after other types of brackets
                    if rawtext[pos] in "'\"":
                        # Scan a string prefix
                        while pos > 0 and rawtext[pos - 1] in "rRbBuU":
                            pos -= 1
                        last_identifier_pos = pos
                    break

            else:
                # We've found an operator or something.
                break

        return rawtext[last_identifier_pos:self.indexinrawtext]
308 | |
309 | |
if __name__ == '__main__':
    # When executed as a script, run the test module named below.
    from unittest import main
    main('idlelib.idle_test.test_hyperparser', verbosity=2)