"""Define partial Python code Parser used by editor and hyperparser.

Instances of ParseMap are used with str.translate.

The following bound search and match functions are defined:
_synchre - start of popular statement;
_junkre - whitespace or comment line;
_match_stringre - string, possibly without closer;
_itemre - line that may have bracket structure start;
_closere - line that must be followed by dedent;
_chew_ordinaryre - non-special characters.
"""
import re

# Reason last statement is continued (or C_NONE if it's not).
# The five names are bound to the consecutive integers 0..4.
(C_NONE, C_BACKSLASH, C_STRING_FIRST_LINE,
 C_STRING_NEXT_LINES, C_BRACKET) = range(5)

jpayne@68
|
19 # Find what looks like the start of a popular statement.
|
jpayne@68
|
20
|
jpayne@68
|
21 _synchre = re.compile(r"""
|
jpayne@68
|
22 ^
|
jpayne@68
|
23 [ \t]*
|
jpayne@68
|
24 (?: while
|
jpayne@68
|
25 | else
|
jpayne@68
|
26 | def
|
jpayne@68
|
27 | return
|
jpayne@68
|
28 | assert
|
jpayne@68
|
29 | break
|
jpayne@68
|
30 | class
|
jpayne@68
|
31 | continue
|
jpayne@68
|
32 | elif
|
jpayne@68
|
33 | try
|
jpayne@68
|
34 | except
|
jpayne@68
|
35 | raise
|
jpayne@68
|
36 | import
|
jpayne@68
|
37 | yield
|
jpayne@68
|
38 )
|
jpayne@68
|
39 \b
|
jpayne@68
|
40 """, re.VERBOSE | re.MULTILINE).search
|
jpayne@68
|
41
|
jpayne@68
|
42 # Match blank line or non-indenting comment line.
|
jpayne@68
|
43
|
jpayne@68
|
44 _junkre = re.compile(r"""
|
jpayne@68
|
45 [ \t]*
|
jpayne@68
|
46 (?: \# \S .* )?
|
jpayne@68
|
47 \n
|
jpayne@68
|
48 """, re.VERBOSE).match
|
jpayne@68
|
49
|
jpayne@68
|
50 # Match any flavor of string; the terminating quote is optional
|
jpayne@68
|
51 # so that we're robust in the face of incomplete program text.
|
jpayne@68
|
52
|
jpayne@68
|
53 _match_stringre = re.compile(r"""
|
jpayne@68
|
54 \""" [^"\\]* (?:
|
jpayne@68
|
55 (?: \\. | "(?!"") )
|
jpayne@68
|
56 [^"\\]*
|
jpayne@68
|
57 )*
|
jpayne@68
|
58 (?: \""" )?
|
jpayne@68
|
59
|
jpayne@68
|
60 | " [^"\\\n]* (?: \\. [^"\\\n]* )* "?
|
jpayne@68
|
61
|
jpayne@68
|
62 | ''' [^'\\]* (?:
|
jpayne@68
|
63 (?: \\. | '(?!'') )
|
jpayne@68
|
64 [^'\\]*
|
jpayne@68
|
65 )*
|
jpayne@68
|
66 (?: ''' )?
|
jpayne@68
|
67
|
jpayne@68
|
68 | ' [^'\\\n]* (?: \\. [^'\\\n]* )* '?
|
jpayne@68
|
69 """, re.VERBOSE | re.DOTALL).match
|
jpayne@68
|
70
|
jpayne@68
|
71 # Match a line that starts with something interesting;
|
jpayne@68
|
72 # used to find the first item of a bracket structure.
|
jpayne@68
|
73
|
jpayne@68
|
74 _itemre = re.compile(r"""
|
jpayne@68
|
75 [ \t]*
|
jpayne@68
|
76 [^\s#\\] # if we match, m.end()-1 is the interesting char
|
jpayne@68
|
77 """, re.VERBOSE).match
|
jpayne@68
|
78
|
jpayne@68
|
79 # Match start of statements that should be followed by a dedent.
|
jpayne@68
|
80
|
jpayne@68
|
81 _closere = re.compile(r"""
|
jpayne@68
|
82 \s*
|
jpayne@68
|
83 (?: return
|
jpayne@68
|
84 | break
|
jpayne@68
|
85 | continue
|
jpayne@68
|
86 | raise
|
jpayne@68
|
87 | pass
|
jpayne@68
|
88 )
|
jpayne@68
|
89 \b
|
jpayne@68
|
90 """, re.VERBOSE).match
|
jpayne@68
|
91
|
jpayne@68
|
92 # Chew up non-special chars as quickly as possible. If match is
|
jpayne@68
|
93 # successful, m.end() less 1 is the index of the last boring char
|
jpayne@68
|
94 # matched. If match is unsuccessful, the string starts with an
|
jpayne@68
|
95 # interesting char.
|
jpayne@68
|
96
|
jpayne@68
|
97 _chew_ordinaryre = re.compile(r"""
|
jpayne@68
|
98 [^[\](){}#'"\\]+
|
jpayne@68
|
99 """, re.VERBOSE).match
|
jpayne@68
|
100
|
jpayne@68
|
101
|
class ParseMap(dict):
    r"""Dict subclass that maps anything not in dict to 'x'.

    This is designed to be used with str.translate in study1.
    Anything not specifically mapped otherwise becomes 'x'.
    Example: replace everything except whitespace with 'x'.

    >>> keepwhite = ParseMap((ord(c), ord(c)) for c in ' \t\n\r')
    >>> "a + b\tc\nd".translate(keepwhite)
    'x x x\tx\nx'
    """
    # Calling this triples access time; see bpo-32940
    def __missing__(self, key):
        """Return the code point of 'x' for any key not stored in the dict."""
        return 120  # ord('x')


# Map all ascii to 120 to avoid __missing__ call, then replace some.
trans = ParseMap.fromkeys(range(128), 120)
trans.update((ord(c), ord('(')) for c in "({[")  # open brackets => '(';
trans.update((ord(c), ord(')')) for c in ")}]")  # close brackets => ')'.
trans.update((ord(c), ord(c)) for c in "\"'\\\n#")  # Keep these.


class Parser:
    """Partial parser for a chunk of Python source ending in a newline.

    Feed text with set_code() (optionally trimmed with set_lo()), then ask
    about the last statement: its continuation type, indentation hints,
    bracketing structure, and whether it opens or closes a block.  Analysis
    is done lazily in two stages (_study1, _study2) and cached through
    self.study_level.
    """

    def __init__(self, indentwidth, tabwidth):
        """Store the indent width and tab width used by indent computations."""
        self.indentwidth = indentwidth
        self.tabwidth = tabwidth

    def set_code(self, s):
        """Set the text to analyze and discard any previous analysis.

        s must be empty or end with a newline.
        """
        assert len(s) == 0 or s[-1] == '\n'
        self.code = s
        self.study_level = 0

    def find_good_parse_start(self, is_char_in_string=None,
                              _synchre=_synchre):
        """
        Return index of a good place to begin parsing, as close to the
        end of the string as possible.  This will be the start of some
        popular stmt like "while" or "def".  Return None if none found:
        the caller should pass more prior context then, if possible, or
        if not (the entire program text up until the point of interest
        has already been tried) pass 0 to set_lo().

        This will be reliable iff given a reliable is_char_in_string()
        function, meaning that when it says "no", it's absolutely
        guaranteed that the char is not in a string.
        """
        code, pos = self.code, None

        if not is_char_in_string:
            # no clue -- make the caller pass everything
            return None

        # Peek back from the end for a good place to start,
        # but don't try too often; pos will be left None, or
        # bumped to a legitimate synch point.
        limit = len(code)
        for _ in range(5):
            i = code.rfind(":\n", 0, limit)
            if i < 0:
                break
            i = code.rfind('\n', 0, i) + 1  # start of colon line (-1+1=0)
            m = _synchre(code, i, limit)
            if m and not is_char_in_string(m.start()):
                pos = m.start()
                break
            limit = i
        if pos is None:
            # Nothing looks like a block-opener, or stuff does
            # but is_char_in_string keeps returning true; most likely
            # we're in or near a giant string, the colorizer hasn't
            # caught up enough to be helpful, or there simply *aren't*
            # any interesting stmts.  In any of these cases we're
            # going to have to parse the whole thing to be sure, so
            # give it one last try from the start, but stop wasting
            # time here regardless of the outcome.
            m = _synchre(code)
            if m and not is_char_in_string(m.start()):
                pos = m.start()
            return pos

        # Peeking back worked; look forward until _synchre no longer
        # matches.
        i = pos + 1
        while True:
            m = _synchre(code, i)
            if not m:
                break
            s, i = m.span()
            if not is_char_in_string(s):
                pos = s
        return pos

    def set_lo(self, lo):
        """ Throw away the start of the string.

        Intended to be called with the result of find_good_parse_start().
        lo must be 0 or the index just past a newline.
        """
        assert lo == 0 or self.code[lo-1] == '\n'
        if lo > 0:
            self.code = self.code[lo:]
            # Any cached analysis holds indices into the old text.
            self.study_level = 0

    def _study1(self):
        """Find the line numbers of non-continuation lines.

        As quickly as humanly possible <wink>, find the line numbers (0-
        based) of the non-continuation lines.
        Creates self.{goodlines, continuation}.
        """
        if self.study_level >= 1:
            return
        self.study_level = 1

        # Map all uninteresting characters to "x", all open brackets
        # to "(", all close brackets to ")", then collapse runs of
        # uninteresting characters.  This can cut the number of chars
        # by a factor of 10-40, and so greatly speed the following loop.
        code = self.code
        code = code.translate(trans)
        code = code.replace('xxxxxxxx', 'x')
        code = code.replace('xxxx', 'x')
        code = code.replace('xx', 'x')
        code = code.replace('xx', 'x')
        code = code.replace('\nx', '\n')
        # Replacing x\n with \n would be incorrect because
        # x may be preceded by a backslash.

        # March over the squashed version of the program, accumulating
        # the line numbers of non-continued stmts, and determining
        # whether & why the last stmt is a continuation.
        continuation = C_NONE
        level = lno = 0     # level is nesting level; lno is line number
        self.goodlines = goodlines = [0]
        push_good = goodlines.append
        i, n = 0, len(code)
        while i < n:
            ch = code[i]
            i += 1

            # cases are checked in decreasing order of frequency
            if ch == 'x':
                continue

            if ch == '\n':
                lno += 1
                if level == 0:
                    push_good(lno)
                # else we're in an unclosed bracket structure
                continue

            if ch == '(':
                level += 1
                continue

            if ch == ')':
                if level:
                    level -= 1
                # else the program is invalid, but we can't complain
                continue

            if ch == '"' or ch == "'":
                # consume the string
                quote = ch
                if code[i-1:i+2] == quote * 3:
                    quote = quote * 3
                firstlno = lno
                w = len(quote) - 1
                i += w
                while i < n:
                    ch = code[i]
                    i += 1

                    if ch == 'x':
                        continue

                    if code[i-1:i+w] == quote:
                        i += w
                        break

                    if ch == '\n':
                        lno += 1
                        if w == 0:
                            # unterminated single-quoted string
                            if level == 0:
                                push_good(lno)
                            break
                        continue

                    if ch == '\\':
                        assert i < n
                        if code[i] == '\n':
                            lno += 1
                        i += 1
                        continue

                    # else comment char or paren inside string

                else:
                    # didn't break out of the loop, so we're still
                    # inside a string
                    if (lno - 1) == firstlno:
                        # before the previous \n in code, we were in the first
                        # line of the string
                        continuation = C_STRING_FIRST_LINE
                    else:
                        continuation = C_STRING_NEXT_LINES
                continue    # with outer loop

            if ch == '#':
                # consume the comment
                i = code.find('\n', i)
                assert i >= 0
                continue

            assert ch == '\\'
            assert i < n
            if code[i] == '\n':
                lno += 1
                if i+1 == n:
                    continuation = C_BACKSLASH
            i += 1

        # The last stmt may be continued for all 3 reasons.
        # String continuation takes precedence over bracket
        # continuation, which beats backslash continuation.
        if (continuation != C_STRING_FIRST_LINE
                and continuation != C_STRING_NEXT_LINES and level > 0):
            continuation = C_BRACKET
        self.continuation = continuation

        # Push the final line number as a sentinel value, regardless of
        # whether it's continued.
        assert (continuation == C_NONE) == (goodlines[-1] == lno)
        if goodlines[-1] != lno:
            push_good(lno)

    def get_continuation_type(self):
        """Return the C_* constant describing why the last stmt continues."""
        self._study1()
        return self.continuation

    def _study2(self):
        """
        study1 was sufficient to determine the continuation status,
        but doing more requires looking at every character.  study2
        does this for the last interesting statement in the block.
        Creates:
            self.stmt_start, stmt_end
                slice indices of last interesting stmt
            self.stmt_bracketing
                the bracketing structure of the last interesting stmt; for
                example, for the statement "say(boo) or die",
                stmt_bracketing will be ((0, 0), (0, 1), (2, 0), (2, 1),
                (4, 0)). Strings and comments are treated as brackets, for
                the matter.
            self.lastch
                last interesting character before optional trailing comment
            self.lastopenbracketpos
                if continuation is C_BRACKET, index of last open bracket
        """
        if self.study_level >= 2:
            return
        self._study1()
        self.study_level = 2

        # Set p and q to slice indices of last interesting stmt.
        code, goodlines = self.code, self.goodlines
        i = len(goodlines) - 1  # Index of newest line.
        p = len(code)  # End of goodlines[i]
        while i:
            assert p
            # Make p be the index of the stmt at line number goodlines[i].
            # Move p back to the stmt at line number goodlines[i-1].
            q = p
            for _ in range(goodlines[i-1], goodlines[i]):
                # tricky: sets p to 0 if no preceding newline
                p = code.rfind('\n', 0, p-1) + 1
            # The stmt code[p:q] isn't a continuation, but may be blank
            # or a non-indenting comment line.
            if _junkre(code, p):
                i -= 1
            else:
                break
        if i == 0:
            # nothing but junk!
            assert p == 0
            q = p
        self.stmt_start, self.stmt_end = p, q

        # Analyze this stmt, to find the last open bracket (if any)
        # and last interesting character (if any).
        lastch = ""
        stack = []  # stack of open bracket indices
        push_stack = stack.append
        bracketing = [(p, 0)]
        while p < q:
            # suck up all except ()[]{}'"#\\
            m = _chew_ordinaryre(code, p, q)
            if m:
                # we skipped at least one boring char
                newp = m.end()
                # back up over totally boring whitespace
                i = newp - 1    # index of last boring char
                while i >= p and code[i] in " \t\n":
                    i -= 1
                if i >= p:
                    lastch = code[i]
                p = newp
                if p >= q:
                    break

            ch = code[p]

            if ch in "([{":
                push_stack(p)
                bracketing.append((p, len(stack)))
                lastch = ch
                p += 1
                continue

            if ch in ")]}":
                if stack:
                    del stack[-1]
                lastch = ch
                p += 1
                bracketing.append((p, len(stack)))
                continue

            if ch == '"' or ch == "'":
                # consume string
                # Note that study1 did this with a Python loop, but
                # we use a regexp here; the reason is speed in both
                # cases; the string may be huge, but study1 pre-squashed
                # strings to a couple of characters per line.  study1
                # also needed to keep track of newlines, and we don't
                # have to.
                bracketing.append((p, len(stack)+1))
                lastch = ch
                p = _match_stringre(code, p, q).end()
                bracketing.append((p, len(stack)))
                continue

            if ch == '#':
                # consume comment and trailing newline
                bracketing.append((p, len(stack)+1))
                p = code.find('\n', p, q) + 1
                assert p > 0
                bracketing.append((p, len(stack)))
                continue

            assert ch == '\\'
            p += 1     # beyond backslash
            assert p < q
            if code[p] != '\n':
                # the program is invalid, but can't complain
                lastch = ch + code[p]
            p += 1     # beyond escaped char

        # end while p < q:

        self.lastch = lastch
        self.lastopenbracketpos = stack[-1] if stack else None
        self.stmt_bracketing = tuple(bracketing)

    def compute_bracket_indent(self):
        """Return number of spaces the next line should be indented.

        Line continuation must be C_BRACKET.
        """
        self._study2()
        assert self.continuation == C_BRACKET
        j = self.lastopenbracketpos
        code = self.code
        n = len(code)
        origi = i = code.rfind('\n', 0, j) + 1
        j += 1     # one beyond open bracket
        # find first list item; set i to start of its line
        while j < n:
            m = _itemre(code, j)
            if m:
                j = m.end() - 1     # index of first interesting char
                extra = 0
                break
            # this line is junk; advance to next line
            i = j = code.find('\n', j) + 1
        else:
            # nothing interesting follows the bracket;
            # reproduce the bracket line's indentation + a level
            j = i = origi
            while code[j] in " \t":
                j += 1
            extra = self.indentwidth
        return len(code[i:j].expandtabs(self.tabwidth)) + extra

    def get_num_lines_in_stmt(self):
        """Return number of physical lines in last stmt.

        The statement doesn't have to be an interesting statement.  This is
        intended to be called when continuation is C_BACKSLASH.
        """
        self._study1()
        goodlines = self.goodlines
        return goodlines[-1] - goodlines[-2]

    def compute_backslash_indent(self):
        """Return number of spaces the next line should be indented.

        Line continuation must be C_BACKSLASH.  Also assume that the new
        line is the first one following the initial line of the stmt.
        """
        self._study2()
        assert self.continuation == C_BACKSLASH
        code = self.code
        i = self.stmt_start
        while code[i] in " \t":
            i += 1
        startpos = i

        # See whether the initial line starts an assignment stmt; i.e.,
        # look for an = operator
        endpos = code.find('\n', startpos) + 1
        found = level = 0
        while i < endpos:
            ch = code[i]
            if ch in "([{":
                level += 1
                i += 1
            elif ch in ")]}":
                if level:
                    level -= 1
                i += 1
            elif ch == '"' or ch == "'":
                i = _match_stringre(code, i, endpos).end()
            elif ch == '#':
                # This line is unreachable because the # makes a comment of
                # everything after it.
                break
            elif (level == 0 and ch == '='
                  and (i == 0 or code[i-1] not in "=<>!")
                  and code[i+1] != '='):
                found = 1
                break
            else:
                i += 1

        if found:
            # found a legit =, but it may be the last interesting
            # thing on the line
            i += 1     # move beyond the =
            found = re.match(r"\s*\\", code[i:endpos]) is None

        if not found:
            # oh well ... settle for moving beyond the first chunk
            # of non-whitespace chars
            i = startpos
            while code[i] not in " \t\n":
                i += 1

        return len(code[self.stmt_start:i].expandtabs(self.tabwidth)) + 1

    def get_base_indent_string(self):
        """Return the leading whitespace on the initial line of the last
        interesting stmt.
        """
        self._study2()
        i, n = self.stmt_start, self.stmt_end
        j = i
        code = self.code
        while j < n and code[j] in " \t":
            j += 1
        return code[i:j]

    def is_block_opener(self):
        "Return True if the last interesting statement opens a block."
        self._study2()
        return self.lastch == ':'

    def is_block_closer(self):
        "Return True if the last interesting statement closes a block."
        self._study2()
        return _closere(self.code, self.stmt_start) is not None

    def get_last_stmt_bracketing(self):
        """Return bracketing structure of the last interesting statement.

        The returned tuple is in the format defined in _study2().
        """
        self._study2()
        return self.stmt_bracketing


# When run as a script, execute this module's unit tests verbosely.
if __name__ == '__main__':
    from unittest import main
    main('idlelib.idle_test.test_pyparse', verbosity=2)