"""Define partial Python code Parser used by editor and hyperparser.

Instances of ParseMap are used with str.translate.

The following bound search and match functions are defined:
_synchre - start of popular statement;
_junkre - whitespace or comment line;
_match_stringre - string, possibly without closer;
_itemre - line that may have bracket structure start;
_closere - line that must be followed by dedent;
_chew_ordinaryre - non-special characters.
"""
jpayne@68: import re
jpayne@68: 
# Codes describing why the last statement is continued onto the next
# line; C_NONE means that it is not continued at all.
C_NONE = 0
C_BACKSLASH = 1
C_STRING_FIRST_LINE = 2
C_STRING_NEXT_LINES = 3
C_BRACKET = 4

# Every name below is bound directly to the search or match method of a
# compiled pattern, so it is called as _name(string[, pos[, endpos]]).

# Search for a line that, after optional indentation, begins with a
# keyword that commonly starts a statement.
_synchre = re.compile(r"""
    ^
    [ \t]*
    (?: while
    |   else
    |   def
    |   return
    |   assert
    |   break
    |   class
    |   continue
    |   elif
    |   try
    |   except
    |   raise
    |   import
    |   yield
    )
    \b
""", flags=re.MULTILINE | re.VERBOSE).search

# Match a line that is blank, or that holds only a comment whose '#' is
# immediately followed by a non-whitespace character (such a comment
# does not influence indentation).
_junkre = re.compile(r"""
    [ \t]*
    (?: \# \S .* )?
    \n
""", flags=re.VERBOSE).match

# Match a string literal of any quoting style.  The closing quote is
# optional, which keeps the parser usable on incomplete program text.
_match_stringre = re.compile(r"""
    \""" [^"\\]* (?:
                     (?: \\. | "(?!"") )
                     [^"\\]*
                 )*
    (?: \""" )?

|   " [^"\\\n]* (?: \\. [^"\\\n]* )* "?

|   ''' [^'\\]* (?:
                   (?: \\. | '(?!'') )
                   [^'\\]*
                )*
    (?: ''' )?

|   ' [^'\\\n]* (?: \\. [^'\\\n]* )* '?
""", flags=re.DOTALL | re.VERBOSE).match

# Match optional indentation followed by one interesting character;
# used to locate the first item inside a bracket structure.
_itemre = re.compile(r"""
    [ \t]*
    [^\s#\\]    # if we match, m.end()-1 is the interesting char
""", flags=re.VERBOSE).match

# Match the start of a statement after which the next line should be
# dedented.
_closere = re.compile(r"""
    \s*
    (?: return
    |   break
    |   continue
    |   raise
    |   pass
    )
    \b
""", flags=re.VERBOSE).match

# Consume a run of characters that are neither brackets, quotes, '#'
# nor backslash.  On success, m.end() - 1 is the index of the last such
# boring character; on failure, the text starts with an interesting
# character.
_chew_ordinaryre = re.compile(r"""
    [^[\](){}#'"\\]+
""", flags=re.VERBOSE).match
jpayne@68: 
jpayne@68: 
class ParseMap(dict):
    r"""Translation table in which every unmapped key yields ord('x').

    Meant to be handed to str.translate: any code point without an
    explicit entry is replaced by 'x'.
    Example: keep whitespace and turn everything else into 'x'.

    >>> keepwhite = ParseMap((ord(c), ord(c)) for c in ' \t\n\r')
    >>> "a + b\tc\nd".translate(keepwhite)
    'x x x\tx\nx'
    """
    # Going through __missing__ triples access time; see bpo-32940.
    def __missing__(self, key):
        return ord('x')
jpayne@68: 
jpayne@68: 
# Give every ASCII code point an explicit entry (defaulting to 'x') so
# that translating ASCII text never falls back on __missing__, then
# override the characters the parser needs to tell apart.
trans = ParseMap.fromkeys(range(128), ord('x'))
trans.update({ord(c): ord('(') for c in "({["})  # any open bracket => '('
trans.update({ord(c): ord(')') for c in ")}]"})  # any close bracket => ')'
trans.update({ord(c): ord(c) for c in "\"'\\\n#"})  # these stay unchanged
jpayne@68: 
jpayne@68: 
jpayne@68: class Parser:
jpayne@68: 
jpayne@68:     def __init__(self, indentwidth, tabwidth):
jpayne@68:         self.indentwidth = indentwidth
jpayne@68:         self.tabwidth = tabwidth
jpayne@68: 
jpayne@68:     def set_code(self, s):
jpayne@68:         assert len(s) == 0 or s[-1] == '\n'
jpayne@68:         self.code = s
jpayne@68:         self.study_level = 0
jpayne@68: 
jpayne@68:     def find_good_parse_start(self, is_char_in_string=None,
jpayne@68:                               _synchre=_synchre):
jpayne@68:         """
jpayne@68:         Return index of a good place to begin parsing, as close to the
jpayne@68:         end of the string as possible.  This will be the start of some
jpayne@68:         popular stmt like "if" or "def".  Return None if none found:
jpayne@68:         the caller should pass more prior context then, if possible, or
jpayne@68:         if not (the entire program text up until the point of interest
jpayne@68:         has already been tried) pass 0 to set_lo().
jpayne@68: 
jpayne@68:         This will be reliable iff given a reliable is_char_in_string()
jpayne@68:         function, meaning that when it says "no", it's absolutely
jpayne@68:         guaranteed that the char is not in a string.
jpayne@68:         """
jpayne@68:         code, pos = self.code, None
jpayne@68: 
jpayne@68:         if not is_char_in_string:
jpayne@68:             # no clue -- make the caller pass everything
jpayne@68:             return None
jpayne@68: 
jpayne@68:         # Peek back from the end for a good place to start,
jpayne@68:         # but don't try too often; pos will be left None, or
jpayne@68:         # bumped to a legitimate synch point.
jpayne@68:         limit = len(code)
jpayne@68:         for tries in range(5):
jpayne@68:             i = code.rfind(":\n", 0, limit)
jpayne@68:             if i < 0:
jpayne@68:                 break
jpayne@68:             i = code.rfind('\n', 0, i) + 1  # start of colon line (-1+1=0)
jpayne@68:             m = _synchre(code, i, limit)
jpayne@68:             if m and not is_char_in_string(m.start()):
jpayne@68:                 pos = m.start()
jpayne@68:                 break
jpayne@68:             limit = i
jpayne@68:         if pos is None:
jpayne@68:             # Nothing looks like a block-opener, or stuff does
jpayne@68:             # but is_char_in_string keeps returning true; most likely
jpayne@68:             # we're in or near a giant string, the colorizer hasn't
jpayne@68:             # caught up enough to be helpful, or there simply *aren't*
jpayne@68:             # any interesting stmts.  In any of these cases we're
jpayne@68:             # going to have to parse the whole thing to be sure, so
jpayne@68:             # give it one last try from the start, but stop wasting
jpayne@68:             # time here regardless of the outcome.
jpayne@68:             m = _synchre(code)
jpayne@68:             if m and not is_char_in_string(m.start()):
jpayne@68:                 pos = m.start()
jpayne@68:             return pos
jpayne@68: 
jpayne@68:         # Peeking back worked; look forward until _synchre no longer
jpayne@68:         # matches.
jpayne@68:         i = pos + 1
jpayne@68:         while 1:
jpayne@68:             m = _synchre(code, i)
jpayne@68:             if m:
jpayne@68:                 s, i = m.span()
jpayne@68:                 if not is_char_in_string(s):
jpayne@68:                     pos = s
jpayne@68:             else:
jpayne@68:                 break
jpayne@68:         return pos
jpayne@68: 
jpayne@68:     def set_lo(self, lo):
jpayne@68:         """ Throw away the start of the string.
jpayne@68: 
jpayne@68:         Intended to be called with the result of find_good_parse_start().
jpayne@68:         """
jpayne@68:         assert lo == 0 or self.code[lo-1] == '\n'
jpayne@68:         if lo > 0:
jpayne@68:             self.code = self.code[lo:]
jpayne@68: 
    def _study1(self):
        """Find the line numbers of non-continuation lines.

        As quickly as humanly possible <wink>, find the line numbers (0-
        based) of the non-continuation lines.
        Creates self.{goodlines, continuation}.

        self.goodlines starts as [0]; a line number is appended after
        each newline met at bracket nesting level 0 outside a string,
        and the final line number is appended as a sentinel if it is
        not already the last entry.  self.continuation is set to one of
        the C_* constants.  Does nothing if study_level is already >= 1.
        """
        if self.study_level >= 1:
            return
        self.study_level = 1

        # Map all uninteresting characters to "x", all open brackets
        # to "(", all close brackets to ")", then collapse runs of
        # uninteresting characters.  This can cut the number of chars
        # by a factor of 10-40, and so greatly speed the following loop.
        code = self.code
        code = code.translate(trans)
        code = code.replace('xxxxxxxx', 'x')
        code = code.replace('xxxx', 'x')
        code = code.replace('xx', 'x')
        code = code.replace('xx', 'x')
        code = code.replace('\nx', '\n')
        # Replacing x\n with \n would be incorrect because
        # x may be preceded by a backslash.

        # March over the squashed version of the program, accumulating
        # the line numbers of non-continued stmts, and determining
        # whether & why the last stmt is a continuation.
        continuation = C_NONE
        level = lno = 0     # level is nesting level; lno is line number
        self.goodlines = goodlines = [0]
        push_good = goodlines.append
        i, n = 0, len(code)
        while i < n:
            ch = code[i]
            i = i+1

            # cases are checked in decreasing order of frequency
            if ch == 'x':
                continue

            if ch == '\n':
                lno = lno + 1
                if level == 0:
                    push_good(lno)
                    # else we're in an unclosed bracket structure
                continue

            if ch == '(':
                level = level + 1
                continue

            if ch == ')':
                if level:
                    level = level - 1
                    # else the program is invalid, but we can't complain
                continue

            if ch == '"' or ch == "'":
                # consume the string
                quote = ch
                # i was already advanced, so code[i-1] is the quote just
                # read; three in a row open a triple-quoted string.
                if code[i-1:i+2] == quote * 3:
                    quote = quote * 3
                firstlno = lno
                # w is the number of extra quote chars: 0 or 2.
                w = len(quote) - 1
                i = i+w
                while i < n:
                    ch = code[i]
                    i = i+1

                    if ch == 'x':
                        continue

                    if code[i-1:i+w] == quote:
                        # closing quote(s) found; skip past them
                        i = i+w
                        break

                    if ch == '\n':
                        lno = lno + 1
                        if w == 0:
                            # unterminated single-quoted string
                            if level == 0:
                                push_good(lno)
                            break
                        continue

                    if ch == '\\':
                        # skip the escaped char, still counting an
                        # escaped newline as a line
                        assert i < n
                        if code[i] == '\n':
                            lno = lno + 1
                        i = i+1
                        continue

                    # else comment char or paren inside string

                else:
                    # didn't break out of the loop, so we're still
                    # inside a string
                    if (lno - 1) == firstlno:
                        # before the previous \n in code, we were in the first
                        # line of the string
                        continuation = C_STRING_FIRST_LINE
                    else:
                        continuation = C_STRING_NEXT_LINES
                continue    # with outer loop

            if ch == '#':
                # consume the comment
                # i is left on the newline itself, so the main loop
                # counts that line on its next iteration.
                i = code.find('\n', i)
                assert i >= 0
                continue

            # Only a backslash can be left at this point.
            assert ch == '\\'
            assert i < n
            if code[i] == '\n':
                lno = lno + 1
                # Only a backslash-newline ending the text makes the
                # last stmt a backslash continuation.
                if i+1 == n:
                    continuation = C_BACKSLASH
            i = i+1

        # The last stmt may be continued for all 3 reasons.
        # String continuation takes precedence over bracket
        # continuation, which beats backslash continuation.
        if (continuation != C_STRING_FIRST_LINE
            and continuation != C_STRING_NEXT_LINES and level > 0):
            continuation = C_BRACKET
        self.continuation = continuation

        # Push the final line number as a sentinel value, regardless of
        # whether it's continued.
        assert (continuation == C_NONE) == (goodlines[-1] == lno)
        if goodlines[-1] != lno:
            push_good(lno)
jpayne@68: 
    def get_continuation_type(self):
        """Return the C_* constant saying why the last stmt is continued.

        Runs _study1() first, which computes self.continuation unless
        that study has already been done.
        """
        self._study1()
        return self.continuation
jpayne@68: 
    def _study2(self):
        """Analyze the last interesting statement character by character.

        study1 was sufficient to determine the continuation status,
        but doing more requires looking at every character.  study2
        does this for the last interesting statement in the block.
        Does nothing if study_level is already >= 2.

        Creates:
            self.stmt_start, stmt_end
                slice indices of last interesting stmt
            self.stmt_bracketing
                the bracketing structure of the last interesting stmt,
                as a tuple of (index into self.code, nesting level)
                pairs; for example, for the one-line statement
                "say(boo) or die", stmt_bracketing will be
                ((0, 0), (3, 1), (8, 0)). Strings and comments are
                treated as brackets, for that matter.
            self.lastch
                last interesting character before optional trailing comment
                (a backslash followed by a non-newline character is
                recorded as that two-character sequence)
            self.lastopenbracketpos
                index of the last open bracket that is still unclosed,
                or None if there is none
        """
        if self.study_level >= 2:
            return
        self._study1()
        self.study_level = 2

        # Set p and q to slice indices of last interesting stmt.
        code, goodlines = self.code, self.goodlines
        i = len(goodlines) - 1  # Index of newest line.
        p = len(code)  # End of goodlines[i]
        while i:
            assert p
            # Make p be the index of the stmt at line number goodlines[i].
            # Move p back to the stmt at line number goodlines[i-1].
            q = p
            # One rfind per physical line between the two line numbers.
            for nothing in range(goodlines[i-1], goodlines[i]):
                # tricky: sets p to 0 if no preceding newline
                p = code.rfind('\n', 0, p-1) + 1
            # The stmt code[p:q] isn't a continuation, but may be blank
            # or a non-indenting comment line.
            if  _junkre(code, p):
                i = i-1
            else:
                break
        if i == 0:
            # nothing but junk!
            assert p == 0
            q = p
        self.stmt_start, self.stmt_end = p, q

        # Analyze this stmt, to find the last open bracket (if any)
        # and last interesting character (if any).
        lastch = ""
        stack = []  # stack of open bracket indices
        push_stack = stack.append
        bracketing = [(p, 0)]
        while p < q:
            # suck up all except ()[]{}'"#\\
            m = _chew_ordinaryre(code, p, q)
            if m:
                # we skipped at least one boring char
                newp = m.end()
                # back up over totally boring whitespace
                i = newp - 1    # index of last boring char
                while i >= p and code[i] in " \t\n":
                    i = i-1
                if i >= p:
                    lastch = code[i]
                p = newp
                if p >= q:
                    break

            ch = code[p]

            if ch in "([{":
                push_stack(p)
                # an opener is recorded at its own index ...
                bracketing.append((p, len(stack)))
                lastch = ch
                p = p+1
                continue

            if ch in ")]}":
                if stack:
                    del stack[-1]
                lastch = ch
                p = p+1
                # ... while a closer is recorded just past its index
                bracketing.append((p, len(stack)))
                continue

            if ch == '"' or ch == "'":
                # consume string
                # Note that study1 did this with a Python loop, but
                # we use a regexp here; the reason is speed in both
                # cases; the string may be huge, but study1 pre-squashed
                # strings to a couple of characters per line.  study1
                # also needed to keep track of newlines, and we don't
                # have to.
                bracketing.append((p, len(stack)+1))
                lastch = ch
                p = _match_stringre(code, p, q).end()
                bracketing.append((p, len(stack)))
                continue

            if ch == '#':
                # consume comment and trailing newline
                # (lastch is deliberately left untouched here)
                bracketing.append((p, len(stack)+1))
                p = code.find('\n', p, q) + 1
                assert p > 0
                bracketing.append((p, len(stack)))
                continue

            # Only a backslash can be left at this point.
            assert ch == '\\'
            p = p+1     # beyond backslash
            assert p < q
            if code[p] != '\n':
                # the program is invalid, but can't complain
                lastch = ch + code[p]
            p = p+1     # beyond escaped char

        # end while p < q:

        self.lastch = lastch
        self.lastopenbracketpos = stack[-1] if stack else None
        self.stmt_bracketing = tuple(bracketing)
jpayne@68: 
jpayne@68:     def compute_bracket_indent(self):
jpayne@68:         """Return number of spaces the next line should be indented.
jpayne@68: 
jpayne@68:         Line continuation must be C_BRACKET.
jpayne@68:         """
jpayne@68:         self._study2()
jpayne@68:         assert self.continuation == C_BRACKET
jpayne@68:         j = self.lastopenbracketpos
jpayne@68:         code = self.code
jpayne@68:         n = len(code)
jpayne@68:         origi = i = code.rfind('\n', 0, j) + 1
jpayne@68:         j = j+1     # one beyond open bracket
jpayne@68:         # find first list item; set i to start of its line
jpayne@68:         while j < n:
jpayne@68:             m = _itemre(code, j)
jpayne@68:             if m:
jpayne@68:                 j = m.end() - 1     # index of first interesting char
jpayne@68:                 extra = 0
jpayne@68:                 break
jpayne@68:             else:
jpayne@68:                 # this line is junk; advance to next line
jpayne@68:                 i = j = code.find('\n', j) + 1
jpayne@68:         else:
jpayne@68:             # nothing interesting follows the bracket;
jpayne@68:             # reproduce the bracket line's indentation + a level
jpayne@68:             j = i = origi
jpayne@68:             while code[j] in " \t":
jpayne@68:                 j = j+1
jpayne@68:             extra = self.indentwidth
jpayne@68:         return len(code[i:j].expandtabs(self.tabwidth)) + extra
jpayne@68: 
jpayne@68:     def get_num_lines_in_stmt(self):
jpayne@68:         """Return number of physical lines in last stmt.
jpayne@68: 
jpayne@68:         The statement doesn't have to be an interesting statement.  This is
jpayne@68:         intended to be called when continuation is C_BACKSLASH.
jpayne@68:         """
jpayne@68:         self._study1()
jpayne@68:         goodlines = self.goodlines
jpayne@68:         return goodlines[-1] - goodlines[-2]
jpayne@68: 
jpayne@68:     def compute_backslash_indent(self):
jpayne@68:         """Return number of spaces the next line should be indented.
jpayne@68: 
jpayne@68:         Line continuation must be C_BACKSLASH.  Also assume that the new
jpayne@68:         line is the first one following the initial line of the stmt.
jpayne@68:         """
jpayne@68:         self._study2()
jpayne@68:         assert self.continuation == C_BACKSLASH
jpayne@68:         code = self.code
jpayne@68:         i = self.stmt_start
jpayne@68:         while code[i] in " \t":
jpayne@68:             i = i+1
jpayne@68:         startpos = i
jpayne@68: 
jpayne@68:         # See whether the initial line starts an assignment stmt; i.e.,
jpayne@68:         # look for an = operator
jpayne@68:         endpos = code.find('\n', startpos) + 1
jpayne@68:         found = level = 0
jpayne@68:         while i < endpos:
jpayne@68:             ch = code[i]
jpayne@68:             if ch in "([{":
jpayne@68:                 level = level + 1
jpayne@68:                 i = i+1
jpayne@68:             elif ch in ")]}":
jpayne@68:                 if level:
jpayne@68:                     level = level - 1
jpayne@68:                 i = i+1
jpayne@68:             elif ch == '"' or ch == "'":
jpayne@68:                 i = _match_stringre(code, i, endpos).end()
jpayne@68:             elif ch == '#':
jpayne@68:                 # This line is unreachable because the # makes a comment of
jpayne@68:                 # everything after it.
jpayne@68:                 break
jpayne@68:             elif level == 0 and ch == '=' and \
jpayne@68:                    (i == 0 or code[i-1] not in "=<>!") and \
jpayne@68:                    code[i+1] != '=':
jpayne@68:                 found = 1
jpayne@68:                 break
jpayne@68:             else:
jpayne@68:                 i = i+1
jpayne@68: 
jpayne@68:         if found:
jpayne@68:             # found a legit =, but it may be the last interesting
jpayne@68:             # thing on the line
jpayne@68:             i = i+1     # move beyond the =
jpayne@68:             found = re.match(r"\s*\\", code[i:endpos]) is None
jpayne@68: 
jpayne@68:         if not found:
jpayne@68:             # oh well ... settle for moving beyond the first chunk
jpayne@68:             # of non-whitespace chars
jpayne@68:             i = startpos
jpayne@68:             while code[i] not in " \t\n":
jpayne@68:                 i = i+1
jpayne@68: 
jpayne@68:         return len(code[self.stmt_start:i].expandtabs(\
jpayne@68:                                      self.tabwidth)) + 1
jpayne@68: 
jpayne@68:     def get_base_indent_string(self):
jpayne@68:         """Return the leading whitespace on the initial line of the last
jpayne@68:         interesting stmt.
jpayne@68:         """
jpayne@68:         self._study2()
jpayne@68:         i, n = self.stmt_start, self.stmt_end
jpayne@68:         j = i
jpayne@68:         code = self.code
jpayne@68:         while j < n and code[j] in " \t":
jpayne@68:             j = j + 1
jpayne@68:         return code[i:j]
jpayne@68: 
    def is_block_opener(self):
        """Return True if the last interesting statement opens a block.

        After _study2() has run, this is decided solely by whether the
        statement's last interesting character (self.lastch) is ':'.
        """
        self._study2()
        return self.lastch == ':'
jpayne@68: 
    def is_block_closer(self):
        """Return True if the last interesting statement closes a block.

        That is the case when _closere matches at self.stmt_start, i.e.
        the statement begins, after optional whitespace, with return,
        break, continue, raise or pass.
        """
        self._study2()
        return _closere(self.code, self.stmt_start) is not None
jpayne@68: 
    def get_last_stmt_bracketing(self):
        """Return bracketing structure of the last interesting statement.

        The returned tuple is in the format defined in _study2(): pairs
        of (index into self.code, nesting level).  _study2() is run
        first if it has not been already.
        """
        self._study2()
        return self.stmt_bracketing
jpayne@68: 
jpayne@68: 
# Running this module as a script runs its unit tests.
if __name__ == '__main__':
    import unittest
    unittest.main('idlelib.idle_test.test_pyparse', verbosity=2)