csp2: CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/idlelib/hyperparser.py annotate

annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/idlelib/hyperparser.py @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d

author	jpayne
date	Tue, 18 Mar 2025 16:23:26 -0400
parents
children

rev	line source
jpayne@68	1 """Provide advanced parsing abilities for ParenMatch and other extensions.
jpayne@68	2
jpayne@68	3 HyperParser uses PyParser. PyParser mostly gives information on the
jpayne@68	4 proper indentation of code. HyperParser gives additional information on
jpayne@68	5 the structure of code.
jpayne@68	6 """
jpayne@68	7 from keyword import iskeyword
jpayne@68	8 import string
jpayne@68	9
jpayne@68	10 from idlelib import pyparse
jpayne@68	11
# All ASCII chars that may appear anywhere in an identifier.
_ASCII_ID_CHARS = frozenset(string.ascii_letters + string.digits + "_")
# All ASCII chars that may be the first char of an identifier.
_ASCII_ID_FIRST_CHARS = frozenset(string.ascii_letters + "_")

# Lookup table, indexed by code point (0..127), telling whether a 7-bit
# ASCII char is valid in a Python identifier.
_IS_ASCII_ID_CHAR = [(chr(x) in _ASCII_ID_CHARS) for x in range(128)]
# Lookup table, indexed by code point (0..127), telling whether a 7-bit
# ASCII char is valid as the first char in a Python identifier.
_IS_ASCII_ID_FIRST_CHAR = [
    (chr(x) in _ASCII_ID_FIRST_CHARS) for x in range(128)
]
jpayne@68	23
jpayne@68	24
class HyperParser:
    """Answer structural questions about the code surrounding a text index.

    An instance captures the statement that contains a given index of an
    editor window's text widget (using pyparse.Parser) and can then report
    whether the index is inside a string or normal code, which brackets
    surround it, and which expression ends at it.
    """

    def __init__(self, editwin, index):
        """To initialize, analyze the surroundings of the given index.

        editwin must provide the attributes read here: text, indentwidth,
        tabwidth, prompt_last_line, num_context_lines and
        _build_char_in_string_func().  index is an index of editwin.text.
        """

        self.editwin = editwin
        self.text = text = editwin.text

        parser = pyparse.Parser(editwin.indentwidth, editwin.tabwidth)

        # Text indices are formatted as "line.column" (see the "%d.end"
        # and ".0" indices built below), so the integer part is the line.
        lno = int(float(text.index(index)))

        if not editwin.prompt_last_line:
            # Try increasingly large amounts of preceding context until
            # the parser finds a good place to start parsing from.
            for context in editwin.num_context_lines:
                startat = max(lno - context, 1)
                startatindex = repr(startat) + ".0"
                stopatindex = "%d.end" % lno
                # We add the newline because PyParse requires a newline
                # at end. We add a space so that index won't be at end
                # of line, so that its status will be the same as the
                # char before it, if should.
                parser.set_code(text.get(startatindex, stopatindex)+' \n')
                bod = parser.find_good_parse_start(
                          editwin._build_char_in_string_func(startatindex))
                if bod is not None or startat == 1:
                    break
            parser.set_lo(bod or 0)
        else:
            # Start right after the most recent "console"-tagged range
            # before index, or at the top of the text if there is none.
            r = text.tag_prevrange("console", index)
            if r:
                startatindex = r[1]
            else:
                startatindex = "1.0"
            stopatindex = "%d.end" % lno
            # We add the newline because PyParse requires it. We add a
            # space so that index won't be at end of line, so that its
            # status will be the same as the char before it, if should.
            parser.set_code(text.get(startatindex, stopatindex)+' \n')
            parser.set_lo(0)

        # We want what the parser has, minus the last newline and space.
        self.rawtext = parser.code[:-2]
        # Parser.code apparently preserves the statement we are in, so
        # that stopatindex can be used to synchronize the string with
        # the text box indices.
        self.stopatindex = stopatindex
        # Sequence of (position in rawtext, nesting level) pairs; that is
        # how the entries are used throughout this class.
        self.bracketing = parser.get_last_stmt_bracketing()
        # find which pairs of bracketing are openers. These always
        # correspond to a character of rawtext.
        self.isopener = [i>0 and self.bracketing[i][1] >
                                 self.bracketing[i-1][1]
                         for i in range(len(self.bracketing))]

        self.set_index(index)

    def set_index(self, index):
        """Set the index to which the functions relate.

        The index must be in the same statement.

        Raises ValueError if index lies before the start of the analyzed
        text (rawtext).
        """
        # rawtext ends at stopatindex, so the offset of index in rawtext
        # is found by counting characters back from the end.
        indexinrawtext = (len(self.rawtext) -
                          len(self.text.get(index, self.stopatindex)))
        if indexinrawtext < 0:
            raise ValueError("Index %s precedes the analyzed statement"
                             % index)
        self.indexinrawtext = indexinrawtext
        # find the rightmost bracket to which index belongs
        self.indexbracket = 0
        while (self.indexbracket < len(self.bracketing)-1 and
               self.bracketing[self.indexbracket+1][0] < self.indexinrawtext):
            self.indexbracket += 1
        # An entry that is not an opener and sits exactly at the index
        # also applies to the index, so step onto it.
        if (self.indexbracket < len(self.bracketing)-1 and
            self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and
           not self.isopener[self.indexbracket+1]):
            self.indexbracket += 1

    def is_in_string(self):
        """Is the index given to the HyperParser in a string?"""
        # The bracket to which we belong should be an opener.
        # If it's an opener, it has to have a character.
        return (self.isopener[self.indexbracket] and
                self.rawtext[self.bracketing[self.indexbracket][0]]
                in ('"', "'"))

    def is_in_code(self):
        """Is the index given to the HyperParser in normal code?"""
        # Normal code means: not inside a region opened by a comment
        # marker or by a quote character.
        return (not self.isopener[self.indexbracket] or
                self.rawtext[self.bracketing[self.indexbracket][0]]
                not in ('#', '"', "'"))

    def get_surrounding_brackets(self, openers='([{', mustclose=False):
        """Return bracket indexes or None.

        If the index given to the HyperParser is surrounded by a
        bracket defined in openers (or at least has one before it),
        return the indices of the opening bracket and the closing
        bracket (or the end of line, whichever comes first).

        If it is not surrounded by brackets, or the end of line comes
        before the closing bracket and mustclose is True, returns None.
        """

        bracketinglevel = self.bracketing[self.indexbracket][1]
        # Walk backwards to the nearest enclosing opener that is one of
        # the requested kinds, lowering the level as outer scopes are
        # reached.
        before = self.indexbracket
        while (not self.isopener[before] or
              self.rawtext[self.bracketing[before][0]] not in openers or
              self.bracketing[before][1] > bracketinglevel):
            before -= 1
            if before < 0:
                return None
            bracketinglevel = min(bracketinglevel, self.bracketing[before][1])
        # Walk forwards to the first entry below that level, i.e. the
        # point where the found opener has been closed.
        after = self.indexbracket + 1
        while (after < len(self.bracketing) and
              self.bracketing[after][1] >= bracketinglevel):
            after += 1

        # Convert rawtext offsets to text indices by counting characters
        # back from stopatindex.
        beforeindex = self.text.index("%s-%dc" %
            (self.stopatindex, len(self.rawtext)-self.bracketing[before][0]))
        if (after >= len(self.bracketing) or
           self.bracketing[after][0] > len(self.rawtext)):
            if mustclose:
                return None
            afterindex = self.stopatindex
        else:
            # We are after a real char, so it is a ')' and we give the
            # index before it.
            afterindex = self.text.index(
                "%s-%dc" % (self.stopatindex,
                 len(self.rawtext)-(self.bracketing[after][0]-1)))

        return beforeindex, afterindex

    # the set of built-in identifiers which are also keywords,
    # i.e. keyword.iskeyword() returns True for them
    _ID_KEYWORDS = frozenset({"True", "False", "None"})

    @classmethod
    def _eat_identifier(cls, str, limit, pos):
        """Given a string and pos, return the number of chars in the
        identifier which ends at pos, or 0 if there is no such one.

        The search never goes back past limit.  Keywords are not
        considered identifiers, except for True, False and None.
        """
        is_ascii_id_char = _IS_ASCII_ID_CHAR

        # Start at the end (pos) and work backwards.
        i = pos

        # Go backwards as long as the characters are valid ASCII
        # identifier characters. This is an optimization, since it
        # is faster in the common case where most of the characters
        # are ASCII.
        while i > limit and (
                ord(str[i - 1]) < 128 and
                is_ascii_id_char[ord(str[i - 1])]
        ):
            i -= 1

        # If the above loop ended due to reaching a non-ASCII
        # character, continue going backwards using the most generic
        # test for whether a string contains only valid identifier
        # characters.
        if i > limit and ord(str[i - 1]) >= 128:
            # The 'a' prefix makes the test about "identifier chars
            # only", regardless of whether the first char could start
            # an identifier.
            while i - 4 >= limit and ('a' + str[i - 4:pos]).isidentifier():
                i -= 4
            if i - 2 >= limit and ('a' + str[i - 2:pos]).isidentifier():
                i -= 2
            if i - 1 >= limit and ('a' + str[i - 1:pos]).isidentifier():
                i -= 1

            # The identifier candidate starts here. If it isn't a valid
            # identifier, don't eat anything. At this point that is only
            # possible if the first character isn't a valid first
            # character for an identifier.
            if not str[i:pos].isidentifier():
                return 0
        elif i < pos:
            # All characters in str[i:pos] are valid ASCII identifier
            # characters, so it is enough to check that the first is
            # valid as the first character of an identifier.
            if not _IS_ASCII_ID_FIRST_CHAR[ord(str[i])]:
                return 0

        # All keywords are valid identifiers, but should not be
        # considered identifiers here, except for True, False and None.
        if i < pos and (
                iskeyword(str[i:pos]) and
                str[i:pos] not in cls._ID_KEYWORDS
        ):
            return 0

        return pos - i

    # This string includes all chars that may be in a white space
    _whitespace_chars = " \t\n\\"

    def get_expression(self):
        """Return a string with the Python expression which ends at the
        given index, which is empty if there is no real one.

        Raises ValueError if the index is not in normal code (see
        is_in_code()).
        """
        if not self.is_in_code():
            raise ValueError("get_expression should only be called "
                             "if index is inside a code.")

        rawtext = self.rawtext
        bracketing = self.bracketing

        brck_index = self.indexbracket
        brck_limit = bracketing[brck_index][0]
        pos = self.indexinrawtext

        # Start of the expression found so far; the scan goes backwards.
        last_identifier_pos = pos
        # True while an identifier (or bracketed part) is expected next;
        # False while only a dot can continue the expression.
        postdot_phase = True

        while True:
            # Eat whitespaces, comments, and if postdot_phase is False - a dot
            while True:
                if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars:
                    # Eat a whitespace
                    pos -= 1
                elif (not postdot_phase and
                      pos > brck_limit and rawtext[pos-1] == '.'):
                    # Eat a dot
                    pos -= 1
                    postdot_phase = True
                # The next line will fail if we are inside a comment,
                # but we shouldn't be.
                elif (pos == brck_limit and brck_index > 0 and
                      rawtext[bracketing[brck_index-1][0]] == '#'):
                    # Eat a comment
                    brck_index -= 2
                    brck_limit = bracketing[brck_index][0]
                    pos = bracketing[brck_index+1][0]
                else:
                    # If we didn't eat anything, quit.
                    break

            if not postdot_phase:
                # We didn't find a dot, so the expression end at the
                # last identifier pos.
                break

            ret = self._eat_identifier(rawtext, brck_limit, pos)
            if ret:
                # There is an identifier to eat
                pos = pos - ret
                last_identifier_pos = pos
                # Now, to continue the search, we must find a dot.
                postdot_phase = False
                # (the loop continues now)

            elif pos == brck_limit:
                # We are at a bracketing limit. If it is a closing
                # bracket, eat the bracket, otherwise, stop the search.
                level = bracketing[brck_index][1]
                while brck_index > 0 and bracketing[brck_index-1][1] > level:
                    brck_index -= 1
                if bracketing[brck_index][0] == brck_limit:
                    # We were not at the end of a closing bracket
                    break
                pos = bracketing[brck_index][0]
                brck_index -= 1
                brck_limit = bracketing[brck_index][0]
                last_identifier_pos = pos
                if rawtext[pos] in "([":
                    # [] and () may be used after an identifier, so we
                    # continue. postdot_phase is True, so we don't allow a dot.
                    pass
                else:
                    # We can't continue after other types of brackets
                    if rawtext[pos] in "'\"":
                        # Scan a string prefix
                        while pos > 0 and rawtext[pos - 1] in "rRbBuU":
                            pos -= 1
                        last_identifier_pos = pos
                    break

            else:
                # We've found an operator or something.
                break

        return rawtext[last_identifier_pos:self.indexinrawtext]
jpayne@68	308
jpayne@68	309
# When run as a script, run this module's unit tests verbosely.
if __name__ == '__main__':
    from unittest import main
    main('idlelib.idle_test.test_hyperparser', verbosity=2)

Mercurial > repos > rliterman > csp2

annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/idlelib/hyperparser.py @ 68:5028fdace37b