Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/xml/etree/ElementPath.py @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 # | |
2 # ElementTree | |
3 # $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $ | |
4 # | |
5 # limited xpath support for element trees | |
6 # | |
7 # history: | |
8 # 2003-05-23 fl created | |
9 # 2003-05-28 fl added support for // etc | |
10 # 2003-08-27 fl fixed parsing of periods in element names | |
11 # 2007-09-10 fl new selection engine | |
12 # 2007-09-12 fl fixed parent selector | |
13 # 2007-09-13 fl added iterfind; changed findall to return a list | |
14 # 2007-11-30 fl added namespaces support | |
15 # 2009-10-30 fl added child element value filter | |
16 # | |
17 # Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved. | |
18 # | |
19 # fredrik@pythonware.com | |
20 # http://www.pythonware.com | |
21 # | |
22 # -------------------------------------------------------------------- | |
23 # The ElementTree toolkit is | |
24 # | |
25 # Copyright (c) 1999-2009 by Fredrik Lundh | |
26 # | |
27 # By obtaining, using, and/or copying this software and/or its | |
28 # associated documentation, you agree that you have read, understood, | |
29 # and will comply with the following terms and conditions: | |
30 # | |
31 # Permission to use, copy, modify, and distribute this software and | |
32 # its associated documentation for any purpose and without fee is | |
33 # hereby granted, provided that the above copyright notice appears in | |
34 # all copies, and that both that copyright notice and this permission | |
35 # notice appear in supporting documentation, and that the name of | |
36 # Secret Labs AB or the author not be used in advertising or publicity | |
37 # pertaining to distribution of the software without specific, written | |
38 # prior permission. | |
39 # | |
40 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD | |
41 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- | |
42 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR | |
43 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY | |
44 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, | |
45 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS | |
46 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE | |
47 # OF THIS SOFTWARE. | |
48 # -------------------------------------------------------------------- | |
49 | |
50 # Licensed to PSF under a Contributor Agreement. | |
51 # See http://www.python.org/psf/license for licensing details. | |
52 | |
53 ## | |
54 # Implementation module for XPath support. There's usually no reason | |
55 # to import this module directly; the <b>ElementTree</b> does this for | |
56 # you, if needed. | |
57 ## | |
58 | |
59 import re | |
60 | |
# Tokenizer for the supported path syntax.  Every match is a 2-tuple
# (operator, tag): exactly one of the two is non-empty, except for runs of
# whitespace, which match neither group and come out as ('', '').
xpath_tokenizer_re = re.compile(
    r"("
    r"'[^']*'|\"[^\"]*\"|"                   # quoted string literal
    r"::|"
    r"//?|"                                  # '/' or '//'
    r"\.\.|"                                 # '..'
    r"\(\)|"                                 # '()'
    r"[/.*:\[\]\(\)@=])|"                    # single-character operators
    r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"    # tag name, optionally '{uri}name'
    r"\s+"                                   # whitespace
    )
72 | |
def xpath_tokenizer(pattern, namespaces=None):
    """Yield (operator, tag) tokens for *pattern*, expanding namespaces.

    A name of the form 'prefix:local' is rewritten to '{uri}local' using
    the *namespaces* mapping.  An unprefixed name that does not already
    start with '{' is qualified with the default namespace (the '' key of
    *namespaces*), unless the token right before it was '@'.  All other
    tokens are passed through unchanged.

    Raises SyntaxError if a prefix is missing from *namespaces*.
    """
    default_ns = namespaces.get('') if namespaces else None
    after_at = False
    for token in xpath_tokenizer_re.findall(pattern):
        op, name = token
        if not name or name[0] == "{":
            # Operator, whitespace or an already-qualified name.
            yield token
            after_at = op == '@'
            continue
        if ":" in name:
            prefix, local = name.split(":", 1)
            try:
                if not namespaces:
                    raise KeyError
                yield op, "{%s}%s" % (namespaces[prefix], local)
            except KeyError:
                raise SyntaxError("prefix %r not found in prefix map" % prefix) from None
        elif default_ns and not after_at:
            yield op, "{%s}%s" % (default_ns, name)
        else:
            yield token
        after_at = False
95 | |
96 | |
def get_parent_map(context):
    """Return the child -> parent mapping for *context*, building it once.

    The mapping is stored on ``context.parent_map``; if that attribute is
    not None it is returned as-is.  Otherwise every element reachable from
    ``context.root`` is visited and each of its children is recorded.
    """
    mapping = context.parent_map
    if mapping is not None:
        return mapping
    context.parent_map = mapping = {}
    for parent in context.root.iter():
        for child in parent:
            mapping[child] = parent
    return mapping
105 | |
106 | |
def _is_wildcard_tag(tag):
    """Return True if *tag* starts with '{*}' or ends with '}*'."""
    return tag.startswith('{*}') or tag.endswith('}*')
109 | |
110 | |
def _prepare_tag(tag):
    """Build a filter for one of the wildcard tag forms.

    Recognised forms, checked in this order:

      '{*}*'     any element whose tag is a string
      '{}*'      any string tag that does not start with '{'
      '{*}name'  'name' itself, or any string tag ending in '}name'
      '{uri}*'   any string tag starting with '{uri}'

    Returns a generator function ``select(context, result)`` that yields
    the elements of *result* that pass the filter.  Raises RuntimeError
    for any other *tag*.
    """
    if tag == '{*}*':
        # Same as '*', but no comments or processing instructions.
        # It can be a surprise that '*' includes those, but there is no
        # justification for '{*}*' doing the same.
        def select(context, result):
            for candidate in result:
                if isinstance(candidate.tag, str):
                    yield candidate
        return select

    if tag == '{}*':
        # Any tag that is not in a namespace.
        def select(context, result):
            for candidate in result:
                name = candidate.tag
                if isinstance(name, str) and name[0] != '{':
                    yield candidate
        return select

    if tag[:3] == '{*}':
        # The tag in any (or no) namespace.
        local = tag[3:]
        suffix = tag[2:]  # '}name'
        def select(context, result):
            for candidate in result:
                name = candidate.tag
                if name == local or (isinstance(name, str) and name.endswith(suffix)):
                    yield candidate
        return select

    if tag[-2:] == '}*':
        # Any tag in the given namespace.
        prefix = tag[:-1]  # '{uri}'
        def select(context, result):
            for candidate in result:
                name = candidate.tag
                if isinstance(name, str) and name.startswith(prefix):
                    yield candidate
        return select

    raise RuntimeError(f"internal parser error, got {tag}")
150 | |
151 | |
def prepare_child(next, token):
    """Compile a plain tag step into a selector over direct children.

    *token* is the (operator, tag) pair for the step; *next* is unused.
    The returned ``select(context, result)`` produces, for each element of
    *result*, the children whose tag matches.  Wildcard tags are delegated
    to _prepare_tag(); a leading '{}' on an ordinary tag is dropped.
    """
    name = token[1]
    if not _is_wildcard_tag(name):
        if name[:2] == '{}':
            name = name[2:]  # '{}tag' == 'tag'
        def select(context, result):
            for parent in result:
                for child in parent:
                    if child.tag == name:
                        yield child
        return select

    matches = _prepare_tag(name)

    def children_of(parents):
        for parent in parents:
            yield from parent

    def select(context, result):
        return matches(context, children_of(result))
    return select
170 | |
def prepare_star(next, token):
    """Compile a '*' step: select every child of every input element.

    Both arguments are unused.
    """
    def select(context, result):
        for parent in result:
            for child in parent:
                yield child
    return select
176 | |
def prepare_self(next, token):
    """Compile a '.' step: pass the input elements through unchanged.

    Both arguments are unused.
    """
    def select(context, result):
        for elem in result:
            yield elem
    return select
181 | |
def prepare_descendant(next, token):
    """Compile a '//' step into a selector over all descendants.

    The token after '//' is consumed with *next*; it must be '*' or a tag
    name.  The returned ``select(context, result)`` produces the matching
    descendants of each element in *result*, never the element itself.

    Raises SyntaxError if no token follows '//' or if the following token
    is neither '*' nor a tag name.
    """
    try:
        token = next()
    except StopIteration:
        # Nothing follows '//'.  Returning None here would hand the caller
        # a selector that cannot be called, so report the bad path instead.
        raise SyntaxError("invalid descendant") from None
    if token[0] == "*":
        tag = "*"
    elif not token[0]:
        tag = token[1]
    else:
        raise SyntaxError("invalid descendant")

    if _is_wildcard_tag(tag):
        select_tag = _prepare_tag(tag)
        def select(context, result):
            def select_child(result):
                for elem in result:
                    for e in elem.iter():
                        if e is not elem:
                            yield e
            return select_tag(context, select_child(result))
    else:
        if tag[:2] == '{}':
            tag = tag[2:]  # '{}tag' == 'tag'
        def select(context, result):
            for elem in result:
                for e in elem.iter(tag):
                    if e is not elem:
                        yield e
    return select
212 | |
def prepare_parent(next, token):
    """Compile a '..' step: select the parent of each input element.

    Both arguments are unused.  Each parent is produced at most once, in
    the order first encountered; elements with no entry in the parent map
    are skipped.
    """
    def select(context, result):
        # FIXME: raise error if .. is applied at toplevel?
        parent_map = get_parent_map(context)
        seen = set()
        for elem in result:
            try:
                parent = parent_map[elem]
            except KeyError:
                continue
            if parent in seen:
                continue
            seen.add(parent)
            yield parent
    return select
225 | |
def prepare_predicate(next, token):
    """Compile a '[...]' predicate into a selector.

    Tokens are pulled from *next* up to the closing ']'; whitespace tokens
    are ignored.  Supported forms:

      [@attribute]           the attribute is present
      [@attribute='value']   the attribute equals the value
      [tag]                  elem.find(tag) finds something
      [.='value']            the element's joined text equals the value
      [tag='value']          some elem.findall(tag) match has that joined text
      [index]                1-based position among same-tag siblings
      [last()], [last()-n]   position counted back from the last sibling

    Returns a generator function ``select(context, result)`` that yields
    the elements of *result* satisfying the predicate.

    Raises SyntaxError if the predicate is unterminated or not one of the
    supported forms.
    """
    # FIXME: replace with real parser!!! refs:
    # http://effbot.org/zone/simple-iterator-parser.htm
    # http://javascript.crockford.com/tdop/tdop.html
    signature = []
    predicate = []
    while 1:
        try:
            token = next()
        except StopIteration:
            # Ran out of tokens before the closing ']'.  Returning None
            # here would hand the caller a selector that cannot be called.
            raise SyntaxError("invalid predicate") from None
        if token[0] == "]":
            break
        if token == ('', ''):
            # ignore whitespace
            continue
        if token[0] and token[0][:1] in "'\"":
            # Quoted literal: record it as "'" plus the unquoted value.
            token = "'", token[0][1:-1]
        signature.append(token[0] or "-")
        predicate.append(token[1])
    signature = "".join(signature)
    # use signature to determine predicate type
    if signature == "@-":
        # [@attribute] predicate
        key = predicate[1]
        def select(context, result):
            for elem in result:
                if elem.get(key) is not None:
                    yield elem
        return select
    if signature == "@-='":
        # [@attribute='value']
        key = predicate[1]
        value = predicate[-1]
        def select(context, result):
            for elem in result:
                if elem.get(key) == value:
                    yield elem
        return select
    if signature == "-" and not re.match(r"\-?\d+$", predicate[0]):
        # [tag]
        tag = predicate[0]
        def select(context, result):
            for elem in result:
                if elem.find(tag) is not None:
                    yield elem
        return select
    if signature == ".='" or (signature == "-='" and not re.match(r"\-?\d+$", predicate[0])):
        # [.='value'] or [tag='value']
        tag = predicate[0]
        value = predicate[-1]
        if tag:
            def select(context, result):
                for elem in result:
                    for e in elem.findall(tag):
                        if "".join(e.itertext()) == value:
                            yield elem
                            break
        else:
            # '.' contributes an empty tag: compare the element's own text.
            def select(context, result):
                for elem in result:
                    if "".join(elem.itertext()) == value:
                        yield elem
        return select
    if signature == "-" or signature == "-()" or signature == "-()-":
        # [index] or [last()] or [last()-index]
        if signature == "-":
            # [index]
            index = int(predicate[0]) - 1
            if index < 0:
                raise SyntaxError("XPath position >= 1 expected")
        else:
            if predicate[0] != "last":
                raise SyntaxError("unsupported function")
            if signature == "-()-":
                try:
                    index = int(predicate[2]) - 1
                except ValueError:
                    raise SyntaxError("unsupported expression") from None
                if index > -2:
                    raise SyntaxError("XPath offset from last() must be negative")
            else:
                index = -1
        def select(context, result):
            parent_map = get_parent_map(context)
            for elem in result:
                try:
                    parent = parent_map[elem]
                    # FIXME: what if the selector is "*" ?
                    elems = list(parent.findall(elem.tag))
                    if elems[index] is elem:
                        yield elem
                except (IndexError, KeyError):
                    pass
        return select
    raise SyntaxError("invalid predicate")
322 | |
# Dispatch table: the operator part of a token selects the function that
# compiles that step.  Plain tag names have an empty operator.
ops = {
    "": prepare_child,
    "*": prepare_star,
    ".": prepare_self,
    "..": prepare_parent,
    "//": prepare_descendant,
    "[": prepare_predicate,
}

# Compiled selector lists, keyed by the path plus the sorted namespace items.
_cache = {}
333 | |
class _SelectorContext:
    """State handed to every selector while one path is being evaluated."""

    # Child -> parent mapping; stays None until get_parent_map() fills it in.
    parent_map = None

    def __init__(self, root):
        # Element the path is evaluated against.
        self.root = root
338 | |
339 # -------------------------------------------------------------------- | |
340 | |
341 ## | |
342 # Generate all matching objects. | |
343 | |
def iterfind(elem, path, namespaces=None):
    """Return an iterator over the elements matching *path* below *elem*.

    *path* is compiled into a list of selectors, which is cached under the
    path plus the sorted items of *namespaces*; the selectors are then
    chained starting from ``[elem]``.  A path ending in '/' is treated as
    if it ended in '/*'.  An empty path matches nothing.

    Raises SyntaxError for a path starting with '/' or one that ends in
    the middle of a step.
    """
    if path == "":
        # No steps to compile.  Return an empty iterator rather than None
        # so that callers can always iterate the result.
        return iter(())

    # compile selector pattern
    if path[-1:] == "/":
        path = path + "*"  # implicit all (FIXME: keep this?)

    cache_key = (path,)
    if namespaces:
        cache_key += tuple(sorted(namespaces.items()))

    try:
        selector = _cache[cache_key]
    except KeyError:
        if len(_cache) > 100:
            _cache.clear()
        if path[:1] == "/":
            raise SyntaxError("cannot use absolute path on element") from None
        next_token = iter(xpath_tokenizer(path, namespaces)).__next__
        try:
            token = next_token()
        except StopIteration:
            # Defensive: a token stream with nothing in it matches nothing.
            return iter(())
        selector = []
        while 1:
            try:
                selector.append(ops[token[0]](next_token, token))
            except StopIteration:
                raise SyntaxError("invalid path") from None
            try:
                token = next_token()
                if token[0] == "/":
                    # '/' only separates steps; move on to the step itself.
                    token = next_token()
            except StopIteration:
                break
        _cache[cache_key] = selector
    # execute selector pattern
    result = [elem]
    context = _SelectorContext(elem)
    for select in selector:
        result = select(context, result)
    return result
384 | |
385 ## | |
386 # Find first matching object. | |
387 | |
def find(elem, path, namespaces=None):
    """Return the first element matching *path*, or None if there is none."""
    for match in iterfind(elem, path, namespaces):
        return match
    return None
390 | |
391 ## | |
392 # Find all matching objects. | |
393 | |
def findall(elem, path, namespaces=None):
    """Return a list of all elements matching *path*."""
    matches = iterfind(elem, path, namespaces)
    return list(matches)
396 | |
397 ## | |
398 # Find text for first matching object. | |
399 | |
def findtext(elem, path, default=None, namespaces=None):
    """Return the text of the first element matching *path*.

    Returns *default* when nothing matches.  When the matching element's
    ``text`` is None an empty string is returned; any other value,
    including falsy ones such as 0, is returned unchanged.
    """
    try:
        match = next(iterfind(elem, path, namespaces))
    except StopIteration:
        return default
    text = match.text
    # Only a missing text becomes "": 'text or ""' would also discard
    # falsy non-None values.
    return "" if text is None else text