Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/xml/etree/ElementTree.py @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 """Lightweight XML support for Python. | |
2 | |
3 XML is an inherently hierarchical data format, and the most natural way to | |
4 represent it is with a tree. This module has two classes for this purpose: | |
5 | |
6 1. ElementTree represents the whole XML document as a tree and | |
7 | |
8 2. Element represents a single node in this tree. | |
9 | |
10 Interactions with the whole document (reading and writing to/from files) are | |
11 usually done on the ElementTree level. Interactions with a single XML element | |
12 and its sub-elements are done on the Element level. | |
13 | |
14 Element is a flexible container object designed to store hierarchical data | |
15 structures in memory. It can be described as a cross between a list and a | |
16 dictionary. Each Element has a number of properties associated with it: | |
17 | |
18 'tag' - a string containing the element's name. | |
19 | |
20 'attributes' - a Python dictionary storing the element's attributes. | |
21 | |
22 'text' - a string containing the element's text content. | |
23 | |
24 'tail' - an optional string containing text after the element's end tag. | |
25 | |
26 And a number of child elements stored in a Python sequence. | |
27 | |
28 To create an element instance, use the Element constructor, | |
29 or the SubElement factory function. | |
30 | |
31 You can also use the ElementTree class to wrap an element structure | |
32 and convert it to and from XML. | |
33 | |
34 """ | |
35 | |
36 #--------------------------------------------------------------------- | |
37 # Licensed to PSF under a Contributor Agreement. | |
38 # See http://www.python.org/psf/license for licensing details. | |
39 # | |
40 # ElementTree | |
41 # Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved. | |
42 # | |
43 # fredrik@pythonware.com | |
44 # http://www.pythonware.com | |
45 # -------------------------------------------------------------------- | |
46 # The ElementTree toolkit is | |
47 # | |
48 # Copyright (c) 1999-2008 by Fredrik Lundh | |
49 # | |
50 # By obtaining, using, and/or copying this software and/or its | |
51 # associated documentation, you agree that you have read, understood, | |
52 # and will comply with the following terms and conditions: | |
53 # | |
54 # Permission to use, copy, modify, and distribute this software and | |
55 # its associated documentation for any purpose and without fee is | |
56 # hereby granted, provided that the above copyright notice appears in | |
57 # all copies, and that both that copyright notice and this permission | |
58 # notice appear in supporting documentation, and that the name of | |
59 # Secret Labs AB or the author not be used in advertising or publicity | |
60 # pertaining to distribution of the software without specific, written | |
61 # prior permission. | |
62 # | |
63 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD | |
64 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- | |
65 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR | |
66 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY | |
67 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, | |
68 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS | |
69 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE | |
70 # OF THIS SOFTWARE. | |
71 # -------------------------------------------------------------------- | |
72 | |
# Names exported by a star-import of this module.
__all__ = [
    # public symbols
    "Comment",
    "dump",
    "Element", "ElementTree",
    "fromstring", "fromstringlist",
    "iselement", "iterparse",
    "parse", "ParseError",
    "PI", "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring", "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML", "XMLID",
    "XMLParser", "XMLPullParser",
    "register_namespace",
    "canonicalize", "C14NWriterTarget",
    ]

# Version string exposed as part of the public API (it is listed in __all__).
VERSION = "1.3.0"
94 | |
95 import sys | |
96 import re | |
97 import warnings | |
98 import io | |
99 import collections | |
100 import collections.abc | |
101 import contextlib | |
102 | |
103 from . import ElementPath | |
104 | |
105 | |
class ParseError(SyntaxError):
    """Raised when an XML document cannot be parsed.

    Besides the usual exception value, instances are expected to carry
    two extra attributes set by whoever raises the error:
        'code' - the specific exception code
        'position' - the line and column of the error

    """
116 | |
117 # -------------------------------------------------------------------- | |
118 | |
119 | |
def iselement(element):
    """Return True if *element* exposes a 'tag' attribute, else False."""
    try:
        element.tag
    except AttributeError:
        return False
    return True
123 | |
124 | |
class Element:
    """An XML element.

    This class is the reference implementation of the Element interface.

    An element's length is its number of subelements.  That means if you
    want to check if an element is truly empty, you should check BOTH
    its length AND its text attribute.

    The element tag, attribute names, and attribute values can be either
    bytes or strings.

    *tag* is the element name.  *attrib* is an optional dictionary containing
    element attributes.  *extra* are additional element attributes given as
    keyword arguments.

    Example form:
        <tag attrib>text<child/>...</tag>tail

    """

    # The element's name.
    tag = None

    # Dictionary of the element's attributes.
    attrib = None

    # Text before the first subelement.  Either a string or None; when there
    # is no text it may be None or the empty string, depending on the parser.
    text = None

    # Text after this element's end tag, but before the next sibling
    # element's start tag.  Either a string or None; when there is no text
    # it may be None or the empty string, depending on the parser.
    tail = None

    def __init__(self, tag, attrib={}, **extra):
        """Create an element named *tag*.

        *attrib* must be a dict; it is copied (never stored or mutated), and
        the *extra* keyword arguments are merged on top of it.

        Raises TypeError if *attrib* is not a dict.
        """
        if not isinstance(attrib, dict):
            raise TypeError("attrib must be dict, not %s" % (
                attrib.__class__.__name__,))
        self.tag = tag
        self.attrib = {**attrib, **extra}
        self._children = []

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__, self.tag, id(self))

    def makeelement(self, tag, attrib):
        """Create a new element with the same type.

        *tag* is a string containing the element name.
        *attrib* is a dictionary containing the element attributes.

        Do not call this method, use the SubElement factory function instead.

        """
        return self.__class__(tag, attrib)

    def copy(self):
        """Return copy of current element.

        This creates a shallow copy.  Subelements will be shared with the
        original tree.

        """
        elem = self.makeelement(self.tag, self.attrib)
        elem.text = self.text
        elem.tail = self.tail
        elem[:] = self
        return elem

    def __len__(self):
        return len(self._children)

    def __bool__(self):
        warnings.warn(
            "The behavior of this method will change in future versions. "
            "Use specific 'len(elem)' or 'elem is not None' test instead.",
            FutureWarning, stacklevel=2
            )
        return len(self._children) != 0 # emulate old behaviour, for now

    def __getitem__(self, index):
        return self._children[index]

    def __setitem__(self, index, element):
        """Replace the child (or slice of children) at *index*.

        For a slice, *element* is any iterable of elements; for an integer
        index it is a single element.  Raises TypeError if anything assigned
        is not an Element.
        """
        if isinstance(index, slice):
            # Materialize first: validating a one-shot iterator in place
            # would exhaust it and leave nothing to assign.
            element = list(element)
            for elt in element:
                self._assert_is_element(elt)
        else:
            self._assert_is_element(element)
        self._children[index] = element

    def __delitem__(self, index):
        del self._children[index]

    def append(self, subelement):
        """Add *subelement* to the end of this element.

        The new element will appear in document order after the last existing
        subelement (or directly after the text, if it's the first subelement),
        but before the end tag for this element.

        """
        self._assert_is_element(subelement)
        self._children.append(subelement)

    def extend(self, elements):
        """Append subelements from an iterable.

        *elements* is an iterable with zero or more elements.  Every item is
        validated before any is appended, so a TypeError leaves this element
        unchanged.

        """
        # Snapshot the iterable once: a generator would otherwise be consumed
        # by the validation pass, and extending an element with itself would
        # iterate over a list that keeps growing.
        elements = list(elements)
        for element in elements:
            self._assert_is_element(element)
        self._children.extend(elements)

    def insert(self, index, subelement):
        """Insert *subelement* at position *index*."""
        self._assert_is_element(subelement)
        self._children.insert(index, subelement)

    def _assert_is_element(self, e):
        # Need to refer to the actual Python implementation, not the
        # shadowing C implementation.
        if not isinstance(e, _Element_Py):
            raise TypeError('expected an Element, not %s' % type(e).__name__)

    def remove(self, subelement):
        """Remove matching subelement.

        Unlike the find methods, this method compares elements based on
        identity, NOT ON tag value or contents.  To remove subelements by
        other means, the easiest way is to use a list comprehension to
        select what elements to keep, and then use slice assignment to update
        the parent element.

        ValueError is raised if a matching element could not be found.

        """
        # assert iselement(element)
        self._children.remove(subelement)

    def getchildren(self):
        """(Deprecated) Return all subelements.

        Elements are returned in document order.

        """
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'list(elem)' or iteration over elem instead.",
            DeprecationWarning, stacklevel=2
            )
        return self._children

    def find(self, path, namespaces=None):
        """Find first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return the first matching element, or None if no element was found.

        """
        return ElementPath.find(self, path, namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find text for first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *default* is the value to return if the element was not found,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return text content of first matching element, or default value if
        none was found.  Note that if an element is found having no text
        content, the empty string is returned.

        """
        return ElementPath.findtext(self, path, default, namespaces)

    def findall(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Returns list containing all matching elements in document order.

        """
        return ElementPath.findall(self, path, namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return an iterable yielding all matching elements in document order.

        """
        return ElementPath.iterfind(self, path, namespaces)

    def clear(self):
        """Reset element.

        This function removes all subelements, clears all attributes, and sets
        the text and tail attributes to None.

        """
        self.attrib.clear()
        self._children = []
        self.text = self.tail = None

    def get(self, key, default=None):
        """Get element attribute.

        Equivalent to attrib.get, but some implementations may handle this a
        bit more efficiently.  *key* is what attribute to look for, and
        *default* is what to return if the attribute was not found.

        Returns the attribute value, or the default if the attribute was
        not found.

        """
        return self.attrib.get(key, default)

    def set(self, key, value):
        """Set element attribute.

        Equivalent to attrib[key] = value, but some implementations may handle
        this a bit more efficiently.  *key* is what attribute to set, and
        *value* is the attribute value to set it to.

        """
        self.attrib[key] = value

    def keys(self):
        """Get the attribute names.

        Equivalent to attrib.keys(): the result is the dict's keys view.

        """
        return self.attrib.keys()

    def items(self):
        """Get element attributes as (name, value) pairs.

        Equivalent to attrib.items(): the result is the dict's items view.

        """
        return self.attrib.items()

    def iter(self, tag=None):
        """Create tree iterator.

        The iterator loops over the element and all subelements in document
        order, returning all elements with a matching tag.

        If the tree structure is modified during iteration, new or removed
        elements may or may not be included.  To get a stable set, use the
        list() function on the iterator, and loop over the resulting list.

        *tag* is what tags to look for (default is to return all elements);
        "*" is treated the same as None.

        Return an iterator containing all the matching elements.

        """
        if tag == "*":
            tag = None
        if tag is None or self.tag == tag:
            yield self
        for e in self._children:
            yield from e.iter(tag)

    # compatibility
    def getiterator(self, tag=None):
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'elem.iter()' or 'list(elem.iter())' instead.",
            DeprecationWarning, stacklevel=2
        )
        return list(self.iter(tag))

    def itertext(self):
        """Create text iterator.

        The iterator loops over the element and all subelements in document
        order, returning all inner text.

        """
        tag = self.tag
        # Elements whose tag is neither a string nor None contribute no text.
        if not isinstance(tag, str) and tag is not None:
            return
        t = self.text
        if t:
            yield t
        for e in self:
            yield from e.itertext()
            t = e.tail
            if t:
                yield t
439 | |
440 | |
def SubElement(parent, tag, attrib={}, **extra):
    """Create a child element and attach it to *parent*.

    The element tag, attribute names, and attribute values can be either
    bytes or Unicode strings.

    *parent* is the parent element, *tag* is the subelement's name, *attrib*
    is an optional dictionary containing element attributes, *extra* are
    additional attributes given as keyword arguments.

    The new element is built with parent.makeelement(), appended to the
    parent and returned.

    """
    # Merge into a fresh dict so the caller's mapping is left untouched.
    child = parent.makeelement(tag, {**attrib, **extra})
    parent.append(child)
    return child
457 | |
458 | |
def Comment(text=None):
    """Comment element factory.

    Build a special element, tagged with this factory function itself,
    which the standard serializer writes out as an XML comment.

    *text* is a string containing the comment string.

    """
    node = Element(Comment)
    node.text = text
    return node
471 | |
472 | |
def ProcessingInstruction(target, text=None):
    """Processing Instruction element factory.

    Build a special element, tagged with this factory function itself,
    which the standard serializer writes out as an XML processing
    instruction.

    *target* is a string containing the processing instruction, *text* is a
    string containing the processing instruction contents, if any.

    """
    node = Element(ProcessingInstruction)
    # The element text is the target, followed by the contents when given.
    if text:
        node.text = target + " " + text
    else:
        node.text = target
    return node

PI = ProcessingInstruction
490 | |
491 | |
class QName:
    """Qualified name wrapper.

    This class can be used to wrap a QName attribute value in order to get
    proper namespace handling on output.

    *text_or_uri* is a string containing the QName value either in the form
    {uri}local, or if the tag argument is given, the URI part of a QName.

    *tag* is an optional argument which if given, will make the first
    argument (text_or_uri) be interpreted as a URI, and this argument (tag)
    be interpreted as a local name.

    Comparisons and hashing are delegated to the wrapped text, so a QName
    compares against both other QNames and plain values.

    """
    def __init__(self, text_or_uri, tag=None):
        self.text = "{%s}%s" % (text_or_uri, tag) if tag else text_or_uri

    @staticmethod
    def _plain(other):
        # Unwrap another QName to its text; leave anything else as is.
        return other.text if isinstance(other, QName) else other

    def __str__(self):
        return self.text

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, self.text)

    def __hash__(self):
        return hash(self.text)

    def __le__(self, other):
        return self.text <= self._plain(other)

    def __lt__(self, other):
        return self.text < self._plain(other)

    def __ge__(self, other):
        return self.text >= self._plain(other)

    def __gt__(self, other):
        return self.text > self._plain(other)

    def __eq__(self, other):
        return self.text == self._plain(other)
536 | |
537 # -------------------------------------------------------------------- | |
538 | |
539 | |
class ElementTree:
    """An XML element hierarchy.

    This class also provides support for serialization to and from
    standard XML.

    *element* is an optional root element node,
    *file* is an optional file handle or file name of an XML file whose
    contents will be used to initialize the tree with.

    """
    def __init__(self, element=None, file=None):
        # assert element is None or iselement(element)
        self._root = element # first node
        if file:
            self.parse(file)

    @staticmethod
    def _relative_path(path):
        # Shared by the find* methods: a leading "/" is rewritten to "./" and
        # a FutureWarning is issued.  stacklevel=3 attributes the warning to
        # the caller of the public method that invoked this helper.
        if path[:1] == "/":
            path = "." + path
            warnings.warn(
                "This search is broken in 1.3 and earlier, and will be "
                "fixed in a future version. If you rely on the current "
                "behaviour, change it to %r" % path,
                FutureWarning, stacklevel=3
                )
        return path

    def getroot(self):
        """Return root element of this tree."""
        return self._root

    def _setroot(self, element):
        """Replace root element of this tree.

        This will discard the current contents of the tree and replace it
        with the given element.  Use with care!

        """
        # assert iselement(element)
        self._root = element

    def parse(self, source, parser=None):
        """Load external XML document into element tree.

        *source* is a file name or file object, *parser* is an optional parser
        instance that defaults to XMLParser.

        ParseError is raised if the parser fails to parse the document.

        Returns the root element of the given source document.

        """
        with contextlib.ExitStack() as cleanup:
            if not hasattr(source, "read"):
                # A file opened here is closed on the way out; a file object
                # supplied by the caller is left open.
                source = cleanup.enter_context(open(source, "rb"))
            if parser is None:
                # If no parser was specified, create a default XMLParser
                parser = XMLParser()
                if hasattr(parser, '_parse_whole'):
                    # The default XMLParser, when it comes from an accelerator,
                    # can define an internal _parse_whole API for efficiency.
                    # It can be used to parse the whole source without feeding
                    # it with chunks.
                    self._root = parser._parse_whole(source)
                    return self._root
            chunk = source.read(65536)
            while chunk:
                parser.feed(chunk)
                chunk = source.read(65536)
            self._root = parser.close()
            return self._root

    def iter(self, tag=None):
        """Create and return tree iterator for the root element.

        The iterator loops over all elements in this tree, in document order.

        *tag* is a string with the tag name to iterate over
        (default is to return all elements).

        """
        # assert self._root is not None
        return self._root.iter(tag)

    # compatibility
    def getiterator(self, tag=None):
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'tree.iter()' or 'list(tree.iter())' instead.",
            DeprecationWarning, stacklevel=2
        )
        return list(self.iter(tag))

    def find(self, path, namespaces=None):
        """Find first matching element by tag name or path.

        Same as getroot().find(path), which is Element.find()

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return the first matching element, or None if no element was found.

        """
        # assert self._root is not None
        return self._root.find(self._relative_path(path), namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find text for first matching element by tag name or path.

        Same as getroot().findtext(path), which is Element.findtext()

        *path* is a string having either an element tag or an XPath,
        *default* is passed through to the root's findtext(),
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return whatever the root element's findtext() returns.

        """
        # assert self._root is not None
        return self._root.findtext(self._relative_path(path), default,
                                   namespaces)

    def findall(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        Same as getroot().findall(path), which is Element.findall().

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return list containing all matching elements in document order.

        """
        # assert self._root is not None
        return self._root.findall(self._relative_path(path), namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        Same as getroot().iterfind(path), which is element.iterfind()

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return an iterable yielding all matching elements in document order.

        """
        # assert self._root is not None
        return self._root.iterfind(self._relative_path(path), namespaces)

    def write(self, file_or_filename,
              encoding=None,
              xml_declaration=None,
              default_namespace=None,
              method=None, *,
              short_empty_elements=True):
        """Write element tree to a file as XML.

        Arguments:
          *file_or_filename* -- file name or a file object opened for writing

          *encoding* -- the output encoding (default: US-ASCII, or UTF-8
                        when *method* is "c14n")

          *xml_declaration* -- bool indicating if an XML declaration should be
                               added to the output. If None, an XML declaration
                               is added if encoding IS NOT either of:
                               US-ASCII, UTF-8, or Unicode

          *default_namespace* -- sets the default XML namespace (for "xmlns")

          *method* -- either "xml" (default), "html", "text", or "c14n"

          *short_empty_elements* -- controls the formatting of elements
                                    that contain no content. If True (default)
                                    they are emitted as a single self-closed
                                    tag, otherwise they are emitted as a pair
                                    of start/end tags

        Raises ValueError if *method* is not a known serialization method.

        """
        if not method:
            method = "xml"
        elif method not in _serialize:
            raise ValueError("unknown method %r" % method)
        if not encoding:
            encoding = "utf-8" if method == "c14n" else "us-ascii"
        enc_lower = encoding.lower()
        # A declaration is only ever written for the "xml" method: always
        # when explicitly requested, and by default only for encodings other
        # than the three listed below.
        if xml_declaration is None:
            wants_declaration = enc_lower not in ("utf-8", "us-ascii",
                                                  "unicode")
        else:
            wants_declaration = bool(xml_declaration)
        with _get_writer(file_or_filename, enc_lower) as write:
            if method == "xml" and wants_declaration:
                if enc_lower == "unicode":
                    # Retrieve the default encoding for the xml declaration
                    import locale
                    declared_encoding = locale.getpreferredencoding()
                else:
                    declared_encoding = encoding
                write("<?xml version='1.0' encoding='%s'?>\n" % (
                    declared_encoding,))
            if method == "text":
                _serialize_text(write, self._root)
            else:
                qnames, namespaces = _namespaces(self._root, default_namespace)
                _serialize[method](write, self._root, qnames, namespaces,
                                   short_empty_elements=short_empty_elements)

    def write_c14n(self, file):
        # lxml.etree compatibility.  use output method instead
        return self.write(file, method="c14n")
778 | |
779 # -------------------------------------------------------------------- | |
780 # serialization support | |
781 | |
782 @contextlib.contextmanager | |
783 def _get_writer(file_or_filename, encoding): | |
784 # returns text write method and release all resources after using | |
785 try: | |
786 write = file_or_filename.write | |
787 except AttributeError: | |
788 # file_or_filename is a file name | |
789 if encoding == "unicode": | |
790 file = open(file_or_filename, "w") | |
791 else: | |
792 file = open(file_or_filename, "w", encoding=encoding, | |
793 errors="xmlcharrefreplace") | |
794 with file: | |
795 yield file.write | |
796 else: | |
797 # file_or_filename is a file-like object | |
798 # encoding determines if it is a text or binary writer | |
799 if encoding == "unicode": | |
800 # use a text writer as is | |
801 yield write | |
802 else: | |
803 # wrap a binary writer with TextIOWrapper | |
804 with contextlib.ExitStack() as stack: | |
805 if isinstance(file_or_filename, io.BufferedIOBase): | |
806 file = file_or_filename | |
807 elif isinstance(file_or_filename, io.RawIOBase): | |
808 file = io.BufferedWriter(file_or_filename) | |
809 # Keep the original file open when the BufferedWriter is | |
810 # destroyed | |
811 stack.callback(file.detach) | |
812 else: | |
813 # This is to handle passed objects that aren't in the | |
814 # IOBase hierarchy, but just have a write method | |
815 file = io.BufferedIOBase() | |
816 file.writable = lambda: True | |
817 file.write = write | |
818 try: | |
819 # TextIOWrapper uses this methods to determine | |
820 # if BOM (for UTF-16, etc) should be added | |
821 file.seekable = file_or_filename.seekable | |
822 file.tell = file_or_filename.tell | |
823 except AttributeError: | |
824 pass | |
825 file = io.TextIOWrapper(file, | |
826 encoding=encoding, | |
827 errors="xmlcharrefreplace", | |
828 newline="\n") | |
829 # Keep the original file open when the TextIOWrapper is | |
830 # destroyed | |
831 stack.callback(file.detach) | |
832 yield file.write | |
833 | |
def _namespaces(elem, default_namespace=None):
    """Collect the qualified names and namespaces used in a tree.

    Walks *elem* and everything under it (via elem.iter()) and returns a
    pair ``(qnames, namespaces)``: *qnames* maps each name seen (tags,
    attribute keys, and QName-valued attribute values or text) to its
    serialized ``prefix:local`` form, and *namespaces* maps namespace URIs
    to prefixes.  When *default_namespace* is given it is bound to the
    empty prefix, and any unqualified name then raises ValueError.
    """
    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] != "{":
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = qname
                return
            uri, local = qname[1:].rsplit("}", 1)
            prefix = namespaces.get(uri)
            if prefix is None:
                prefix = _namespace_map.get(uri)
                if prefix is None:
                    prefix = "ns%d" % len(namespaces)
                if prefix != "xml":
                    namespaces[uri] = prefix
            if prefix:
                qnames[qname] = "%s:%s" % (prefix, local)
            else:
                qnames[qname] = local # default element
        except TypeError:
            _raise_serialization_error(qname)

    def note(name):
        # register a name the first time it is seen
        if name not in qnames:
            add_qname(name)

    # populate qname and namespaces table
    for node in elem.iter():
        tag = node.tag
        if isinstance(tag, QName):
            note(tag.text)
        elif isinstance(tag, str):
            note(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            note(key)
            if isinstance(value, QName):
                note(value.text)
        text = node.text
        if isinstance(text, QName):
            note(text.text)
    return qnames, namespaces
894 | |
def _serialize_xml(write, elem, qnames, namespaces,
                   short_empty_elements, **kwargs):
    """Write *elem* and its subtree as XML through the *write* callable.

    *qnames* maps tags and attribute names to their serialized form.
    *namespaces* maps URIs to prefixes and is emitted as ``xmlns``
    declarations on this element; child elements are serialized with
    *namespaces* set to None.  If *short_empty_elements* is true, an element
    without text or children is written as ``<tag />``.
    """
    raw_tag = elem.tag
    text = elem.text
    if raw_tag is Comment:
        write("<!--%s-->" % text)
    elif raw_tag is ProcessingInstruction:
        write("<?%s?>" % text)
    else:
        name = qnames[raw_tag]
        if name is None:
            # the element itself is not written, only its content
            if text:
                write(_escape_cdata(text))
            for child in elem:
                _serialize_xml(write, child, qnames, None,
                               short_empty_elements=short_empty_elements)
        else:
            write("<" + name)
            attributes = list(elem.items())
            if namespaces:
                by_prefix = sorted(namespaces.items(),
                                   key=lambda item: item[1])  # sort on prefix
                for uri, prefix in by_prefix:
                    if prefix:
                        prefix = ":" + prefix
                    write(" xmlns%s=\"%s\"" % (
                        prefix,
                        _escape_attrib(uri)
                        ))
            for key, value in attributes:
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib(value)
                write(" %s=\"%s\"" % (qnames[key], value))
            if text or len(elem) or not short_empty_elements:
                write(">")
                if text:
                    write(_escape_cdata(text))
                for child in elem:
                    _serialize_xml(write, child, qnames, None,
                                   short_empty_elements=short_empty_elements)
                write("</" + name + ">")
            else:
                write(" />")
    tail = elem.tail
    if tail:
        write(_escape_cdata(tail))
944 | |
# Lower-case names of the HTML elements that are written without an end tag.
HTML_EMPTY = {
    "area", "base", "basefont", "br", "col", "frame", "hr",
    "img", "input", "isindex", "link", "meta", "param",
}
952 | |
def _serialize_html(write, elem, qnames, namespaces, **kwargs):
    """Write *elem* and its subtree as HTML through the *write* callable.

    *qnames* maps tags and attribute names to their serialized form.
    *namespaces* maps URIs to prefixes and is emitted as ``xmlns``
    declarations on this element; child elements are serialized with
    *namespaces* set to None.  The text of ``script`` and ``style`` elements
    is written unescaped, and elements listed in HTML_EMPTY get no end tag.
    """
    raw_tag = elem.tag
    text = elem.text
    if raw_tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif raw_tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    else:
        name = qnames[raw_tag]
        if name is None:
            # the element itself is not written, only its content
            if text:
                write(_escape_cdata(text))
            for child in elem:
                _serialize_html(write, child, qnames, None)
        else:
            write("<" + name)
            attributes = list(elem.items())
            if namespaces:
                by_prefix = sorted(namespaces.items(),
                                   key=lambda item: item[1])  # sort on prefix
                for uri, prefix in by_prefix:
                    if prefix:
                        prefix = ":" + prefix
                    write(" xmlns%s=\"%s\"" % (
                        prefix,
                        _escape_attrib(uri)
                        ))
            for key, value in attributes:
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib_html(value)
                # FIXME: handle boolean attributes
                write(" %s=\"%s\"" % (qnames[key], value))
            write(">")
            lowered = name.lower()
            if text:
                if lowered in ("script", "style"):
                    write(text)
                else:
                    write(_escape_cdata(text))
            for child in elem:
                _serialize_html(write, child, qnames, None)
            if lowered not in HTML_EMPTY:
                write("</" + name + ">")
    tail = elem.tail
    if tail:
        write(_escape_cdata(tail))
1002 | |
def _serialize_text(write, elem):
    """Write the text content of *elem*, followed by its tail, via *write*."""
    for fragment in elem.itertext():
        write(fragment)
    trailing = elem.tail
    if trailing:
        write(trailing)
1008 | |
# Dispatch table: serialization method name -> serializer function
# defined in this module.
_serialize = {
    "xml": _serialize_xml,
    "html": _serialize_html,
    "text": _serialize_text,
# this optional method is imported at the end of the module
#   "c14n": _serialize_c14n,
}
1016 | |
1017 | |
def register_namespace(prefix, uri):
    """Register a namespace prefix.

    The registry is global, and any existing mapping for either the
    given prefix or the namespace URI will be removed.

    *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
    attributes in this namespace will be serialized with prefix if possible.

    ValueError is raised if prefix is reserved or is invalid.

    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Collect the conflicting entries first so that the registry is not
    # modified while it is being iterated.
    stale = [known_uri
             for known_uri, known_prefix in _namespace_map.items()
             if known_uri == uri or known_prefix == prefix]
    for known_uri in stale:
        del _namespace_map[known_uri]
    _namespace_map[uri] = prefix


# Global registry mapping namespace URIs to their preferred prefixes.
_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}
# For tests and troubleshooting
register_namespace._namespace_map = _namespace_map
1051 | |
def _raise_serialization_error(text):
    """Raise TypeError reporting that *text* cannot be serialized."""
    type_name = type(text).__name__
    raise TypeError(f"cannot serialize {text!r} (type {type_name})")
1056 | |
def _escape_cdata(text):
    """Return *text* with ``&``, ``<`` and ``>`` replaced by XML entities.

    A TypeError is raised (through _raise_serialization_error) if *text*
    does not support the string operations used here.
    """
    # escape character data
    try:
        # it's worth avoiding do-nothing calls for strings that are
        # shorter than 500 characters, or so.  assume that's, by far,
        # the most common case in most applications.
        # "&" has to be handled first so that the ampersands introduced by
        # the other replacements are not escaped a second time.
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1072 | |
def _escape_attrib(text):
    """Return *text* escaped for use inside a double-quoted XML attribute.

    ``&``, ``<``, ``>`` and ``"`` are replaced by entities.  CR and CR LF
    are normalized to LF, after which every LF and TAB is written as a
    character reference.

    A TypeError is raised (through _raise_serialization_error) if *text*
    does not support the string operations used here.
    """
    # escape attribute value
    try:
        # "&" has to be handled first so that the ampersands introduced by
        # the other replacements are not escaped a second time.
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        # The following business with carriage returns is to satisfy
        # Section 2.11 of the XML specification, stating that
        # CR or CR LF should be replaced with just LF
        # http://www.w3.org/TR/REC-xml/#sec-line-ends
        if "\r\n" in text:
            text = text.replace("\r\n", "\n")
        if "\r" in text:
            text = text.replace("\r", "\n")
        # The following four lines are issue 17582
        if "\n" in text:
            text = text.replace("\n", "&#10;")
        if "\t" in text:
            text = text.replace("\t", "&#09;")
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1100 | |
def _escape_attrib_html(text):
    """Return *text* escaped for use inside a double-quoted HTML attribute.

    ``&``, ``>`` and ``"`` are replaced by entities; ``<`` is left as is.

    A TypeError is raised (through _raise_serialization_error) if *text*
    does not support the string operations used here.
    """
    # escape attribute value
    try:
        # "&" has to be handled first so that the ampersands introduced by
        # the other replacements are not escaped a second time.
        if "&" in text:
            text = text.replace("&", "&amp;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1113 | |
1114 # -------------------------------------------------------------------- | |
1115 | |
def tostring(element, encoding=None, method=None, *,
             xml_declaration=None, default_namespace=None,
             short_empty_elements=True):
    """Generate string representation of XML element.

    All subelements are included.  If encoding is "unicode", a string
    is returned. Otherwise a bytestring is returned.

    *element* is an Element instance, *encoding* is an optional output
    encoding defaulting to US-ASCII, *method* is an optional output which can
    be one of "xml" (default), "html", "text" or "c14n", *default_namespace*
    sets the default XML namespace (for "xmlns").

    Returns an (optionally) encoded string containing the XML data.

    """
    if encoding == 'unicode':
        buffer = io.StringIO()
    else:
        buffer = io.BytesIO()
    tree = ElementTree(element)
    tree.write(buffer, encoding,
               xml_declaration=xml_declaration,
               default_namespace=default_namespace,
               method=method,
               short_empty_elements=short_empty_elements)
    return buffer.getvalue()
1139 | |
class _ListDataStream(io.BufferedIOBase):
    """An auxiliary stream accumulating into a list reference."""

    def __init__(self, lst):
        # *lst* is kept by reference; written chunks are appended to it.
        self.lst = lst

    def writable(self):
        """Report that the stream accepts writes."""
        return True

    def seekable(self):
        """Report that the stream is seekable."""
        return True

    def write(self, b):
        """Append the chunk *b* to the backing list."""
        self.lst.append(b)

    def tell(self):
        """Return the current length of the backing list."""
        return len(self.lst)
1156 | |
def tostringlist(element, encoding=None, method=None, *,
                 xml_declaration=None, default_namespace=None,
                 short_empty_elements=True):
    """Serialize *element* and return the written chunks as a list.

    The arguments are passed on to ElementTree.write() in the same way
    as tostring() does.
    """
    chunks = []
    ElementTree(element).write(_ListDataStream(chunks), encoding,
                               xml_declaration=xml_declaration,
                               default_namespace=default_namespace,
                               method=method,
                               short_empty_elements=short_empty_elements)
    return chunks
1168 | |
1169 | |
def dump(elem):
    """Write element tree or element structure to sys.stdout.

    This function should be used for debugging only.

    *elem* is either an ElementTree, or a single Element.  The exact output
    format is implementation dependent.  In this version, it's written as an
    ordinary XML file.

    """
    # debugging
    tree = elem if isinstance(elem, ElementTree) else ElementTree(elem)
    tree.write(sys.stdout, encoding="unicode")
    tail = tree.getroot().tail
    # make sure the output always ends with a newline
    if not (tail and tail[-1] == "\n"):
        sys.stdout.write("\n")
1187 | |
1188 # -------------------------------------------------------------------- | |
1189 # parsing | |
1190 | |
1191 | |
def parse(source, parser=None):
    """Parse XML document into element tree.

    *source* is a filename or file object containing XML data,
    *parser* is an optional parser instance defaulting to XMLParser.

    Return an ElementTree instance.

    """
    document = ElementTree()
    document.parse(source, parser)
    return document
1204 | |
1205 | |
def iterparse(source, events=None, parser=None):
    """Incrementally parse XML document into ElementTree.

    The returned iterator reports what's going on to the user based on the
    *events* it is initialized with.  The supported events are the strings
    "start", "end", "start-ns" and "end-ns" (the "ns" events are used to get
    detailed namespace information).  If *events* is omitted, only
    "end" events are reported.

    *source* is a filename or file object containing XML data, *events* is
    a list of events to report back, *parser* is an optional parser instance.

    Returns an iterator providing (event, elem) pairs.  Its ``root``
    attribute is None until the input has been consumed completely.

    """
    # Use the internal, undocumented _parser argument for now; When the
    # parser argument of iterparse is removed, this can be killed.
    pullparser = XMLPullParser(events=events, _parser=parser)

    def iterator():
        try:
            chunk = True
            while chunk:
                # hand out whatever is pending before reading more input
                yield from pullparser.read_events()
                # load event buffer
                chunk = source.read(16 * 1024)
                if chunk:
                    pullparser.feed(chunk)
            root = pullparser._close_and_return_root()
            yield from pullparser.read_events()
            it.root = root
        finally:
            if close_source:
                source.close()

    class IterParseIterator(collections.abc.Iterator):
        __next__ = iterator().__next__

    it = IterParseIterator()
    it.root = None
    del iterator, IterParseIterator

    # The generator above only starts running on the first next() call, so
    # binding these names after creating it is fine.
    close_source = False
    if not hasattr(source, "read"):
        source = open(source, "rb")
        close_source = True

    return it
1252 | |
1253 | |
class XMLPullParser:
    """Parser that is fed data in chunks and queues (event, elem) pairs."""

    def __init__(self, events=None, *, _parser=None):
        # The _parser argument is for internal use only and must not be relied
        # upon in user code.  It will be removed in a future release.
        # See http://bugs.python.org/issue17741 for more details.

        self._events_queue = collections.deque()
        if not _parser:
            _parser = XMLParser(target=TreeBuilder())
        self._parser = _parser
        # wire up the parser for event reporting
        if events is None:
            events = ("end",)
        self._parser._setevents(self._events_queue, events)

    def feed(self, data):
        """Feed encoded data to parser."""
        if self._parser is None:
            raise ValueError("feed() called after end of stream")
        if not data:
            return
        try:
            self._parser.feed(data)
        except SyntaxError as exc:
            # reported later, when read_events() reaches this entry
            self._events_queue.append(exc)

    def _close_and_return_root(self):
        # iterparse needs this to set its root attribute properly :(
        parser = self._parser
        root = parser.close()
        self._parser = None
        return root

    def close(self):
        """Finish feeding data to parser.

        Unlike XMLParser, does not return the root element.  Use
        read_events() to consume elements from XMLPullParser.
        """
        self._close_and_return_root()

    def read_events(self):
        """Return an iterator over currently available (event, elem) pairs.

        Events are consumed from the internal event queue as they are
        retrieved from the iterator.
        """
        queue = self._events_queue
        while queue:
            item = queue.popleft()
            if isinstance(item, Exception):
                raise item
            yield item
1305 | |
1306 | |
def XML(text, parser=None):
    """Parse XML document from string constant.

    This function can be used to embed "XML Literals" in Python code.

    *text* is a string containing XML data, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an Element instance.

    """
    active = parser if parser else XMLParser(target=TreeBuilder())
    active.feed(text)
    return active.close()
1322 | |
1323 | |
def XMLID(text, parser=None):
    """Parse XML document from string constant for its IDs.

    *text* is a string containing XML data, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an (Element, dict) tuple, in which the
    dict maps element id:s to elements.

    """
    active = parser if parser else XMLParser(target=TreeBuilder())
    active.feed(text)
    tree = active.close()
    ids = {}
    for node in tree.iter():
        ident = node.get("id")
        if ident:
            ids[ident] = node
    return tree, ids
1344 | |
# Parse XML document from string constant.  Alias for XML(): both names
# refer to the same function object.
fromstring = XML
1347 | |
def fromstringlist(sequence, parser=None):
    """Parse XML document from sequence of string fragments.

    *sequence* is a list or other sequence, *parser* is an optional parser
    instance, defaulting to the standard XMLParser.

    Returns an Element instance.

    """
    active = parser if parser else XMLParser(target=TreeBuilder())
    for fragment in sequence:
        active.feed(fragment)
    return active.close()
1362 | |
1363 # -------------------------------------------------------------------- | |
1364 | |
1365 | |
class TreeBuilder:
    """Generic element structure builder.

    This builder converts a sequence of start, data, and end method
    calls to a well-formed element structure.

    You can use this class to build an element structure using a custom XML
    parser, or a parser for some other XML-like format.

    *element_factory* is an optional element factory which is called
    to create new Element instances, as necessary.

    *comment_factory* is a factory to create comments to be used instead of
    the standard factory.  If *insert_comments* is false (the default),
    comments will not be inserted into the tree.

    *pi_factory* is a factory to create processing instructions to be used
    instead of the standard factory.  If *insert_pis* is false (the default),
    processing instructions will not be inserted into the tree.
    """

    def __init__(self, element_factory=None, *,
                 comment_factory=None, pi_factory=None,
                 insert_comments=False, insert_pis=False):
        self._data = []    # data collector
        self._elem = []    # element stack
        self._last = None  # last element
        self._root = None  # root element
        self._tail = None  # true if we're after an end tag
        self._comment_factory = (
            Comment if comment_factory is None else comment_factory)
        self.insert_comments = insert_comments
        self._pi_factory = (
            ProcessingInstruction if pi_factory is None else pi_factory)
        self.insert_pis = insert_pis
        self._factory = Element if element_factory is None else element_factory

    def close(self):
        """Flush builder buffers and return toplevel document Element."""
        assert len(self._elem) == 0, "missing end tags"
        assert self._root is not None, "missing toplevel element"
        return self._root

    def _flush(self):
        # Attach the collected character data to the last element, as its
        # tail if an end tag was seen most recently and as its text otherwise.
        if not self._data:
            return
        last = self._last
        if last is not None:
            text = "".join(self._data)
            if self._tail:
                assert last.tail is None, "internal error (tail)"
                last.tail = text
            else:
                assert last.text is None, "internal error (text)"
                last.text = text
        self._data = []

    def data(self, data):
        """Add text to current element."""
        self._data.append(data)

    def start(self, tag, attrs):
        """Open new element and return it.

        *tag* is the element name, *attrs* is a dict containing element
        attributes.

        """
        self._flush()
        elem = self._factory(tag, attrs)
        self._last = elem
        stack = self._elem
        if stack:
            stack[-1].append(elem)
        elif self._root is None:
            self._root = elem
        stack.append(elem)
        self._tail = 0
        return elem

    def end(self, tag):
        """Close and return current Element.

        *tag* is the element name.

        """
        self._flush()
        closed = self._elem.pop()
        self._last = closed
        assert closed.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   closed.tag, tag)
        self._tail = 1
        return closed

    def comment(self, text):
        """Create a comment using the comment_factory.

        *text* is the text of the comment.
        """
        factory = self._comment_factory
        return self._handle_single(factory, self.insert_comments, text)

    def pi(self, target, text=None):
        """Create a processing instruction using the pi_factory.

        *target* is the target name of the processing instruction.
        *text* is the data of the processing instruction, or ''.
        """
        factory = self._pi_factory
        return self._handle_single(factory, self.insert_pis, target, text)

    def _handle_single(self, factory, insert, *args):
        # Create a standalone node and, if *insert* is true, add it to the
        # element that is currently open.
        elem = factory(*args)
        if not insert:
            return elem
        self._flush()
        self._last = elem
        if self._elem:
            self._elem[-1].append(elem)
        self._tail = 1
        return elem
1485 | |
1486 | |
1487 # also see ElementTree and TreeBuilder | |
class XMLParser:
    """Element structure builder for XML source data based on the expat parser.

    *target* is an optional target object which defaults to an instance of the
    standard TreeBuilder class, *encoding* is an optional encoding string
    which if given, overrides the encoding specified in the XML file:
    http://www.iana.org/assignments/character-sets

    """

    def __init__(self, *, target=None, encoding=None):
        # Import expat lazily, falling back to the bare pyexpat module.
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" is passed as the second ParserCreate() argument; _fixname()
        # below prepends "{" to every reported name that contains "}".
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        self._error = expat.error
        self._names = {} # name memo cache
        # main callbacks
        # Only the callbacks that the target actually implements are wired up.
        parser.DefaultHandlerExpand = self._default
        if hasattr(target, 'start'):
            parser.StartElementHandler = self._start
        if hasattr(target, 'end'):
            parser.EndElementHandler = self._end
        if hasattr(target, 'start_ns'):
            parser.StartNamespaceDeclHandler = self._start_ns
        if hasattr(target, 'end_ns'):
            parser.EndNamespaceDeclHandler = self._end_ns
        if hasattr(target, 'data'):
            parser.CharacterDataHandler = target.data
        # miscellaneous callbacks
        if hasattr(target, 'comment'):
            parser.CommentHandler = target.comment
        if hasattr(target, 'pi'):
            parser.ProcessingInstructionHandler = target.pi
        # Configure pyexpat: buffering, new-style attribute handling.
        parser.buffer_text = 1
        parser.ordered_attributes = 1
        parser.specified_attributes = 1
        # None outside a doctype declaration, otherwise the list of doctype
        # tokens collected so far (see _default()).
        self._doctype = None
        # Maps entity names to replacement text; consulted by _default().
        self.entity = {}
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass # unknown

    def _setevents(self, events_queue, events_to_report):
        # Internal API for XMLPullParser
        # events_to_report: a list of events to report during parsing (same as
        # the *events* of XMLPullParser's constructor.
        # events_queue: a list of actual parsing events that will be populated
        # by the underlying parser.
        #
        # Each requested event replaces the matching expat handler with a
        # wrapper that appends an (event, value) pair to the queue.  The
        # wrappers bind their helpers as default arguments, so every handler
        # keeps the values of the loop iteration that created it.
        parser = self._parser
        append = events_queue.append
        for event_name in events_to_report:
            if event_name == "start":
                parser.ordered_attributes = 1
                parser.specified_attributes = 1
                def handler(tag, attrib_in, event=event_name, append=append,
                            start=self._start):
                    append((event, start(tag, attrib_in)))
                parser.StartElementHandler = handler
            elif event_name == "end":
                def handler(tag, event=event_name, append=append,
                            end=self._end):
                    append((event, end(tag)))
                parser.EndElementHandler = handler
            elif event_name == "start-ns":
                # TreeBuilder does not implement .start_ns()
                if hasattr(self.target, "start_ns"):
                    def handler(prefix, uri, event=event_name, append=append,
                                start_ns=self._start_ns):
                        append((event, start_ns(prefix, uri)))
                else:
                    def handler(prefix, uri, event=event_name, append=append):
                        append((event, (prefix or '', uri or '')))
                parser.StartNamespaceDeclHandler = handler
            elif event_name == "end-ns":
                # TreeBuilder does not implement .end_ns()
                if hasattr(self.target, "end_ns"):
                    def handler(prefix, event=event_name, append=append,
                                end_ns=self._end_ns):
                        append((event, end_ns(prefix)))
                else:
                    def handler(prefix, event=event_name, append=append):
                        append((event, None))
                parser.EndNamespaceDeclHandler = handler
            elif event_name == 'comment':
                def handler(text, event=event_name, append=append, self=self):
                    append((event, self.target.comment(text)))
                parser.CommentHandler = handler
            elif event_name == 'pi':
                def handler(pi_target, data, event=event_name, append=append,
                            self=self):
                    append((event, self.target.pi(pi_target, data)))
                parser.ProcessingInstructionHandler = handler
            else:
                raise ValueError("unknown event %r" % event_name)

    def _raiseerror(self, value):
        # Re-raise the expat error *value* as ParseError, copying its error
        # code and its (line, offset) position.
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    def _fixname(self, key):
        # expand qname, and convert name string to ascii, if possible
        # Results are memoized in self._names.
        try:
            name = self._names[key]
        except KeyError:
            name = key
            if "}" in name:
                name = "{" + name
            self._names[key] = name
        return name

    def _start_ns(self, prefix, uri):
        # Forward to the target, replacing a false prefix/uri by ''.
        return self.target.start_ns(prefix or '', uri or '')

    def _end_ns(self, prefix):
        # Forward to the target, replacing a false prefix by ''.
        return self.target.end_ns(prefix or '')

    def _start(self, tag, attr_list):
        # Handler for expat's StartElementHandler. Since ordered_attributes
        # is set, the attributes are reported as a list of alternating
        # attribute name,value.
        fixname = self._fixname
        tag = fixname(tag)
        attrib = {}
        if attr_list:
            for i in range(0, len(attr_list), 2):
                attrib[fixname(attr_list[i])] = attr_list[i+1]
        return self.target.start(tag, attrib)

    def _end(self, tag):
        # Handler for expat's EndElementHandler.
        return self.target.end(self._fixname(tag))

    def _default(self, text):
        # Installed as expat's DefaultHandlerExpand.  Handles entity
        # references that are looked up in self.entity, and collects the
        # pieces of a <!DOCTYPE ...> declaration.
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                data_handler = self.target.data
            except AttributeError:
                return
            try:
                # text looks like "&name;": strip the "&" and the ";"
                data_handler(self.entity[text[1:-1]])
            except KeyError:
                from xml.parsers import expat
                err = expat.error(
                    "undefined entity %s: line %d, column %d" %
                    (text, self.parser.ErrorLineNumber,
                    self.parser.ErrorColumnNumber)
                    )
                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = self.parser.ErrorLineNumber
                err.offset = self.parser.ErrorColumnNumber
                raise err
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = [] # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                type = self._doctype[1]
                if type == "PUBLIC" and n == 4:
                    name, type, pubid, system = self._doctype
                    if pubid:
                        # drop the first and last character of the token
                        pubid = pubid[1:-1]
                elif type == "SYSTEM" and n == 3:
                    name, type, system = self._doctype
                    pubid = None
                else:
                    return
                if hasattr(self.target, "doctype"):
                    self.target.doctype(name, pubid, system[1:-1])
                elif hasattr(self, "doctype"):
                    warnings.warn(
                        "The doctype() method of XMLParser is ignored. "
                        "Define doctype() method on the TreeBuilder target.",
                        RuntimeWarning)

                self._doctype = None

    def feed(self, data):
        """Feed encoded data to parser."""
        try:
            self.parser.Parse(data, 0)
        except self._error as v:
            self._raiseerror(v)

    def close(self):
        """Finish feeding data to parser and return element structure."""
        try:
            self.parser.Parse("", 1) # end of data
        except self._error as v:
            self._raiseerror(v)
        # Return whatever the target's close() returns; a target without a
        # close() method makes this method return None.
        try:
            close_handler = self.target.close
        except AttributeError:
            pass
        else:
            return close_handler()
        finally:
            # get rid of circular references
            del self.parser, self._parser
            del self.target, self._target
1713 | |
1714 | |
1715 # -------------------------------------------------------------------- | |
1716 # C14N 2.0 | |
1717 | |
def canonicalize(xml_data=None, *, out=None, from_file=None, **options):
    """Convert XML to its C14N 2.0 serialised form.

    If *out* is provided, it must be a file or file-like object that receives
    the serialised canonical XML output (text, not bytes) through its ``.write()``
    method.  To write to a file, open it in text mode with encoding "utf-8".
    If *out* is not provided, this function returns the output as text string.

    Either *xml_data* (an XML string) or *from_file* (a file path or
    file-like object) must be provided as input.

    The configuration options are the same as for the ``C14NWriterTarget``.
    """
    if xml_data is None and from_file is None:
        raise ValueError("Either 'xml_data' or 'from_file' must be provided as input")

    # Collect the output in memory when the caller supplied no sink.
    buffer = None
    if out is None:
        buffer = out = io.StringIO()

    parser = XMLParser(target=C14NWriterTarget(out.write, **options))

    if xml_data is not None:
        parser.feed(xml_data)
        parser.close()
    else:
        # from_file cannot be None here because of the check at the top
        parse(from_file, parser=parser)

    if buffer is None:
        return None
    return buffer.getvalue()
1746 | |
1747 | |
# Bound ``match`` method of a compiled pattern: returns a match object when
# the whole string has the form "word:word" (``\w+`` on both sides of a
# single colon), and None otherwise.
_looks_like_prefix_name = re.compile(r'^\w+:\w+$', re.UNICODE).match
1749 | |
1750 | |
class C14NWriterTarget:
    """
    Canonicalization writer target for the XMLParser.

    Serialises parse events to XML C14N 2.0.

    The *write* function is used for writing out the resulting data stream
    as text (not bytes).  To write to a file, open it in text mode with encoding
    "utf-8" and pass its ``.write`` method.

    Configuration options:

    - *with_comments*: set to true to include comments
    - *strip_text*: set to true to strip whitespace before and after text content
    - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}"
    - *qname_aware_tags*: a set of qname aware tag names in which prefixes
                          should be replaced in text content
    - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes
                           should be replaced in text content
    - *exclude_attrs*: a set of attribute names that should not be serialised
    - *exclude_tags*: a set of tag names that should not be serialised
    """
    def __init__(self, write, *,
                 with_comments=False, strip_text=False, rewrite_prefixes=False,
                 qname_aware_tags=None, qname_aware_attrs=None,
                 exclude_attrs=None, exclude_tags=None):
        self._write = write
        # Text chunks received since the last flush.
        self._data = []
        self._with_comments = with_comments
        self._strip_text = strip_text
        # Empty/None option collections are normalised to None so the hot
        # paths can use a cheap "is not None" test.
        self._exclude_attrs = set(exclude_attrs) if exclude_attrs else None
        self._exclude_tags = set(exclude_tags) if exclude_tags else None

        self._rewrite_prefixes = rewrite_prefixes
        if qname_aware_tags:
            self._qname_aware_tags = set(qname_aware_tags)
        else:
            self._qname_aware_tags = None
        if qname_aware_attrs:
            # Stored as a bound ``intersection`` so that calling it with an
            # attribute dict yields the qname-aware attribute names present.
            self._find_qname_aware_attrs = set(qname_aware_attrs).intersection
        else:
            self._find_qname_aware_attrs = None

        # Stack with globally and newly declared namespaces as (uri, prefix) pairs.
        self._declared_ns_stack = [[
            ("http://www.w3.org/XML/1998/namespace", "xml"),
        ]]
        # Stack with user declared namespace prefixes as (uri, prefix) pairs.
        self._ns_stack = []
        if not rewrite_prefixes:
            self._ns_stack.append(list(_namespace_map.items()))
        self._ns_stack.append([])
        # uri -> generated prefix; only filled when rewriting prefixes.
        self._prefix_map = {}
        # One entry per open element, plus the initial document-level entry.
        self._preserve_space = [False]
        # (tag, attrs, new_namespaces) of a qname-aware element whose start
        # tag is deferred until its text content is known, else None.
        self._pending_start = None
        self._root_seen = False
        self._root_done = False
        # Nesting depth inside an excluded tag; 0 means "not ignoring".
        self._ignored_depth = 0

    def _iter_namespaces(self, ns_stack, _reversed=reversed):
        """Yield the (uri, prefix) pairs of *ns_stack*, innermost level first."""
        for namespaces in _reversed(ns_stack):
            if namespaces:  # almost no element declares new namespaces
                yield from namespaces

    def _resolve_prefix_name(self, prefixed_name):
        """Turn ``prefix:name`` into ``{uri}name`` using the prefixes in scope.

        Raises ValueError if the prefix is not declared in ``self._ns_stack``.
        """
        prefix, name = prefixed_name.split(':', 1)
        for uri, p in self._iter_namespaces(self._ns_stack):
            if p == prefix:
                return f'{{{uri}}}{name}'
        raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope')

    def _qname(self, qname, uri=None):
        """Return ``(prefixed_name, local_name, uri)`` for *qname*.

        *qname* is in ``{uri}local`` or plain ``local`` form unless *uri* is
        passed explicitly, in which case *qname* is taken as the local name.
        If the namespace has no usable declaration yet, one is appended to the
        innermost level of ``self._declared_ns_stack``.

        Raises ValueError if the namespace is not declared in scope.
        """
        if uri is None:
            uri, tag = qname[1:].rsplit('}', 1) if qname[:1] == '{' else ('', qname)
        else:
            tag = qname

        # A prefix seen at an inner level shadows the same prefix further out.
        prefixes_seen = set()
        for u, prefix in self._iter_namespaces(self._declared_ns_stack):
            if u == uri and prefix not in prefixes_seen:
                return f'{prefix}:{tag}' if prefix else tag, tag, uri
            prefixes_seen.add(prefix)

        # Not declared yet => add new declaration.
        if self._rewrite_prefixes:
            if uri in self._prefix_map:
                prefix = self._prefix_map[uri]
            else:
                prefix = self._prefix_map[uri] = f'n{len(self._prefix_map)}'
            self._declared_ns_stack[-1].append((uri, prefix))
            return f'{prefix}:{tag}', tag, uri

        if not uri and '' not in prefixes_seen:
            # No default namespace declared => no prefix needed.
            return tag, tag, uri

        for u, prefix in self._iter_namespaces(self._ns_stack):
            if u == uri:
                self._declared_ns_stack[-1].append((uri, prefix))
                return f'{prefix}:{tag}' if prefix else tag, tag, uri

        raise ValueError(f'Namespace "{uri}" is not declared in scope')

    def data(self, data):
        """Buffer a chunk of character data (dropped inside excluded tags)."""
        if not self._ignored_depth:
            self._data.append(data)

    def _flush(self, _join_text=''.join):
        """Write a deferred start tag, if any, followed by the buffered text.

        Text that looks like ``prefix:name`` inside a deferred (qname-aware)
        element is handed to ``_start``, which writes it with the resolved
        prefix.  Text collected before the root element is discarded.
        """
        data = _join_text(self._data)
        del self._data[:]
        if self._strip_text and not self._preserve_space[-1]:
            data = data.strip()
        if self._pending_start is not None:
            args, self._pending_start = self._pending_start, None
            qname_text = data if data and _looks_like_prefix_name(data) else None
            self._start(*args, qname_text)
            if qname_text is not None:
                # _start() already wrote the (rewritten) text content.
                return
        if data and self._root_seen:
            self._write(_escape_cdata_c14n(data))

    def start_ns(self, prefix, uri):
        """Record a namespace declaration for the next element."""
        if self._ignored_depth:
            return
        # we may have to resolve qnames in text content; a deferred start tag
        # must also be written even when no text has been collected for it.
        if self._data or self._pending_start is not None:
            self._flush()
        self._ns_stack[-1].append((uri, prefix))

    def start(self, tag, attrs):
        """Handle an element start event.

        Elements in *exclude_tags* (and everything nested in them) are
        skipped.  The start tag of a qname-aware element is deferred until
        its text content has been seen.
        """
        if self._exclude_tags is not None and (
                self._ignored_depth or tag in self._exclude_tags):
            self._ignored_depth += 1
            return
        # Flush on a pending start tag as well as on buffered text: a
        # qname-aware parent without leading text would otherwise never get
        # its start tag written.
        if self._data or self._pending_start is not None:
            self._flush()

        new_namespaces = []
        self._declared_ns_stack.append(new_namespaces)

        if self._qname_aware_tags is not None and tag in self._qname_aware_tags:
            # Need to parse text first to see if it requires a prefix declaration.
            self._pending_start = (tag, attrs, new_namespaces)
            return
        self._start(tag, attrs, new_namespaces)

    def _start(self, tag, attrs, new_namespaces, qname_text=None):
        """Write the start tag for *tag* with its namespaces and attributes.

        *new_namespaces* is the list already pushed on
        ``self._declared_ns_stack`` for this element; ``_qname`` appends the
        declarations this element needs to it.  If *qname_text* is given, it
        is the element's ``prefix:name`` text content and is written right
        after the start tag using the output prefix.
        """
        if self._exclude_attrs is not None and attrs:
            attrs = {k: v for k, v in attrs.items() if k not in self._exclude_attrs}

        qnames = {tag, *attrs}
        resolved_names = {}

        # Resolve prefixes in attribute and tag text.
        if qname_text is not None:
            qname = resolved_names[qname_text] = self._resolve_prefix_name(qname_text)
            qnames.add(qname)
        if self._find_qname_aware_attrs is not None and attrs:
            qattrs = self._find_qname_aware_attrs(attrs)
            if qattrs:
                for attr_name in qattrs:
                    value = attrs[attr_name]
                    if _looks_like_prefix_name(value):
                        qname = resolved_names[value] = self._resolve_prefix_name(value)
                        qnames.add(qname)
            else:
                qattrs = None
        else:
            qattrs = None

        # Assign prefixes in lexicographical order of used URIs.
        parse_qname = self._qname
        parsed_qnames = {n: parse_qname(n) for n in sorted(
            qnames, key=lambda n: n.split('}', 1))}

        # Write namespace declarations in prefix order ...
        if new_namespaces:
            attr_list = [
                ('xmlns:' + prefix if prefix else 'xmlns', uri)
                for uri, prefix in new_namespaces
            ]
            attr_list.sort()
        else:
            # almost always empty
            attr_list = []

        # ... followed by attributes in URI+name order
        if attrs:
            for k, v in sorted(attrs.items()):
                if qattrs is not None and k in qattrs and v in resolved_names:
                    v = parsed_qnames[resolved_names[v]][0]
                attr_qname, attr_name, uri = parsed_qnames[k]
                # No prefix for attributes in default ('') namespace.
                attr_list.append((attr_qname if uri else attr_name, v))

        # Honour xml:space attributes.
        space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space')
        self._preserve_space.append(
            space_behaviour == 'preserve' if space_behaviour
            else self._preserve_space[-1])

        # Write the tag.
        write = self._write
        write('<' + parsed_qnames[tag][0])
        if attr_list:
            write(''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list]))
        write('>')

        # Write the resolved qname text content.
        if qname_text is not None:
            write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0]))

        self._root_seen = True
        # Fresh level for the namespace declarations of the next element.
        self._ns_stack.append([])

    def end(self, tag):
        """Handle an element end event and pop the per-element state."""
        if self._ignored_depth:
            self._ignored_depth -= 1
            return
        # An empty qname-aware element still has its start tag pending here;
        # it has to be written before the end tag and before the stacks that
        # _start() pushes to are popped below.
        if self._data or self._pending_start is not None:
            self._flush()
        self._write(f'</{self._qname(tag)[0]}>')
        self._preserve_space.pop()
        # Only the initial document-level entry is left once the root closes.
        self._root_done = len(self._preserve_space) == 1
        self._declared_ns_stack.pop()
        self._ns_stack.pop()

    def comment(self, text):
        """Write a comment if *with_comments* is set and it is not excluded.

        Comments before or after the root element are separated from it by a
        newline.
        """
        if not self._with_comments:
            return
        if self._ignored_depth:
            return
        if self._root_done:
            self._write('\n')
        elif self._pending_start is not None or (self._root_seen and self._data):
            # Keep document order: text and a deferred start tag come first.
            self._flush()
        self._write(f'<!--{_escape_cdata_c14n(text)}-->')
        if not self._root_seen:
            self._write('\n')

    def pi(self, target, data):
        """Write a processing instruction unless inside an excluded tag.

        Processing instructions before or after the root element are
        separated from it by a newline.
        """
        if self._ignored_depth:
            return
        if self._root_done:
            self._write('\n')
        elif self._pending_start is not None or (self._root_seen and self._data):
            # Keep document order: text and a deferred start tag come first.
            self._flush()
        self._write(
            f'<?{target} {_escape_cdata_c14n(data)}?>' if data else f'<?{target}?>')
        if not self._root_seen:
            self._write('\n')
2002 | |
2003 | |
2004 def _escape_cdata_c14n(text): | |
2005 # escape character data | |
2006 try: | |
2007 # it's worth avoiding do-nothing calls for strings that are | |
2008 # shorter than 500 character, or so. assume that's, by far, | |
2009 # the most common case in most applications. | |
2010 if '&' in text: | |
2011 text = text.replace('&', '&') | |
2012 if '<' in text: | |
2013 text = text.replace('<', '<') | |
2014 if '>' in text: | |
2015 text = text.replace('>', '>') | |
2016 if '\r' in text: | |
2017 text = text.replace('\r', '
') | |
2018 return text | |
2019 except (TypeError, AttributeError): | |
2020 _raise_serialization_error(text) | |
2021 | |
2022 | |
2023 def _escape_attrib_c14n(text): | |
2024 # escape attribute value | |
2025 try: | |
2026 if '&' in text: | |
2027 text = text.replace('&', '&') | |
2028 if '<' in text: | |
2029 text = text.replace('<', '<') | |
2030 if '"' in text: | |
2031 text = text.replace('"', '"') | |
2032 if '\t' in text: | |
2033 text = text.replace('\t', '	') | |
2034 if '\n' in text: | |
2035 text = text.replace('\n', '
') | |
2036 if '\r' in text: | |
2037 text = text.replace('\r', '
') | |
2038 return text | |
2039 except (TypeError, AttributeError): | |
2040 _raise_serialization_error(text) | |
2041 | |
2042 | |
2043 # -------------------------------------------------------------------- | |
2044 | |
# Import the C accelerators.  If the _elementtree extension module cannot be
# imported, the ImportError is swallowed on purpose and the names already
# defined in this module stay in effect.
try:
    # Element is going to be shadowed by the C implementation.  Keep the
    # Python version accessible under a private name for external code that
    # makes "creative" use of it (see tests).  This assignment must stay
    # ahead of the star import below.
    _Element_Py = Element

    # Names expected from the star import:
    # Element, SubElement, ParseError, TreeBuilder, XMLParser, _set_factories
    from _elementtree import *
    # Imported explicitly as well as being listed above.
    # NOTE(review): presumably the star import skips underscore-prefixed
    # names -- confirm against _elementtree.
    from _elementtree import _set_factories
except ImportError:
    pass
else:
    # Runs only when the accelerator was imported successfully.
    # NOTE(review): presumably registers the Python-level Comment and
    # ProcessingInstruction factories with the C module -- confirm against
    # _elementtree.
    _set_factories(Comment, ProcessingInstruction)