csp2: CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/xml/etree/ElementTree.py annotate

annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/xml/etree/ElementTree.py @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d

author	jpayne
date	Tue, 18 Mar 2025 16:23:26 -0400
parents
children

rev	line source
jpayne@68	1 """Lightweight XML support for Python.
jpayne@68	2
jpayne@68	3 XML is an inherently hierarchical data format, and the most natural way to
jpayne@68	4 represent it is with a tree. This module has two classes for this purpose:
jpayne@68	5
jpayne@68	6 1. ElementTree represents the whole XML document as a tree and
jpayne@68	7
jpayne@68	8 2. Element represents a single node in this tree.
jpayne@68	9
jpayne@68	10 Interactions with the whole document (reading and writing to/from files) are
jpayne@68	11 usually done on the ElementTree level. Interactions with a single XML element
jpayne@68	12 and its sub-elements are done on the Element level.
jpayne@68	13
jpayne@68	14 Element is a flexible container object designed to store hierarchical data
jpayne@68	15 structures in memory. It can be described as a cross between a list and a
jpayne@68	16 dictionary. Each Element has a number of properties associated with it:
jpayne@68	17
jpayne@68	18 'tag' - a string containing the element's name.
jpayne@68	19
jpayne@68	20 'attributes' - a Python dictionary storing the element's attributes.
jpayne@68	21
jpayne@68	22 'text' - a string containing the element's text content.
jpayne@68	23
jpayne@68	24 'tail' - an optional string containing text after the element's end tag.
jpayne@68	25
jpayne@68	26 And a number of child elements stored in a Python sequence.
jpayne@68	27
jpayne@68	28 To create an element instance, use the Element constructor,
jpayne@68	29 or the SubElement factory function.
jpayne@68	30
jpayne@68	31 You can also use the ElementTree class to wrap an element structure
jpayne@68	32 and convert it to and from XML.
jpayne@68	33
jpayne@68	34 """
jpayne@68	35
jpayne@68	36 #---------------------------------------------------------------------
jpayne@68	37 # Licensed to PSF under a Contributor Agreement.
jpayne@68	38 # See http://www.python.org/psf/license for licensing details.
jpayne@68	39 #
jpayne@68	40 # ElementTree
jpayne@68	41 # Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved.
jpayne@68	42 #
jpayne@68	43 # fredrik@pythonware.com
jpayne@68	44 # http://www.pythonware.com
jpayne@68	45 # --------------------------------------------------------------------
jpayne@68	46 # The ElementTree toolkit is
jpayne@68	47 #
jpayne@68	48 # Copyright (c) 1999-2008 by Fredrik Lundh
jpayne@68	49 #
jpayne@68	50 # By obtaining, using, and/or copying this software and/or its
jpayne@68	51 # associated documentation, you agree that you have read, understood,
jpayne@68	52 # and will comply with the following terms and conditions:
jpayne@68	53 #
jpayne@68	54 # Permission to use, copy, modify, and distribute this software and
jpayne@68	55 # its associated documentation for any purpose and without fee is
jpayne@68	56 # hereby granted, provided that the above copyright notice appears in
jpayne@68	57 # all copies, and that both that copyright notice and this permission
jpayne@68	58 # notice appear in supporting documentation, and that the name of
jpayne@68	59 # Secret Labs AB or the author not be used in advertising or publicity
jpayne@68	60 # pertaining to distribution of the software without specific, written
jpayne@68	61 # prior permission.
jpayne@68	62 #
jpayne@68	63 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
jpayne@68	64 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
jpayne@68	65 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
jpayne@68	66 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
jpayne@68	67 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
jpayne@68	68 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
jpayne@68	69 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
jpayne@68	70 # OF THIS SOFTWARE.
jpayne@68	71 # --------------------------------------------------------------------
jpayne@68	72
# Names exported by ``from xml.etree.ElementTree import *``.
__all__ = [
    "Comment",
    "dump",
    "Element",
    "ElementTree",
    "fromstring",
    "fromstringlist",
    "iselement",
    "iterparse",
    "parse",
    "ParseError",
    "PI",
    "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring",
    "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML",
    "XMLID",
    "XMLParser",
    "XMLPullParser",
    "register_namespace",
    "canonicalize",
    "C14NWriterTarget",
]

# Version string of the ElementTree toolkit API implemented by this module.
VERSION = "1.3.0"
jpayne@68	94
jpayne@68	95 import sys
jpayne@68	96 import re
jpayne@68	97 import warnings
jpayne@68	98 import io
jpayne@68	99 import collections
jpayne@68	100 import collections.abc
jpayne@68	101 import contextlib
jpayne@68	102
jpayne@68	103 from . import ElementPath
jpayne@68	104
jpayne@68	105
class ParseError(SyntaxError):
    """Raised when an XML document cannot be parsed.

    Besides the usual exception value, code raising this error attaches
    two extra attributes to the instance:

        'code'     - the specific exception code
        'position' - the line and column of the error

    """
jpayne@68	116
jpayne@68	117 # --------------------------------------------------------------------
jpayne@68	118
jpayne@68	119
def iselement(element):
    """Return True if *element* looks like an Element.

    The check is duck-typed: any object exposing a 'tag' attribute
    qualifies.

    """
    try:
        element.tag
    except AttributeError:
        return False
    return True
jpayne@68	123
jpayne@68	124
class Element:
    """An XML element.

    This class is the reference implementation of the Element interface.

    An element's length is its number of subelements.  That means if you
    want to check if an element is truly empty, you should check BOTH
    its length AND its text attribute.

    The element tag, attribute names, and attribute values can be either
    bytes or strings.

    tag is the element name.  attrib is an optional dictionary containing
    element attributes.  extra are additional element attributes given as
    keyword arguments.

    Example form:
        <tag attrib>text<child/>...</tag>tail

    """

    # The element's name.
    tag = None

    # Dictionary of the element's attributes.
    attrib = None

    # Text before the first subelement.  Either a string or None; when
    # there is no text it may be None or the empty string, depending on
    # the parser.
    text = None

    # Text after this element's end tag but before the next sibling's
    # start tag.  Either a string or None; when there is no text it may be
    # None or the empty string, depending on the parser.
    tail = None

    def __init__(self, tag, attrib={}, **extra):
        """Create an element named *tag*.

        attrib must be a dict (TypeError otherwise); it is copied, never
        stored, so the shared default and the caller's dict are not
        mutated.  Keyword arguments in *extra* are merged on top of it.

        """
        if not isinstance(attrib, dict):
            raise TypeError("attrib must be dict, not %s" % (
                attrib.__class__.__name__,))
        self.tag = tag
        # Merge into a fresh dict; keyword attributes win on key clashes.
        self.attrib = {**attrib, **extra}
        self._children = []

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__, self.tag, id(self))

    def makeelement(self, tag, attrib):
        """Create a new element with the same type.

        tag is a string containing the element name.
        attrib is a dictionary containing the element attributes.

        Do not call this method, use the SubElement factory function instead.

        """
        return self.__class__(tag, attrib)

    def copy(self):
        """Return copy of current element.

        This creates a shallow copy.  Subelements will be shared with the
        original tree.

        """
        elem = self.makeelement(self.tag, self.attrib)
        elem.text = self.text
        elem.tail = self.tail
        elem[:] = self
        return elem

    def __len__(self):
        """Return the number of direct subelements."""
        return len(self._children)

    def __bool__(self):
        """Return True if the element has subelements (emits FutureWarning)."""
        warnings.warn(
            "The behavior of this method will change in future versions. "
            "Use specific 'len(elem)' or 'elem is not None' test instead.",
            FutureWarning, stacklevel=2
            )
        return len(self._children) != 0  # emulate old behaviour, for now

    def __getitem__(self, index):
        """Return the subelement (or list of subelements) at *index*."""
        return self._children[index]

    def __setitem__(self, index, element):
        """Replace the subelement(s) at *index*.

        For a slice, *element* is an iterable of elements; otherwise it is
        a single element.  TypeError is raised if any item is not an
        Element.

        """
        if isinstance(index, slice):
            # Materialize first: validating a one-shot iterator in place
            # would exhaust it and the assignment below would store nothing.
            element = list(element)
            for elt in element:
                self._assert_is_element(elt)
        else:
            self._assert_is_element(element)
        self._children[index] = element

    def __delitem__(self, index):
        """Delete the subelement(s) at *index*."""
        del self._children[index]

    def append(self, subelement):
        """Add *subelement* to the end of this element.

        The new element will appear in document order after the last existing
        subelement (or directly after the text, if it's the first subelement),
        but before the end tag for this element.

        """
        self._assert_is_element(subelement)
        self._children.append(subelement)

    def extend(self, elements):
        """Append subelements from an iterable.

        elements is an iterable with zero or more elements.  All items are
        validated before any is added, so a TypeError leaves this element
        unchanged.

        """
        # Snapshot the iterable so generators survive the validation pass.
        new_children = list(elements)
        for element in new_children:
            self._assert_is_element(element)
        self._children.extend(new_children)

    def insert(self, index, subelement):
        """Insert *subelement* at position *index*."""
        self._assert_is_element(subelement)
        self._children.insert(index, subelement)

    def _assert_is_element(self, e):
        """Raise TypeError unless *e* is an instance of the Python Element."""
        # Need to refer to the actual Python implementation, not the
        # shadowing C implementation.
        if not isinstance(e, _Element_Py):
            raise TypeError('expected an Element, not %s' % type(e).__name__)

    def remove(self, subelement):
        """Remove matching subelement.

        Removal is delegated to list.remove() on the child list.  To remove
        subelements by other means, the easiest way is to use a list
        comprehension to select what elements to keep, and then use slice
        assignment to update the parent element.

        ValueError is raised if a matching element could not be found.

        """
        self._children.remove(subelement)

    def getchildren(self):
        """(Deprecated) Return all subelements.

        Elements are returned in document order.  Emits DeprecationWarning.

        """
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'list(elem)' or iteration over elem instead.",
            DeprecationWarning, stacklevel=2
            )
        return self._children

    def find(self, path, namespaces=None):
        """Find first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Delegates to ElementPath.find and returns its result.

        """
        return ElementPath.find(self, path, namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find text for first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *default* is the value to return if the element was not found,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Delegates to ElementPath.findtext and returns its result.

        """
        return ElementPath.findtext(self, path, default, namespaces)

    def findall(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Delegates to ElementPath.findall and returns its result.

        """
        return ElementPath.findall(self, path, namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Delegates to ElementPath.iterfind and returns its result.

        """
        return ElementPath.iterfind(self, path, namespaces)

    def clear(self):
        """Reset element.

        This function removes all subelements, clears all attributes, and sets
        the text and tail attributes to None.

        """
        self.attrib.clear()
        self._children = []
        self.text = self.tail = None

    def get(self, key, default=None):
        """Get element attribute.

        Equivalent to attrib.get.  *key* is what attribute to look for, and
        *default* is what to return if the attribute was not found.

        """
        return self.attrib.get(key, default)

    def set(self, key, value):
        """Set element attribute.

        Equivalent to attrib[key] = value.  *key* is what attribute to set,
        and *value* is the attribute value to set it to.

        """
        self.attrib[key] = value

    def keys(self):
        """Get the attribute names.

        Equivalent to attrib.keys().

        """
        return self.attrib.keys()

    def items(self):
        """Get element attributes as (name, value) pairs.

        Equivalent to attrib.items(); the result is a dict view, not a list.

        """
        return self.attrib.items()

    def iter(self, tag=None):
        """Create tree iterator.

        The iterator loops over the element and all subelements in document
        order, returning all elements with a matching tag.

        If the tree structure is modified during iteration, new or removed
        elements may or may not be included.  To get a stable set, use the
        list() function on the iterator, and loop over the resulting list.

        *tag* is what tags to look for (default is to return all elements);
        "*" is treated the same as None.

        Return an iterator containing all the matching elements.

        """
        if tag == "*":
            tag = None
        if tag is None or self.tag == tag:
            yield self
        for e in self._children:
            yield from e.iter(tag)

    # compatibility
    def getiterator(self, tag=None):
        """(Deprecated) Return list(self.iter(tag)); emits DeprecationWarning."""
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'elem.iter()' or 'list(elem.iter())' instead.",
            DeprecationWarning, stacklevel=2
        )
        return list(self.iter(tag))

    def itertext(self):
        """Create text iterator.

        The iterator loops over the element and all subelements in document
        order, returning all inner text.

        """
        tag = self.tag
        # Elements whose tag is neither a string nor None yield no text.
        if not isinstance(tag, str) and tag is not None:
            return
        t = self.text
        if t:
            yield t
        for e in self:
            yield from e.itertext()
            t = e.tail
            if t:
                yield t
jpayne@68	439
jpayne@68	440
def SubElement(parent, tag, attrib={}, **extra):
    """Subelement factory which creates an element instance, and appends it
    to an existing parent.

    The element tag, attribute names, and attribute values can be either
    bytes or Unicode strings.

    *parent* is the parent element, *tag* is the subelement's name, *attrib*
    is an optional dictionary containing element attributes, *extra* are
    additional attributes given as keyword arguments.

    Return the newly created element.

    """
    # Merge into a fresh dict so neither the shared default nor the caller's
    # dict is mutated; keyword attributes win on key clashes.
    attrib = {**attrib, **extra}
    element = parent.makeelement(tag, attrib)
    parent.append(element)
    return element
jpayne@68	457
jpayne@68	458
def Comment(text=None):
    """Comment element factory.

    Build a special element, tagged with the Comment function itself,
    which the standard serializer writes out as an XML comment.

    *text* is a string containing the comment string.

    """
    comment = Element(Comment)
    comment.text = text
    return comment
jpayne@68	471
jpayne@68	472
def ProcessingInstruction(target, text=None):
    """Processing Instruction element factory.

    This function creates a special element, tagged with the
    ProcessingInstruction function itself, which the standard serializer
    serializes as an XML processing instruction.

    *target* is a string containing the processing instruction target,
    *text* is a string containing the processing instruction contents,
    if any.  The element's text is the target, followed by a space and
    *text* when *text* is non-empty.

    """
    element = Element(ProcessingInstruction)
    element.text = target
    if text:
        element.text = element.text + " " + text
    return element

# Short alias for ProcessingInstruction.
PI = ProcessingInstruction
jpayne@68	490
jpayne@68	491
class QName:
    """Qualified name wrapper.

    Wrap a QName attribute value so that namespaces are handled properly
    on output.

    *text_or_uri* is a string containing the QName value either in the
    form {uri}local, or, if the *tag* argument is given, the URI part of
    a QName.

    *tag* is an optional argument which, if given, makes the first
    argument (text_or_uri) be interpreted as a URI and this argument be
    interpreted as a local name.

    Comparisons and hashing operate on the resulting text, so a QName
    compares equal to (and hashes like) the plain string it wraps.

    """

    def __init__(self, text_or_uri, tag=None):
        self.text = "{%s}%s" % (text_or_uri, tag) if tag else text_or_uri

    def __str__(self):
        return self.text

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, self.text)

    def __hash__(self):
        return hash(self.text)

    def __le__(self, other):
        rhs = other.text if isinstance(other, QName) else other
        return self.text <= rhs

    def __lt__(self, other):
        rhs = other.text if isinstance(other, QName) else other
        return self.text < rhs

    def __ge__(self, other):
        rhs = other.text if isinstance(other, QName) else other
        return self.text >= rhs

    def __gt__(self, other):
        rhs = other.text if isinstance(other, QName) else other
        return self.text > rhs

    def __eq__(self, other):
        rhs = other.text if isinstance(other, QName) else other
        return self.text == rhs
jpayne@68	536
jpayne@68	537 # --------------------------------------------------------------------
jpayne@68	538
jpayne@68	539
jpayne@68	540 class ElementTree:
jpayne@68	541 """An XML element hierarchy.
jpayne@68	542
jpayne@68	543 This class also provides support for serialization to and from
jpayne@68	544 standard XML.
jpayne@68	545
jpayne@68	546 element is an optional root element node,
jpayne@68	547 file is an optional file handle or file name of an XML file whose
jpayne@68	548 contents will be used to initialize the tree with.
jpayne@68	549
jpayne@68	550 """
jpayne@68	551 def __init__(self, element=None, file=None):
jpayne@68	552 # assert element is None or iselement(element)
jpayne@68	553 self._root = element # first node
jpayne@68	554 if file:
jpayne@68	555 self.parse(file)
jpayne@68	556
jpayne@68	557 def getroot(self):
jpayne@68	558 """Return root element of this tree."""
jpayne@68	559 return self._root
jpayne@68	560
jpayne@68	561 def _setroot(self, element):
jpayne@68	562 """Replace root element of this tree.
jpayne@68	563
jpayne@68	564 This will discard the current contents of the tree and replace it
jpayne@68	565 with the given element. Use with care!
jpayne@68	566
jpayne@68	567 """
jpayne@68	568 # assert iselement(element)
jpayne@68	569 self._root = element
jpayne@68	570
jpayne@68	571 def parse(self, source, parser=None):
jpayne@68	572 """Load external XML document into element tree.
jpayne@68	573
jpayne@68	574 source is a file name or file object, parser is an optional parser
jpayne@68	575 instance that defaults to XMLParser.
jpayne@68	576
jpayne@68	577 ParseError is raised if the parser fails to parse the document.
jpayne@68	578
jpayne@68	579 Returns the root element of the given source document.
jpayne@68	580
jpayne@68	581 """
jpayne@68	582 close_source = False
jpayne@68	583 if not hasattr(source, "read"):
jpayne@68	584 source = open(source, "rb")
jpayne@68	585 close_source = True
jpayne@68	586 try:
jpayne@68	587 if parser is None:
jpayne@68	588 # If no parser was specified, create a default XMLParser
jpayne@68	589 parser = XMLParser()
jpayne@68	590 if hasattr(parser, '_parse_whole'):
jpayne@68	591 # The default XMLParser, when it comes from an accelerator,
jpayne@68	592 # can define an internal _parse_whole API for efficiency.
jpayne@68	593 # It can be used to parse the whole source without feeding
jpayne@68	594 # it with chunks.
jpayne@68	595 self._root = parser._parse_whole(source)
jpayne@68	596 return self._root
jpayne@68	597 while True:
jpayne@68	598 data = source.read(65536)
jpayne@68	599 if not data:
jpayne@68	600 break
jpayne@68	601 parser.feed(data)
jpayne@68	602 self._root = parser.close()
jpayne@68	603 return self._root
jpayne@68	604 finally:
jpayne@68	605 if close_source:
jpayne@68	606 source.close()
jpayne@68	607
jpayne@68	608 def iter(self, tag=None):
jpayne@68	609 """Create and return tree iterator for the root element.
jpayne@68	610
jpayne@68	611 The iterator loops over all elements in this tree, in document order.
jpayne@68	612
jpayne@68	613 tag is a string with the tag name to iterate over
jpayne@68	614 (default is to return all elements).
jpayne@68	615
jpayne@68	616 """
jpayne@68	617 # assert self._root is not None
jpayne@68	618 return self._root.iter(tag)
jpayne@68	619
jpayne@68	620 # compatibility
jpayne@68	621 def getiterator(self, tag=None):
jpayne@68	622 warnings.warn(
jpayne@68	623 "This method will be removed in future versions. "
jpayne@68	624 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
jpayne@68	625 DeprecationWarning, stacklevel=2
jpayne@68	626 )
jpayne@68	627 return list(self.iter(tag))
jpayne@68	628
jpayne@68	629 def find(self, path, namespaces=None):
jpayne@68	630 """Find first matching element by tag name or path.
jpayne@68	631
jpayne@68	632 Same as getroot().find(path), which is Element.find()
jpayne@68	633
jpayne@68	634 path is a string having either an element tag or an XPath,
jpayne@68	635 namespaces is an optional mapping from namespace prefix to full name.
jpayne@68	636
jpayne@68	637 Return the first matching element, or None if no element was found.
jpayne@68	638
jpayne@68	639 """
jpayne@68	640 # assert self._root is not None
jpayne@68	641 if path[:1] == "/":
jpayne@68	642 path = "." + path
jpayne@68	643 warnings.warn(
jpayne@68	644 "This search is broken in 1.3 and earlier, and will be "
jpayne@68	645 "fixed in a future version. If you rely on the current "
jpayne@68	646 "behaviour, change it to %r" % path,
jpayne@68	647 FutureWarning, stacklevel=2
jpayne@68	648 )
jpayne@68	649 return self._root.find(path, namespaces)
jpayne@68	650
jpayne@68	651 def findtext(self, path, default=None, namespaces=None):
jpayne@68	652 """Find first matching element by tag name or path.
jpayne@68	653
jpayne@68	654 Same as getroot().findtext(path), which is Element.findtext()
jpayne@68	655
jpayne@68	656 path is a string having either an element tag or an XPath,
jpayne@68	657 namespaces is an optional mapping from namespace prefix to full name.
jpayne@68	658
jpayne@68	659 Return the first matching element, or None if no element was found.
jpayne@68	660
jpayne@68	661 """
jpayne@68	662 # assert self._root is not None
jpayne@68	663 if path[:1] == "/":
jpayne@68	664 path = "." + path
jpayne@68	665 warnings.warn(
jpayne@68	666 "This search is broken in 1.3 and earlier, and will be "
jpayne@68	667 "fixed in a future version. If you rely on the current "
jpayne@68	668 "behaviour, change it to %r" % path,
jpayne@68	669 FutureWarning, stacklevel=2
jpayne@68	670 )
jpayne@68	671 return self._root.findtext(path, default, namespaces)
jpayne@68	672
jpayne@68	673 def findall(self, path, namespaces=None):
jpayne@68	674 """Find all matching subelements by tag name or path.
jpayne@68	675
jpayne@68	676 Same as getroot().findall(path), which is Element.findall().
jpayne@68	677
jpayne@68	678 path is a string having either an element tag or an XPath,
jpayne@68	679 namespaces is an optional mapping from namespace prefix to full name.
jpayne@68	680
jpayne@68	681 Return list containing all matching elements in document order.
jpayne@68	682
jpayne@68	683 """
jpayne@68	684 # assert self._root is not None
jpayne@68	685 if path[:1] == "/":
jpayne@68	686 path = "." + path
jpayne@68	687 warnings.warn(
jpayne@68	688 "This search is broken in 1.3 and earlier, and will be "
jpayne@68	689 "fixed in a future version. If you rely on the current "
jpayne@68	690 "behaviour, change it to %r" % path,
jpayne@68	691 FutureWarning, stacklevel=2
jpayne@68	692 )
jpayne@68	693 return self._root.findall(path, namespaces)
jpayne@68	694
jpayne@68	695 def iterfind(self, path, namespaces=None):
jpayne@68	696 """Find all matching subelements by tag name or path.
jpayne@68	697
jpayne@68	698 Same as getroot().iterfind(path), which is element.iterfind()
jpayne@68	699
jpayne@68	700 path is a string having either an element tag or an XPath,
jpayne@68	701 namespaces is an optional mapping from namespace prefix to full name.
jpayne@68	702
jpayne@68	703 Return an iterable yielding all matching elements in document order.
jpayne@68	704
jpayne@68	705 """
jpayne@68	706 # assert self._root is not None
jpayne@68	707 if path[:1] == "/":
jpayne@68	708 path = "." + path
jpayne@68	709 warnings.warn(
jpayne@68	710 "This search is broken in 1.3 and earlier, and will be "
jpayne@68	711 "fixed in a future version. If you rely on the current "
jpayne@68	712 "behaviour, change it to %r" % path,
jpayne@68	713 FutureWarning, stacklevel=2
jpayne@68	714 )
jpayne@68	715 return self._root.iterfind(path, namespaces)
jpayne@68	716
def write(self, file_or_filename,
          encoding=None,
          xml_declaration=None,
          default_namespace=None,
          method=None, *,
          short_empty_elements=True):
    """Serialize the element tree to a file.

    Arguments:
      file_or_filename -- a file name, or an object with a write() method

      encoding -- the output encoding; when omitted, "utf-8" is used for
                  the "c14n" method and "us-ascii" for every other method

      xml_declaration -- whether to emit an XML declaration ("xml" method
                         only).  If None, a declaration is emitted only
                         when the encoding is not one of US-ASCII, UTF-8
                         or Unicode

      default_namespace -- namespace URI to bind to the bare "xmlns"
                           attribute

      method -- one of "xml" (default), "html", "text" or "c14n"

      short_empty_elements -- if True (default), elements without content
                              are written as a single self-closed tag;
                              otherwise as a start/end tag pair

    Raises ValueError if *method* is not a known serialization method.

    """
    if not method:
        method = "xml"
    elif method not in _serialize:
        raise ValueError("unknown method %r" % method)

    if not encoding:
        encoding = "utf-8" if method == "c14n" else "us-ascii"
    enc_lower = encoding.lower()

    with _get_writer(file_or_filename, enc_lower) as emit:
        if method == "xml":
            if xml_declaration is None:
                # No explicit request: declare only "unusual" encodings.
                wants_declaration = enc_lower not in (
                    "utf-8", "us-ascii", "unicode")
            else:
                wants_declaration = xml_declaration
            if wants_declaration:
                declared_encoding = encoding
                if enc_lower == "unicode":
                    # "unicode" is not a real encoding name; declare the
                    # locale's preferred encoding instead.
                    import locale
                    declared_encoding = locale.getpreferredencoding()
                emit("<?xml version='1.0' encoding='%s'?>\n" % (
                    declared_encoding,))

        root = self._root
        if method == "text":
            _serialize_text(emit, root)
        else:
            qnames, namespaces = _namespaces(root, default_namespace)
            serializer = _serialize[method]
            serializer(emit, root, qnames, namespaces,
                       short_empty_elements=short_empty_elements)
jpayne@68	774
def write_c14n(self, file):
    """Write the tree to *file* using the "c14n" output method.

    Kept for lxml.etree compatibility; calling write() with
    method="c14n" is the preferred spelling.
    """
    result = self.write(file, method="c14n")
    return result
jpayne@68	778
jpayne@68	779 # --------------------------------------------------------------------
jpayne@68	780 # serialization support
jpayne@68	781
@contextlib.contextmanager
def _get_writer(file_or_filename, encoding):
    """Context manager yielding a text ``write`` callable for the target.

    *file_or_filename* is either a path (anything without a ``write``
    attribute) or a file-like object.  *encoding* is the lower-cased output
    encoding; the special value "unicode" means the target accepts text.

    Files opened here are closed on exit.  Objects supplied by the caller
    are left open: any wrapper created around them is detached instead of
    closed.
    """
    try:
        raw_write = file_or_filename.write
    except AttributeError:
        # No write attribute: treat the argument as a file name.
        open_options = {}
        if encoding != "unicode":
            open_options = {"encoding": encoding,
                            "errors": "xmlcharrefreplace"}
        with open(file_or_filename, "w", **open_options) as stream:
            yield stream.write
    else:
        # A file-like object; the encoding tells us whether it takes
        # text or bytes.
        if encoding == "unicode":
            # text writer: use it as is
            yield raw_write
            return

        # binary writer: put a TextIOWrapper in front of it
        with contextlib.ExitStack() as cleanup:
            if isinstance(file_or_filename, io.BufferedIOBase):
                binary = file_or_filename
            elif isinstance(file_or_filename, io.RawIOBase):
                binary = io.BufferedWriter(file_or_filename)
                # Detach so the caller's raw file stays open when the
                # BufferedWriter goes away.
                cleanup.callback(binary.detach)
            else:
                # Not part of the IOBase hierarchy, it merely has a
                # write method: build a minimal buffered facade.
                binary = io.BufferedIOBase()
                binary.writable = lambda: True
                binary.write = raw_write
                try:
                    # TextIOWrapper consults these to decide whether a
                    # BOM (for UTF-16, etc) should be written.
                    binary.seekable = file_or_filename.seekable
                    binary.tell = file_or_filename.tell
                except AttributeError:
                    pass
            text_layer = io.TextIOWrapper(binary,
                                          encoding=encoding,
                                          errors="xmlcharrefreplace",
                                          newline="\n")
            # Detach so the underlying object stays open when the
            # TextIOWrapper goes away.
            cleanup.callback(text_layer.detach)
            yield text_layer.write
jpayne@68	833
def _namespaces(elem, default_namespace=None):
    """Collect the names and namespace prefixes used by a tree.

    Walks *elem* and all of its descendants and returns a
    ``(qnames, namespaces)`` pair.  qnames maps each tag, attribute key and
    QName-valued attribute value or text to its serialized spelling
    (``None`` maps to ``None``); namespaces maps each namespace URI to the
    prefix chosen for it.

    If *default_namespace* is given it is bound to the empty prefix, and
    any unqualified name in the tree raises ValueError.
    """
    # maps qnames to encoded prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def add_qname(qname):
        # work out and record the serialized spelling of one name
        try:
            if qname[:1] != "{":
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = qname
                return
            uri, local = qname[1:].rsplit("}", 1)
            prefix = namespaces.get(uri)
            if prefix is None:
                prefix = _namespace_map.get(uri)
                if prefix is None:
                    prefix = "ns%d" % len(namespaces)
                if prefix != "xml":
                    namespaces[uri] = prefix
            # an empty prefix denotes the default namespace
            qnames[qname] = "%s:%s" % (prefix, local) if prefix else local
        except TypeError:
            _raise_serialization_error(qname)

    def remember(name):
        # register a name the first time it is seen
        if name not in qnames:
            add_qname(name)

    # populate qname and namespaces table
    for node in elem.iter():
        tag = node.tag
        if isinstance(tag, QName):
            remember(tag.text)
        elif isinstance(tag, str):
            remember(tag)
        elif not (tag is None or tag is Comment or tag is PI):
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            remember(key)
            if isinstance(value, QName):
                remember(value.text)
        text = node.text
        if isinstance(text, QName):
            remember(text.text)
    return qnames, namespaces
jpayne@68	894
def _serialize_xml(write, elem, qnames, namespaces,
                   short_empty_elements, **kwargs):
    """Write *elem* and its subtree as XML through the *write* callable.

    *qnames* maps names to their serialized spelling.  *namespaces* maps
    URIs to prefixes and is emitted as xmlns attributes on this element
    only; children are serialized with ``None``.  An element whose mapped
    tag is ``None`` contributes only its text and children.  The element's
    tail, if any, is written last.
    """
    raw_tag = elem.tag
    text = elem.text
    if raw_tag is Comment:
        write("<!--%s-->" % text)
    elif raw_tag is ProcessingInstruction:
        write("<?%s?>" % text)
    else:
        name = qnames[raw_tag]
        if name is None:
            # no tag of its own: emit the content only
            if text:
                write(_escape_cdata(text))
            for child in elem:
                _serialize_xml(write, child, qnames, None,
                               short_empty_elements=short_empty_elements)
        else:
            write("<" + name)
            attributes = list(elem.items())
            if namespaces:
                declarations = sorted(namespaces.items(),
                                      key=lambda pair: pair[1])  # sort on prefix
                for uri, prefix in declarations:
                    suffix = ":" + prefix if prefix else prefix
                    write(" xmlns%s=\"%s\"" % (
                        suffix,
                        _escape_attrib(uri)
                        ))
            for key, value in attributes:
                if isinstance(key, QName):
                    key = key.text
                value = (qnames[value.text] if isinstance(value, QName)
                         else _escape_attrib(value))
                write(" %s=\"%s\"" % (qnames[key], value))
            has_content = text or len(elem) or not short_empty_elements
            if not has_content:
                write(" />")
            else:
                write(">")
                if text:
                    write(_escape_cdata(text))
                for child in elem:
                    _serialize_xml(write, child, qnames, None,
                                   short_empty_elements=short_empty_elements)
                write("</" + name + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail))
jpayne@68	944
# Tags that _serialize_html() writes without a closing tag.
HTML_EMPTY = {
    "area", "base", "basefont", "br", "col", "frame", "hr",
    "img", "input", "isindex", "link", "meta", "param",
}
jpayne@68	952
def _serialize_html(write, elem, qnames, namespaces, **kwargs):
    """Write *elem* and its subtree as HTML through the *write* callable.

    Unlike the XML serializer, the text of ``script`` and ``style``
    elements is written unescaped, and no closing tag is written for the
    tags listed in HTML_EMPTY.  *namespaces* is emitted on this element
    only; children are serialized with ``None``.  The element's tail, if
    any, is written last.
    """
    raw_tag = elem.tag
    text = elem.text
    if raw_tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif raw_tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    else:
        name = qnames[raw_tag]
        if name is None:
            # no tag of its own: emit the content only
            if text:
                write(_escape_cdata(text))
            for child in elem:
                _serialize_html(write, child, qnames, None)
        else:
            write("<" + name)
            attributes = list(elem.items())
            if namespaces:
                declarations = sorted(namespaces.items(),
                                      key=lambda pair: pair[1])  # sort on prefix
                for uri, prefix in declarations:
                    suffix = ":" + prefix if prefix else prefix
                    write(" xmlns%s=\"%s\"" % (
                        suffix,
                        _escape_attrib(uri)
                        ))
            for key, value in attributes:
                if isinstance(key, QName):
                    key = key.text
                value = (qnames[value.text] if isinstance(value, QName)
                         else _escape_attrib_html(value))
                # FIXME: handle boolean attributes
                write(" %s=\"%s\"" % (qnames[key], value))
            write(">")
            lowered = name.lower()
            if text:
                if lowered in ("script", "style"):
                    # raw text elements: content is written verbatim
                    write(text)
                else:
                    write(_escape_cdata(text))
            for child in elem:
                _serialize_html(write, child, qnames, None)
            if lowered not in HTML_EMPTY:
                write("</" + name + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail))
jpayne@68	1002
def _serialize_text(write, elem):
    """Write the text content of *elem* through the *write* callable.

    Every string produced by ``elem.itertext()`` is written in order,
    followed by the element's own tail when it is non-empty.  Nothing is
    escaped.
    """
    for chunk in elem.itertext():
        write(chunk)
    if not elem.tail:
        return
    write(elem.tail)
jpayne@68	1008
# Dispatch table from output-method name to serializer function.
# ElementTree.write() validates its ``method`` argument against these keys
# and looks the serializer up here for every method except "text", which
# it calls directly.
_serialize = {
    "xml": _serialize_xml,
    "html": _serialize_html,
    "text": _serialize_text,
# this optional method is imported at the end of the module
#   "c14n": _serialize_c14n,
}
jpayne@68	1016
jpayne@68	1017
def register_namespace(prefix, uri):
    """Register a namespace prefix.

    The registry is global.  Any existing entry that uses either the given
    prefix or the given namespace URI is dropped before the new mapping is
    stored.

    prefix is the namespace prefix, uri is a namespace uri.  Tags and
    attributes in this namespace will be serialized with prefix if possible.

    ValueError is raised if prefix is reserved or is invalid.

    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Collect the conflicting keys first so the registry is never mutated
    # while it is being iterated.
    stale = [known_uri for known_uri, known_prefix in _namespace_map.items()
             if known_uri == uri or known_prefix == prefix]
    for known_uri in stale:
        del _namespace_map[known_uri]
    _namespace_map[uri] = prefix
jpayne@68	1036
# Global registry mapping namespace URI -> preferred prefix.
# _namespaces() falls back to it for a URI that has no prefix assigned yet,
# and register_namespace() adds entries to it and removes them from it.
_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}
# For tests and troubleshooting
# (exposes the same dict object as an attribute of the function)
register_namespace._namespace_map = _namespace_map
jpayne@68	1051
def _raise_serialization_error(text):
    """Raise TypeError reporting that *text* cannot be serialized.

    The message contains the repr of the offending value and the name of
    its type.  This function never returns.
    """
    type_name = type(text).__name__
    message = "cannot serialize %r (type %s)" % (text, type_name)
    raise TypeError(message)
jpayne@68	1056
def _escape_cdata(text):
    """Return *text* with ``&``, ``<`` and ``>`` replaced by entities.

    The three characters are replaced with ``&amp;``, ``&lt;`` and
    ``&gt;`` respectively so the result can be written as XML character
    data.  A value that does not behave like a string is reported through
    _raise_serialization_error().
    """
    # escape character data
    try:
        # it's worth avoiding do-nothing calls for strings that are
        # shorter than 500 characters, or so.  assume that's, by far,
        # the most common case in most applications.
        if "&" in text:
            # Ampersands go first; otherwise the "&" introduced by the
            # replacements below would be escaped a second time.
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
jpayne@68	1072
def _escape_attrib(text):
    """Return *text* escaped for use inside a double-quoted XML attribute.

    ``&``, ``<``, ``>`` and ``"`` become ``&amp;``, ``&lt;``, ``&gt;`` and
    ``&quot;``.  Line breaks (CR LF, CR or LF) become the character
    reference ``&#10;`` and tabs become ``&#09;``, so they are written as
    references rather than literal whitespace.  A value that does not
    behave like a string is reported through _raise_serialization_error().
    """
    # escape attribute value
    try:
        if "&" in text:
            # Ampersands go first; otherwise the "&" introduced by the
            # replacements below would be escaped a second time.
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        # The following business with carriage returns is to satisfy
        # Section 2.11 of the XML specification, stating that
        # CR or CR LF should be replaced with just LF
        # http://www.w3.org/TR/REC-xml/#sec-line-ends
        if "\r\n" in text:
            text = text.replace("\r\n", "\n")
        if "\r" in text:
            text = text.replace("\r", "\n")
        #The following four lines are issue 17582
        if "\n" in text:
            text = text.replace("\n", "&#10;")
        if "\t" in text:
            text = text.replace("\t", "&#09;")
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
jpayne@68	1100
def _escape_attrib_html(text):
    """Return *text* escaped for use inside a double-quoted HTML attribute.

    ``&``, ``>`` and ``"`` become ``&amp;``, ``&gt;`` and ``&quot;``.
    ``<`` is left untouched.  A value that does not behave like a string
    is reported through _raise_serialization_error().
    """
    # escape attribute value
    try:
        if "&" in text:
            # Ampersands go first; otherwise the "&" introduced by the
            # replacements below would be escaped a second time.
            text = text.replace("&", "&amp;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
jpayne@68	1113
jpayne@68	1114 # --------------------------------------------------------------------
jpayne@68	1115
def tostring(element, encoding=None, method=None, *,
             xml_declaration=None, default_namespace=None,
             short_empty_elements=True):
    """Serialize *element*, including all subelements, to a single value.

    When encoding is "unicode" the result is a str; for any other value
    (including the default, None) it is a bytestring.

    element is an Element instance.  encoding, method, xml_declaration,
    default_namespace and short_empty_elements are handed unchanged to
    ElementTree.write(); see that method for their meaning and defaults.

    Returns the serialized data.

    """
    if encoding == 'unicode':
        buffer = io.StringIO()
    else:
        buffer = io.BytesIO()
    tree = ElementTree(element)
    tree.write(buffer, encoding,
               xml_declaration=xml_declaration,
               default_namespace=default_namespace,
               method=method,
               short_empty_elements=short_empty_elements)
    return buffer.getvalue()
jpayne@68	1139
class _ListDataStream(io.BufferedIOBase):
    """Write-only stream that appends every written chunk to a list.

    The list is supplied by the caller and stored by reference, so it
    reflects the writes as they happen.
    """

    def __init__(self, lst):
        # keep the caller's list itself, not a copy
        self.lst = lst

    def write(self, b):
        """Append chunk *b* to the list."""
        self.lst.append(b)

    def tell(self):
        """Return the number of chunks written so far."""
        return len(self.lst)

    def writable(self):
        """Report that the stream accepts writes."""
        return True

    def seekable(self):
        """Report the stream as seekable."""
        return True
jpayne@68	1156
def tostringlist(element, encoding=None, method=None, *,
                 xml_declaration=None, default_namespace=None,
                 short_empty_elements=True):
    """Serialize *element* and return the output as a list of chunks.

    The arguments have the same meaning as for tostring() and are handed
    unchanged to ElementTree.write().  The returned list holds the chunks
    in the order they were written to the underlying stream.
    """
    chunks = []
    sink = _ListDataStream(chunks)
    options = dict(xml_declaration=xml_declaration,
                   default_namespace=default_namespace,
                   method=method,
                   short_empty_elements=short_empty_elements)
    ElementTree(element).write(sink, encoding, **options)
    return chunks
jpayne@68	1168
jpayne@68	1169
def dump(elem):
    """Write an element tree or element structure to sys.stdout.

    Intended for debugging only.

    elem is either an ElementTree or a single Element.  The exact output
    format is implementation dependent; this version writes ordinary XML
    and makes sure the output ends with a newline.

    """
    # debugging
    tree = elem if isinstance(elem, ElementTree) else ElementTree(elem)
    tree.write(sys.stdout, encoding="unicode")
    tail = tree.getroot().tail
    # Add a newline unless the root's tail already supplied one.
    if not (tail and tail[-1] == "\n"):
        sys.stdout.write("\n")
jpayne@68	1187
jpayne@68	1188 # --------------------------------------------------------------------
jpayne@68	1189 # parsing
jpayne@68	1190
jpayne@68	1191
def parse(source, parser=None):
    """Parse an XML document and return it as an ElementTree.

    source is a filename or file object containing XML data; parser is an
    optional parser instance, defaulting to XMLParser.  Both are passed
    straight to ElementTree.parse().

    """
    document = ElementTree()
    document.parse(source, parser)
    return document
jpayne@68	1204
jpayne@68	1205
def iterparse(source, events=None, parser=None):
    """Incrementally parse XML document into ElementTree.

    This function also reports what's going on to the user based on the
    events it is initialized with.  The supported events are the strings
    "start", "end", "start-ns" and "end-ns" (the "ns" events are used to get
    detailed namespace information).  If events is omitted, only
    "end" events are reported.

    source is a filename or file object containing XML data, events is
    a list of events to report back, parser is an optional parser instance.

    Returns an iterator providing (event, elem) pairs.  The iterator has a
    ``root`` attribute which is None until the whole document has been
    parsed and is then set to the root returned by the parser.

    """
    # Use the internal, undocumented _parser argument for now; When the
    # parser argument of iterparse is removed, this can be killed.
    # The pull parser is created before the source is opened, so an error
    # raised here cannot leave a file open.
    pullparser = XMLPullParser(events=events, _parser=parser)
    def iterator():
        # Generator body: nothing here runs until the first next() call, so
        # ``source``, ``close_source`` and ``it`` (all assigned further down
        # in the enclosing function) are bound by the time they are used.
        try:
            while True:
                # hand out whatever the previous feed() produced
                yield from pullparser.read_events()
                # load event buffer
                data = source.read(16 * 1024)
                if not data:
                    break
                pullparser.feed(data)
            root = pullparser._close_and_return_root()
            # closing the parser may queue further events; drain them too
            yield from pullparser.read_events()
            it.root = root
        finally:
            # only close files that this function opened itself
            if close_source:
                source.close()

    class IterParseIterator(collections.abc.Iterator):
        # A single generator is created here, at class-definition time, and
        # its bound __next__ becomes the iterator protocol method.
        __next__ = iterator().__next__
    it = IterParseIterator()
    it.root = None
    del iterator, IterParseIterator

    close_source = False
    if not hasattr(source, "read"):
        # no read attribute: treat source as a file name
        source = open(source, "rb")
        close_source = True

    return it
jpayne@68	1252
jpayne@68	1253
class XMLPullParser:
    """Parser that is fed data in pieces and queues the resulting events.

    events is a sequence of event names to report; when omitted, only
    "end" events are reported.  Queued events are retrieved with
    read_events().
    """

    def __init__(self, events=None, *, _parser=None):
        # The _parser argument is for internal use only and must not be relied
        # upon in user code. It will be removed in a future release.
        # See http://bugs.python.org/issue17741 for more details.

        self._events_queue = collections.deque()
        if not _parser:
            _parser = XMLParser(target=TreeBuilder())
        self._parser = _parser
        # wire up the parser for event reporting
        if events is None:
            events = ("end",)
        self._parser._setevents(self._events_queue, events)

    def feed(self, data):
        """Feed encoded data to parser."""
        parser = self._parser
        if parser is None:
            raise ValueError("feed() called after end of stream")
        if not data:
            return
        try:
            parser.feed(data)
        except SyntaxError as exc:
            # Queue the error so it is raised from read_events(), after
            # the events that were queued before it.
            self._events_queue.append(exc)

    def _close_and_return_root(self):
        # iterparse needs this to set its root attribute properly :(
        parser, self._parser = self._parser, None
        return parser.close()

    def close(self):
        """Finish feeding data to parser.

        Unlike XMLParser, does not return the root element.  Use
        read_events() to consume elements from XMLPullParser.
        """
        self._close_and_return_root()

    def read_events(self):
        """Return an iterator over currently available (event, elem) pairs.

        Events are consumed from the internal event queue as they are
        retrieved from the iterator.  A queued exception is raised when
        it is reached.
        """
        queue = self._events_queue
        while queue:
            item = queue.popleft()
            if isinstance(item, Exception):
                raise item
            yield item
jpayne@68	1305
jpayne@68	1306
def XML(text, parser=None):
    """Parse an XML document held in a string and return its root.

    Handy for embedding "XML Literals" in Python code.

    text is a string containing XML data; parser is an optional parser
    instance.  When none is given, an XMLParser building a tree with a
    TreeBuilder is used.

    Returns whatever the parser's close() returns (an Element instance
    for the standard parser).

    """
    active = parser if parser else XMLParser(target=TreeBuilder())
    active.feed(text)
    return active.close()
jpayne@68	1322
jpayne@68	1323
def XMLID(text, parser=None):
    """Parse an XML document held in a string and index its IDs.

    text is a string containing XML data; parser is an optional parser
    instance.  When none is given, an XMLParser building a tree with a
    TreeBuilder is used.

    Returns an (Element, dict) tuple.  The dict maps each non-empty "id"
    attribute value to the element carrying it; when an id occurs more
    than once, the last element in document order wins.

    """
    active = parser if parser else XMLParser(target=TreeBuilder())
    active.feed(text)
    tree = active.close()
    ids = {}
    for node in tree.iter():
        identifier = node.get("id")
        if not identifier:
            continue
        ids[identifier] = node
    return tree, ids
jpayne@68	1344
# Parse XML document from string constant.  Alias for XML(): both names
# refer to the same function object.
fromstring = XML
jpayne@68	1347
def fromstringlist(sequence, parser=None):
    """Parse an XML document supplied as a sequence of string fragments.

    sequence is a list or other iterable of fragments, fed to the parser
    one at a time in order; parser is an optional parser instance.  When
    none is given, an XMLParser building a tree with a TreeBuilder is used.

    Returns whatever the parser's close() returns (an Element instance
    for the standard parser).

    """
    active = parser if parser else XMLParser(target=TreeBuilder())
    feed = active.feed
    for fragment in sequence:
        feed(fragment)
    return active.close()
jpayne@68	1362
jpayne@68	1363 # --------------------------------------------------------------------
jpayne@68	1364
jpayne@68	1365
class TreeBuilder:
    """Generic element structure builder.

    Turns a sequence of start(), data() and end() calls into a well-formed
    element structure.  Use it behind a custom XML parser, or behind a
    parser for some other XML-like format.

    element_factory is an optional factory called as
    ``element_factory(tag, attrs)`` to create each new element; the
    standard Element is used when it is omitted.

    comment_factory creates comments in place of the standard factory.
    Comments are only inserted into the tree when insert_comments is true
    (it is false by default).

    pi_factory creates processing instructions in place of the standard
    factory.  They are only inserted into the tree when insert_pis is true
    (it is false by default).
    """
    def __init__(self, element_factory=None, *,
                 comment_factory=None, pi_factory=None,
                 insert_comments=False, insert_pis=False):
        self._data = []     # text chunks not yet attached to a node
        self._elem = []     # stack of currently open elements
        self._last = None   # node most recently started, ended or inserted
        self._root = None   # first top-level element
        self._tail = None   # true if pending text belongs to _last.tail
        self._comment_factory = (
            Comment if comment_factory is None else comment_factory)
        self.insert_comments = insert_comments
        self._pi_factory = (
            ProcessingInstruction if pi_factory is None else pi_factory)
        self.insert_pis = insert_pis
        self._factory = (
            Element if element_factory is None else element_factory)

    def close(self):
        """Flush builder buffers and return toplevel document Element."""
        assert not self._elem, "missing end tags"
        assert self._root is not None, "missing toplevel element"
        return self._root

    def _flush(self):
        # Attach the collected text to the last node, as its tail when we
        # are past that node's end and as its text otherwise.
        pending = self._data
        if not pending:
            return
        target = self._last
        if target is not None:
            text = "".join(pending)
            if self._tail:
                assert target.tail is None, "internal error (tail)"
                target.tail = text
            else:
                assert target.text is None, "internal error (text)"
                target.text = text
        self._data = []

    def data(self, data):
        """Add text to current element."""
        self._data.append(data)

    def start(self, tag, attrs):
        """Open new element and return it.

        tag is the element name, attrs is a dict containing element
        attributes.

        """
        self._flush()
        node = self._factory(tag, attrs)
        self._last = node
        open_elements = self._elem
        if open_elements:
            open_elements[-1].append(node)
        elif self._root is None:
            self._root = node
        open_elements.append(node)
        self._tail = 0
        return node

    def end(self, tag):
        """Close and return current Element.

        tag is the element name.

        """
        self._flush()
        closed = self._elem.pop()
        self._last = closed
        assert closed.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   closed.tag, tag)
        self._tail = 1
        return closed

    def comment(self, text):
        """Create a comment using the comment_factory.

        text is the text of the comment.
        """
        factory = self._comment_factory
        return self._handle_single(factory, self.insert_comments, text)

    def pi(self, target, text=None):
        """Create a processing instruction using the pi_factory.

        target is the target name of the processing instruction.
        text is the data of the processing instruction, or ''.
        """
        factory = self._pi_factory
        return self._handle_single(factory, self.insert_pis, target, text)

    def _handle_single(self, factory, insert, *args):
        # Build a childless node and, when requested, insert it at the
        # current position in the tree.
        node = factory(*args)
        if not insert:
            return node
        self._flush()
        self._last = node
        if self._elem:
            self._elem[-1].append(node)
        # text that follows belongs to this node's tail
        self._tail = 1
        return node
jpayne@68	1485
jpayne@68	1486
jpayne@68	1487 # also see ElementTree and TreeBuilder
jpayne@68	1488 class XMLParser:
jpayne@68	1489 """Element structure builder for XML source data based on the expat parser.
jpayne@68	1490
jpayne@68	1491 target is an optional target object which defaults to an instance of the
jpayne@68	1492 standard TreeBuilder class, encoding is an optional encoding string
jpayne@68	1493 which if given, overrides the encoding specified in the XML file:
jpayne@68	1494 http://www.iana.org/assignments/character-sets
jpayne@68	1495
jpayne@68	1496 """
jpayne@68	1497
jpayne@68	1498 def __init__(self, *, target=None, encoding=None):
jpayne@68	1499 try:
jpayne@68	1500 from xml.parsers import expat
jpayne@68	1501 except ImportError:
jpayne@68	1502 try:
jpayne@68	1503 import pyexpat as expat
jpayne@68	1504 except ImportError:
jpayne@68	1505 raise ImportError(
jpayne@68	1506 "No module named expat; use SimpleXMLTreeBuilder instead"
jpayne@68	1507 )
jpayne@68	1508 parser = expat.ParserCreate(encoding, "}")
jpayne@68	1509 if target is None:
jpayne@68	1510 target = TreeBuilder()
jpayne@68	1511 # underscored names are provided for compatibility only
jpayne@68	1512 self.parser = self._parser = parser
jpayne@68	1513 self.target = self._target = target
jpayne@68	1514 self._error = expat.error
jpayne@68	1515 self._names = {} # name memo cache
jpayne@68	1516 # main callbacks
jpayne@68	1517 parser.DefaultHandlerExpand = self._default
jpayne@68	1518 if hasattr(target, 'start'):
jpayne@68	1519 parser.StartElementHandler = self._start
jpayne@68	1520 if hasattr(target, 'end'):
jpayne@68	1521 parser.EndElementHandler = self._end
jpayne@68	1522 if hasattr(target, 'start_ns'):
jpayne@68	1523 parser.StartNamespaceDeclHandler = self._start_ns
jpayne@68	1524 if hasattr(target, 'end_ns'):
jpayne@68	1525 parser.EndNamespaceDeclHandler = self._end_ns
jpayne@68	1526 if hasattr(target, 'data'):
jpayne@68	1527 parser.CharacterDataHandler = target.data
jpayne@68	1528 # miscellaneous callbacks
jpayne@68	1529 if hasattr(target, 'comment'):
jpayne@68	1530 parser.CommentHandler = target.comment
jpayne@68	1531 if hasattr(target, 'pi'):
jpayne@68	1532 parser.ProcessingInstructionHandler = target.pi
jpayne@68	1533 # Configure pyexpat: buffering, new-style attribute handling.
jpayne@68	1534 parser.buffer_text = 1
jpayne@68	1535 parser.ordered_attributes = 1
jpayne@68	1536 parser.specified_attributes = 1
jpayne@68	1537 self._doctype = None
jpayne@68	1538 self.entity = {}
jpayne@68	1539 try:
jpayne@68	1540 self.version = "Expat %d.%d.%d" % expat.version_info
jpayne@68	1541 except AttributeError:
jpayne@68	1542 pass # unknown
jpayne@68	1543
def _setevents(self, events_queue, events_to_report):
    """Redirect selected parser callbacks into an event queue.

    Internal API for XMLPullParser.

    events_queue: a list that is populated with ``(event_name, payload)``
        tuples by the underlying parser as it fires callbacks.
    events_to_report: the names of the events to capture (the same names
        as the events of XMLPullParser's constructor): "start", "end",
        "start-ns", "end-ns", "comment" and "pi".

    Raises ValueError for any other event name.
    """
    expat_parser = self._parser
    enqueue = events_queue.append
    for event_name in events_to_report:
        # Every handler binds its collaborators as default arguments so the
        # values current at definition time are the ones it uses later.
        if event_name == "start":
            expat_parser.ordered_attributes = 1
            expat_parser.specified_attributes = 1

            def on_start(tag, attrib_in, event=event_name, append=enqueue,
                         start=self._start):
                append((event, start(tag, attrib_in)))

            expat_parser.StartElementHandler = on_start
        elif event_name == "end":
            def on_end(tag, event=event_name, append=enqueue,
                       end=self._end):
                append((event, end(tag)))

            expat_parser.EndElementHandler = on_end
        elif event_name == "start-ns":
            # TreeBuilder does not implement .start_ns()
            if hasattr(self.target, "start_ns"):
                def on_start_ns(prefix, uri, event=event_name,
                                append=enqueue, start_ns=self._start_ns):
                    append((event, start_ns(prefix, uri)))
            else:
                def on_start_ns(prefix, uri, event=event_name,
                                append=enqueue):
                    append((event, (prefix or '', uri or '')))

            expat_parser.StartNamespaceDeclHandler = on_start_ns
        elif event_name == "end-ns":
            # TreeBuilder does not implement .end_ns()
            if hasattr(self.target, "end_ns"):
                def on_end_ns(prefix, event=event_name, append=enqueue,
                              end_ns=self._end_ns):
                    append((event, end_ns(prefix)))
            else:
                def on_end_ns(prefix, event=event_name, append=enqueue):
                    append((event, None))

            expat_parser.EndNamespaceDeclHandler = on_end_ns
        elif event_name == 'comment':
            def on_comment(text, event=event_name, append=enqueue,
                           self=self):
                append((event, self.target.comment(text)))

            expat_parser.CommentHandler = on_comment
        elif event_name == 'pi':
            def on_pi(pi_target, data, event=event_name, append=enqueue,
                      self=self):
                append((event, self.target.pi(pi_target, data)))

            expat_parser.ProcessingInstructionHandler = on_pi
        else:
            raise ValueError("unknown event %r" % event_name)
jpayne@68	1597
def _raiseerror(self, value):
    """Raise a ParseError built from *value*.

    The new exception copies ``value.code`` and stores
    ``(value.lineno, value.offset)`` as its ``position``.
    """
    error = ParseError(value)
    error.code = value.code
    error.position = (value.lineno, value.offset)
    raise error
jpayne@68	1603
def _fixname(self, key):
    """Return the expanded form of a parser-supplied name, memoised.

    A name containing "}" gets a leading "{" so that "uri}local" becomes
    "{uri}local"; any other name is returned unchanged.  Results are cached
    in ``self._names`` so each distinct key is converted only once.
    """
    memo = self._names
    if key in memo:
        return memo[key]
    name = "{" + key if "}" in key else key
    memo[key] = name
    return name
jpayne@68	1614
def _start_ns(self, prefix, uri):
    """Forward a namespace declaration to the target.

    Falsy *prefix* / *uri* values (such as None) are passed on as ''.
    """
    declared_prefix = prefix or ''
    declared_uri = uri or ''
    return self.target.start_ns(declared_prefix, declared_uri)
jpayne@68	1617
def _end_ns(self, prefix):
    """Forward the end of a namespace scope to the target.

    A falsy *prefix* (such as None) is passed on as ''.
    """
    closed_prefix = prefix or ''
    return self.target.end_ns(closed_prefix)
jpayne@68	1620
def _start(self, tag, attr_list):
    """Handle a start-element callback and forward it to the target.

    *attr_list* is a flat list of alternating attribute names and values
    (the form reported when ordered_attributes is set).  The tag and every
    attribute name go through ``self._fixname``; the target's ``start()``
    receives the fixed tag and a name -> value dict, and its result is
    returned.
    """
    fix = self._fixname
    tag = fix(tag)
    if attr_list:
        attrib = {
            fix(attr_list[pos]): attr_list[pos + 1]
            for pos in range(0, len(attr_list), 2)
        }
    else:
        attrib = {}
    return self.target.start(tag, attrib)
jpayne@68	1632
def _end(self, tag):
    """Handle an end-element callback: fix the tag name and notify the target."""
    fixed_tag = self._fixname(tag)
    return self.target.end(fixed_tag)
jpayne@68	1635
def _default(self, text):
    """Handle text that reaches the parser's default handler.

    Three situations are recognised:

    * An entity reference ("&name;"): the replacement is looked up in
      ``self.entity`` and passed to the target's ``data()``.  If the target
      has no ``data`` attribute the reference is dropped; an unknown name
      raises an expat error with code 11 and the current line/column.
    * "<!DOCTYPE": starts collecting the pieces of a doctype declaration.
    * Pieces of a doctype declaration: once a complete PUBLIC or SYSTEM
      form has been collected, the target's ``doctype(name, pubid, system)``
      is called (or a RuntimeWarning is issued if only the parser itself
      defines ``doctype``), and collection stops.
    """
    lead = text[:1]

    if lead == "&":
        # deal with undefined entities
        try:
            emit = self.target.data
        except AttributeError:
            return
        try:
            emit(self.entity[text[1:-1]])
        except KeyError:
            from xml.parsers import expat
            line = self.parser.ErrorLineNumber
            column = self.parser.ErrorColumnNumber
            err = expat.error(
                "undefined entity %s: line %d, column %d" %
                (text, line, column)
            )
            err.code = 11  # XML_ERROR_UNDEFINED_ENTITY
            err.lineno = line
            err.offset = column
            raise err
        return

    if lead == "<" and text[:9] == "<!DOCTYPE":
        self._doctype = []  # inside a doctype declaration
        return

    if self._doctype is None:
        return

    # parse doctype contents
    if lead == ">":
        self._doctype = None
        return
    text = text.strip()
    if not text:
        return
    pieces = self._doctype
    pieces.append(text)
    count = len(pieces)
    if count <= 2:
        return

    kind = pieces[1]
    if kind == "PUBLIC" and count == 4:
        name, _, pubid, system = pieces
        if pubid:
            pubid = pubid[1:-1]  # drop the surrounding quote characters
    elif kind == "SYSTEM" and count == 3:
        name, _, system = pieces
        pubid = None
    else:
        return

    if hasattr(self.target, "doctype"):
        self.target.doctype(name, pubid, system[1:-1])
    elif hasattr(self, "doctype"):
        warnings.warn(
            "The doctype() method of XMLParser is ignored.  "
            "Define doctype() method on the TreeBuilder target.",
            RuntimeWarning)

    self._doctype = None
jpayne@68	1689
def feed(self, data):
    """Feed encoded data to parser.

    Errors of type ``self._error`` raised by the underlying parser are
    passed to ``self._raiseerror``.
    """
    try:
        self.parser.Parse(data, 0)  # 0: more data may follow
    except self._error as exc:
        self._raiseerror(exc)
jpayne@68	1696
def close(self):
    """Finish feeding data to parser and return element structure.

    Returns whatever the target's ``close()`` returns, or None when the
    target has no ``close`` attribute.  Parser errors of type
    ``self._error`` are passed to ``self._raiseerror``.
    """
    try:
        self.parser.Parse("", 1)  # end of data
    except self._error as exc:
        self._raiseerror(exc)
    try:
        try:
            finish = self.target.close
        except AttributeError:
            return None
        return finish()
    finally:
        # get rid of circular references
        for attr in ("parser", "_parser", "target", "_target"):
            delattr(self, attr)
jpayne@68	1713
jpayne@68	1714
jpayne@68	1715 # --------------------------------------------------------------------
jpayne@68	1716 # C14N 2.0
jpayne@68	1717
def canonicalize(xml_data=None, *, out=None, from_file=None, **options):
    """Convert XML to its C14N 2.0 serialised form.

    If *out* is provided, it must be a file or file-like object that receives
    the serialised canonical XML output (text, not bytes) through its
    ``.write()`` method.  To write to a file, open it in text mode with
    encoding "utf-8".  If *out* is not provided, this function returns the
    output as text string.

    Either *xml_data* (an XML string) or *from_file* (a file path or
    file-like object) must be provided as input; when both are given,
    *xml_data* is used.

    The configuration *options* are the same as for the ``C14NWriterTarget``
    and are passed to it as keyword arguments.

    Raises ValueError when neither *xml_data* nor *from_file* is given.
    """
    if xml_data is None and from_file is None:
        raise ValueError("Either 'xml_data' or 'from_file' must be provided as input")
    sio = None
    if out is None:
        # No sink supplied: collect the output in memory and return it.
        sio = out = io.StringIO()

    parser = XMLParser(target=C14NWriterTarget(out.write, **options))

    if xml_data is not None:
        parser.feed(xml_data)
        parser.close()
    elif from_file is not None:
        parse(from_file, parser=parser)

    return sio.getvalue() if sio is not None else None
jpayne@68	1746
jpayne@68	1747
# Matcher for text of the form "prefix:name" (word characters on both sides
# of a single colon); used to decide whether text may be a prefixed QName.
_looks_like_prefix_name = re.compile(r'^\w+:\w+$', flags=re.UNICODE).match
jpayne@68	1749
jpayne@68	1750
class C14NWriterTarget:
    """
    Canonicalization writer target for the XMLParser.

    Serialises parse events to XML C14N 2.0.

    The write function is used for writing out the resulting data stream
    as text (not bytes).  To write to a file, open it in text mode with encoding
    "utf-8" and pass its ``.write`` method.

    Configuration options:

    - *with_comments*: set to true to include comments
    - *strip_text*: set to true to strip whitespace before and after text content
    - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}"
    - *qname_aware_tags*: a set of qname aware tag names in which prefixes
                          should be replaced in text content
    - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes
                           should be replaced in text content
    - *exclude_attrs*: a set of attribute names that should not be serialised
    - *exclude_tags*: a set of tag names that should not be serialised
    """
    def __init__(self, write, *,
                 with_comments=False, strip_text=False, rewrite_prefixes=False,
                 qname_aware_tags=None, qname_aware_attrs=None,
                 exclude_attrs=None, exclude_tags=None):
        self._write = write
        self._data = []  # text chunks collected since the last flush
        self._with_comments = with_comments
        self._strip_text = strip_text
        self._exclude_attrs = set(exclude_attrs) if exclude_attrs else None
        self._exclude_tags = set(exclude_tags) if exclude_tags else None

        self._rewrite_prefixes = rewrite_prefixes
        self._qname_aware_tags = (
            set(qname_aware_tags) if qname_aware_tags else None)
        # Stored as a bound ``intersection`` so it can be applied directly
        # to an attribute dict to find the qname-aware names it contains.
        self._find_qname_aware_attrs = (
            set(qname_aware_attrs).intersection if qname_aware_attrs else None)

        # Stack with globally and newly declared namespaces as (uri, prefix) pairs.
        self._declared_ns_stack = [[
            ("http://www.w3.org/XML/1998/namespace", "xml"),
        ]]
        # Stack with user declared namespace prefixes as (uri, prefix) pairs.
        self._ns_stack = []
        if not rewrite_prefixes:
            self._ns_stack.append(list(_namespace_map.items()))
        self._ns_stack.append([])
        self._prefix_map = {}  # uri -> generated "n{number}" prefix
        self._preserve_space = [False]
        self._pending_start = None  # deferred start() args of a qname-aware tag
        self._root_seen = False
        self._root_done = False
        self._ignored_depth = 0  # nesting depth inside an excluded tag

    def _iter_namespaces(self, ns_stack, _reversed=reversed):
        """Yield the entries of *ns_stack*, innermost level first."""
        for level in _reversed(ns_stack):
            # almost no element declares new namespaces
            for entry in level:
                yield entry

    def _resolve_prefix_name(self, prefixed_name):
        """Turn "prefix:name" into "{uri}name" using the prefixes in scope.

        Raises ValueError if the prefix has not been declared.
        """
        prefix, name = prefixed_name.split(':', 1)
        for uri, declared_prefix in self._iter_namespaces(self._ns_stack):
            if declared_prefix == prefix:
                return f'{{{uri}}}{name}'
        raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope')

    def _qname(self, qname, uri=None):
        """Return ``(serialised name, local name, uri)`` for *qname*.

        A namespace that has not been written out yet is recorded as a new
        declaration on the current level of the declared-namespace stack.
        Raises ValueError if the namespace is not declared in scope.
        """
        if uri is not None:
            tag = qname
        elif qname[:1] == '{':
            uri, tag = qname[1:].rsplit('}', 1)
        else:
            uri, tag = '', qname

        prefixes_seen = set()
        for declared_uri, prefix in self._iter_namespaces(self._declared_ns_stack):
            # A prefix seen on an inner level hides the same prefix further out.
            if declared_uri == uri and prefix not in prefixes_seen:
                serialised = f'{prefix}:{tag}' if prefix else tag
                return serialised, tag, uri
            prefixes_seen.add(prefix)

        # Not declared yet => add new declaration.
        if self._rewrite_prefixes:
            prefix_map = self._prefix_map
            try:
                prefix = prefix_map[uri]
            except KeyError:
                prefix = f'n{len(prefix_map)}'
                prefix_map[uri] = prefix
            self._declared_ns_stack[-1].append((uri, prefix))
            return f'{prefix}:{tag}', tag, uri

        if not uri and '' not in prefixes_seen:
            # No default namespace declared => no prefix needed.
            return tag, tag, uri

        for declared_uri, prefix in self._iter_namespaces(self._ns_stack):
            if declared_uri == uri:
                self._declared_ns_stack[-1].append((uri, prefix))
                serialised = f'{prefix}:{tag}' if prefix else tag
                return serialised, tag, uri

        raise ValueError(f'Namespace "{uri}" is not declared in scope')

    def data(self, data):
        """Collect text content unless inside an excluded tag."""
        if self._ignored_depth:
            return
        self._data.append(data)

    def _flush(self, _join_text=''.join):
        """Write out collected text, first emitting any deferred start tag."""
        text = _join_text(self._data)
        self._data.clear()
        if self._strip_text and not self._preserve_space[-1]:
            text = text.strip()
        pending = self._pending_start
        if pending is not None:
            self._pending_start = None
            tag, attrs, new_namespaces = pending
            if text and _looks_like_prefix_name(text):
                qname_text = text
            else:
                qname_text = None
            self._start(tag, attrs, new_namespaces, qname_text)
            if qname_text is not None:
                # _start() has already written the resolved text.
                return
        if text and self._root_seen:
            self._write(_escape_cdata_c14n(text))

    def start_ns(self, prefix, uri):
        """Record a user-declared namespace prefix for the upcoming element."""
        if self._ignored_depth:
            return
        # we may have to resolve qnames in text content
        if self._data:
            self._flush()
        self._ns_stack[-1].append((uri, prefix))

    def start(self, tag, attrs):
        """Handle an element start; qname-aware tags are deferred until their text is known."""
        excluded = self._exclude_tags
        if excluded is not None and (self._ignored_depth or tag in excluded):
            self._ignored_depth += 1
            return
        if self._data:
            self._flush()

        new_namespaces = []
        self._declared_ns_stack.append(new_namespaces)

        aware_tags = self._qname_aware_tags
        if aware_tags is not None and tag in aware_tags:
            # Need to parse text first to see if it requires a prefix declaration.
            self._pending_start = (tag, attrs, new_namespaces)
            return
        self._start(tag, attrs, new_namespaces)

    def _start(self, tag, attrs, new_namespaces, qname_text=None):
        """Write a start tag with its namespace declarations and attributes."""
        excluded_attrs = self._exclude_attrs
        if excluded_attrs is not None and attrs:
            attrs = {k: v for k, v in attrs.items() if k not in excluded_attrs}

        qnames = {tag, *attrs}
        resolved_names = {}

        # Resolve prefixes in attribute and tag text.
        if qname_text is not None:
            resolved = self._resolve_prefix_name(qname_text)
            resolved_names[qname_text] = resolved
            qnames.add(resolved)

        qattrs = None
        find_qattrs = self._find_qname_aware_attrs
        if find_qattrs is not None and attrs:
            qattrs = find_qattrs(attrs) or None
            for attr_name in qattrs or ():
                value = attrs[attr_name]
                if _looks_like_prefix_name(value):
                    resolved = self._resolve_prefix_name(value)
                    resolved_names[value] = resolved
                    qnames.add(resolved)

        # Assign prefixes in lexicographical order of used URIs.
        # NOTE: self._qname() may append to new_namespaces, so this must run
        # before the namespace declarations are collected below.
        parse_qname = self._qname
        ordered_names = sorted(qnames, key=lambda n: n.split('}', 1))
        parsed_qnames = {n: parse_qname(n) for n in ordered_names}

        # Write namespace declarations in prefix order ...
        # (almost always empty)
        attr_list = sorted(
            ('xmlns:' + prefix if prefix else 'xmlns', uri)
            for uri, prefix in new_namespaces
        )

        # ... followed by attributes in URI+name order
        if attrs:
            for key, value in sorted(attrs.items()):
                if qattrs is not None and key in qattrs and value in resolved_names:
                    value = parsed_qnames[resolved_names[value]][0]
                attr_qname, attr_name, uri = parsed_qnames[key]
                # No prefix for attributes in default ('') namespace.
                attr_list.append((attr_qname if uri else attr_name, value))

        # Honour xml:space attributes.
        space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space')
        if space_behaviour:
            preserve = space_behaviour == 'preserve'
        else:
            preserve = self._preserve_space[-1]
        self._preserve_space.append(preserve)

        # Write the tag.
        write = self._write
        write('<' + parsed_qnames[tag][0])
        if attr_list:
            write(''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list]))
        write('>')

        # Write the resolved qname text content.
        if qname_text is not None:
            write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0]))

        self._root_seen = True
        self._ns_stack.append([])

    def end(self, tag):
        """Write an end tag and drop the element's namespace/whitespace scope."""
        if self._ignored_depth:
            self._ignored_depth -= 1
            return
        if self._data:
            self._flush()
        self._write(f'</{self._qname(tag)[0]}>')
        self._preserve_space.pop()
        # Only the initial entry is left once the root element has ended.
        self._root_done = len(self._preserve_space) == 1
        self._declared_ns_stack.pop()
        self._ns_stack.pop()

    def comment(self, text):
        """Write a comment if comments are enabled and not inside an excluded tag."""
        if not self._with_comments or self._ignored_depth:
            return
        if self._root_done:
            self._write('\n')
        elif self._root_seen and self._data:
            self._flush()
        self._write(f'<!--{_escape_cdata_c14n(text)}-->')
        if not self._root_seen:
            self._write('\n')

    def pi(self, target, data):
        """Write a processing instruction unless inside an excluded tag."""
        if self._ignored_depth:
            return
        if self._root_done:
            self._write('\n')
        elif self._root_seen and self._data:
            self._flush()
        if data:
            self._write(f'<?{target} {_escape_cdata_c14n(data)}?>')
        else:
            self._write(f'<?{target}?>')
        if not self._root_seen:
            self._write('\n')
jpayne@68	2002
jpayne@68	2003
def _escape_cdata_c14n(text):
    """Escape character data for C14N 2.0 output.

    Replaces "&", "<", ">" and carriage return with "&amp;", "&lt;",
    "&gt;" and "&#xD;" respectively and returns the result.  A value that
    does not support these string operations is reported through
    ``_raise_serialization_error``.
    """
    try:
        # it's worth avoiding do-nothing calls for strings that are
        # shorter than 500 character, or so.  assume that's, by far,
        # the most common case in most applications.
        # "&" must be handled first so that the ampersands introduced by
        # the later replacements are not escaped a second time.
        if '&' in text:
            text = text.replace('&', '&amp;')
        if '<' in text:
            text = text.replace('<', '&lt;')
        if '>' in text:
            text = text.replace('>', '&gt;')
        if '\r' in text:
            text = text.replace('\r', '&#xD;')
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
jpayne@68	2021
jpayne@68	2022
def _escape_attrib_c14n(text):
    """Escape an attribute value for C14N 2.0 output.

    Replaces "&", "<", double quote, tab, newline and carriage return with
    "&amp;", "&lt;", "&quot;", "&#x9;", "&#xA;" and "&#xD;" respectively
    and returns the result.  A value that does not support these string
    operations is reported through ``_raise_serialization_error``.
    """
    try:
        # "&" must be handled first so that the ampersands introduced by
        # the later replacements are not escaped a second time.
        if '&' in text:
            text = text.replace('&', '&amp;')
        if '<' in text:
            text = text.replace('<', '&lt;')
        if '"' in text:
            text = text.replace('"', '&quot;')
        if '\t' in text:
            text = text.replace('\t', '&#x9;')
        if '\n' in text:
            text = text.replace('\n', '&#xA;')
        if '\r' in text:
            text = text.replace('\r', '&#xD;')
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
jpayne@68	2041
jpayne@68	2042
jpayne@68	2043 # --------------------------------------------------------------------
jpayne@68	2044
jpayne@68	2045 # Import the C accelerators
jpayne@68	2046 try:
jpayne@68	2047 # Element is going to be shadowed by the C implementation. We need to keep
jpayne@68	2048 # the Python version of it accessible for some "creative" use by external code
jpayne@68	2049 # (see tests)
jpayne@68	2050 _Element_Py = Element
jpayne@68	2051
jpayne@68	2052 # Element, SubElement, ParseError, TreeBuilder, XMLParser, _set_factories
jpayne@68	2053 from _elementtree import *
jpayne@68	2054 from _elementtree import _set_factories
jpayne@68	2055 except ImportError:
jpayne@68	2056 pass
jpayne@68	2057 else:
jpayne@68	2058 _set_factories(Comment, ProcessingInstruction)

Mercurial > repos > rliterman > csp2

annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/xml/etree/ElementTree.py @ 68:5028fdace37b