annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/xml/etree/ElementTree.py @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
rev   line source
jpayne@68 1 """Lightweight XML support for Python.
jpayne@68 2
jpayne@68 3 XML is an inherently hierarchical data format, and the most natural way to
jpayne@68 4 represent it is with a tree. This module has two classes for this purpose:
jpayne@68 5
jpayne@68 6 1. ElementTree represents the whole XML document as a tree and
jpayne@68 7
jpayne@68 8 2. Element represents a single node in this tree.
jpayne@68 9
jpayne@68 10 Interactions with the whole document (reading and writing to/from files) are
jpayne@68 11 usually done on the ElementTree level. Interactions with a single XML element
jpayne@68 12 and its sub-elements are done on the Element level.
jpayne@68 13
jpayne@68 14 Element is a flexible container object designed to store hierarchical data
jpayne@68 15 structures in memory. It can be described as a cross between a list and a
jpayne@68 16 dictionary. Each Element has a number of properties associated with it:
jpayne@68 17
jpayne@68 18 'tag' - a string containing the element's name.
jpayne@68 19
jpayne@68 20 'attributes' - a Python dictionary storing the element's attributes.
jpayne@68 21
jpayne@68 22 'text' - a string containing the element's text content.
jpayne@68 23
jpayne@68 24 'tail' - an optional string containing text after the element's end tag.
jpayne@68 25
jpayne@68 26 And a number of child elements stored in a Python sequence.
jpayne@68 27
jpayne@68 28 To create an element instance, use the Element constructor,
jpayne@68 29 or the SubElement factory function.
jpayne@68 30
jpayne@68 31 You can also use the ElementTree class to wrap an element structure
jpayne@68 32 and convert it to and from XML.
jpayne@68 33
jpayne@68 34 """
jpayne@68 35
jpayne@68 36 #---------------------------------------------------------------------
jpayne@68 37 # Licensed to PSF under a Contributor Agreement.
jpayne@68 38 # See http://www.python.org/psf/license for licensing details.
jpayne@68 39 #
jpayne@68 40 # ElementTree
jpayne@68 41 # Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved.
jpayne@68 42 #
jpayne@68 43 # fredrik@pythonware.com
jpayne@68 44 # http://www.pythonware.com
jpayne@68 45 # --------------------------------------------------------------------
jpayne@68 46 # The ElementTree toolkit is
jpayne@68 47 #
jpayne@68 48 # Copyright (c) 1999-2008 by Fredrik Lundh
jpayne@68 49 #
jpayne@68 50 # By obtaining, using, and/or copying this software and/or its
jpayne@68 51 # associated documentation, you agree that you have read, understood,
jpayne@68 52 # and will comply with the following terms and conditions:
jpayne@68 53 #
jpayne@68 54 # Permission to use, copy, modify, and distribute this software and
jpayne@68 55 # its associated documentation for any purpose and without fee is
jpayne@68 56 # hereby granted, provided that the above copyright notice appears in
jpayne@68 57 # all copies, and that both that copyright notice and this permission
jpayne@68 58 # notice appear in supporting documentation, and that the name of
jpayne@68 59 # Secret Labs AB or the author not be used in advertising or publicity
jpayne@68 60 # pertaining to distribution of the software without specific, written
jpayne@68 61 # prior permission.
jpayne@68 62 #
jpayne@68 63 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
jpayne@68 64 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
jpayne@68 65 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
jpayne@68 66 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
jpayne@68 67 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
jpayne@68 68 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
jpayne@68 69 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
jpayne@68 70 # OF THIS SOFTWARE.
jpayne@68 71 # --------------------------------------------------------------------
jpayne@68 72
# Names exported by ``from xml.etree.ElementTree import *``.
__all__ = [
    # public symbols
    "Comment",
    "dump",
    "Element",
    "ElementTree",
    "fromstring",
    "fromstringlist",
    "iselement",
    "iterparse",
    "parse",
    "ParseError",
    "PI",
    "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring",
    "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML",
    "XMLID",
    "XMLParser",
    "XMLPullParser",
    "register_namespace",
    "canonicalize",
    "C14NWriterTarget",
]

# Version string of the ElementTree API implemented by this module.
VERSION = "1.3.0"
jpayne@68 94
jpayne@68 95 import sys
jpayne@68 96 import re
jpayne@68 97 import warnings
jpayne@68 98 import io
jpayne@68 99 import collections
jpayne@68 100 import collections.abc
jpayne@68 101 import contextlib
jpayne@68 102
jpayne@68 103 from . import ElementPath
jpayne@68 104
jpayne@68 105
class ParseError(SyntaxError):
    """Raised when an XML document cannot be parsed.

    Besides the usual exception value, a ParseError carries two extra
    attributes:
        'code' - the specific exception code
        'position' - the line and column of the error

    """
jpayne@68 116
jpayne@68 117 # --------------------------------------------------------------------
jpayne@68 118
jpayne@68 119
def iselement(element):
    """Return True if *element* looks like an Element (has a 'tag' attribute)."""
    try:
        element.tag
    except AttributeError:
        return False
    return True
jpayne@68 123
jpayne@68 124
class Element:
    """An XML element.

    This class is the reference implementation of the Element interface.

    An element's length is its number of subelements.  That means if you
    want to check if an element is truly empty, you should check BOTH
    its length AND its text attribute.

    The element tag, attribute names, and attribute values can be either
    bytes or strings.

    *tag* is the element name.  *attrib* is an optional dictionary containing
    element attributes. *extra* are additional element attributes given as
    keyword arguments.

    Example form:
        <tag attrib>text<child/>...</tag>tail

    """

    # The element's name.
    tag = None

    # Dictionary of the element's attributes.
    attrib = None

    # Text before the first subelement.  This is either a string or the value
    # None.  Note that if there is no text, this attribute may be either None
    # or the empty string, depending on the parser.
    text = None

    # Text after this element's end tag, but before the next sibling
    # element's start tag.  This is either a string or the value None.  Note
    # that if there was no text, this attribute may be either None or an
    # empty string, depending on the parser.
    tail = None

    def __init__(self, tag, attrib={}, **extra):
        """Create an element named *tag*.

        *attrib* must be a dict (TypeError otherwise); it is copied and
        merged with the *extra* keyword arguments, so neither the caller's
        dict nor the shared default is ever mutated.

        """
        if not isinstance(attrib, dict):
            raise TypeError("attrib must be dict, not %s" % (
                attrib.__class__.__name__,))
        self.tag = tag
        self.attrib = {**attrib, **extra}
        self._children = []

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__, self.tag, id(self))

    def makeelement(self, tag, attrib):
        """Create a new element with the same type.

        *tag* is a string containing the element name.
        *attrib* is a dictionary containing the element attributes.

        Do not call this method, use the SubElement factory function instead.

        """
        return self.__class__(tag, attrib)

    def copy(self):
        """Return copy of current element.

        This creates a shallow copy. Subelements will be shared with the
        original tree.

        """
        elem = self.makeelement(self.tag, self.attrib)
        elem.text = self.text
        elem.tail = self.tail
        elem[:] = self
        return elem

    def __len__(self):
        """Return the number of direct subelements."""
        return len(self._children)

    def __bool__(self):
        """Return True if the element has subelements (emits FutureWarning)."""
        warnings.warn(
            "The behavior of this method will change in future versions. "
            "Use specific 'len(elem)' or 'elem is not None' test instead.",
            FutureWarning, stacklevel=2
            )
        return len(self._children) != 0 # emulate old behaviour, for now

    def __getitem__(self, index):
        """Return the subelement (or list of subelements) at *index*."""
        return self._children[index]

    def __setitem__(self, index, element):
        """Replace the subelement(s) at *index* with *element*.

        For a slice *index*, *element* is an iterable of elements; every
        item is type-checked before anything is stored.  TypeError is
        raised if a non-Element is supplied.

        """
        if isinstance(index, slice):
            # Materialize once: validating a one-shot iterable (such as a
            # generator) would otherwise exhaust it, and the assignment
            # below would then store nothing.
            element = list(element)
            for elt in element:
                self._assert_is_element(elt)
        else:
            self._assert_is_element(element)
        self._children[index] = element

    def __delitem__(self, index):
        """Delete the subelement(s) at *index*."""
        del self._children[index]

    def append(self, subelement):
        """Add *subelement* to the end of this element.

        The new element will appear in document order after the last existing
        subelement (or directly after the text, if it's the first subelement),
        but before the end tag for this element.

        """
        self._assert_is_element(subelement)
        self._children.append(subelement)

    def extend(self, elements):
        """Append subelements from an iterable.

        *elements* is an iterable with zero or more elements.  All items
        are type-checked before any of them is appended, so a TypeError
        leaves this element unchanged.

        """
        # Snapshot the input so that one-shot iterables (generators) are not
        # consumed by the validation pass before they can be appended.
        elements = list(elements)
        for element in elements:
            self._assert_is_element(element)
        self._children.extend(elements)

    def insert(self, index, subelement):
        """Insert *subelement* at position *index*."""
        self._assert_is_element(subelement)
        self._children.insert(index, subelement)

    def _assert_is_element(self, e):
        """Raise TypeError unless *e* is an instance of the Python Element."""
        # Need to refer to the actual Python implementation, not the
        # shadowing C implementation.  (_Element_Py is bound elsewhere in
        # this module.)
        if not isinstance(e, _Element_Py):
            raise TypeError('expected an Element, not %s' % type(e).__name__)

    def remove(self, subelement):
        """Remove matching subelement.

        Unlike the find methods, this method compares elements based on
        identity, NOT ON tag value or contents.  To remove subelements by
        other means, the easiest way is to use a list comprehension to
        select what elements to keep, and then use slice assignment to update
        the parent element.

        ValueError is raised if a matching element could not be found.

        """
        # assert iselement(element)
        self._children.remove(subelement)

    def getchildren(self):
        """(Deprecated) Return all subelements.

        Elements are returned in document order.

        """
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'list(elem)' or iteration over elem instead.",
            DeprecationWarning, stacklevel=2
            )
        return self._children

    def find(self, path, namespaces=None):
        """Find first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return the first matching element, or None if no element was found.

        """
        return ElementPath.find(self, path, namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find text for first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *default* is the value to return if the element was not found,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return text content of first matching element, or default value if
        none was found.  Note that if an element is found having no text
        content, the empty string is returned.

        """
        return ElementPath.findtext(self, path, default, namespaces)

    def findall(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Returns list containing all matching elements in document order.

        """
        return ElementPath.findall(self, path, namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return an iterable yielding all matching elements in document order.

        """
        return ElementPath.iterfind(self, path, namespaces)

    def clear(self):
        """Reset element.

        This function removes all subelements, clears all attributes, and sets
        the text and tail attributes to None.

        """
        self.attrib.clear()
        self._children = []
        self.text = self.tail = None

    def get(self, key, default=None):
        """Get element attribute.

        Equivalent to attrib.get, but some implementations may handle this a
        bit more efficiently.  *key* is what attribute to look for, and
        *default* is what to return if the attribute was not found.

        Returns the attribute value, or the default if the attribute was
        not found.

        """
        return self.attrib.get(key, default)

    def set(self, key, value):
        """Set element attribute.

        Equivalent to attrib[key] = value, but some implementations may handle
        this a bit more efficiently.  *key* is what attribute to set, and
        *value* is the attribute value to set it to.

        """
        self.attrib[key] = value

    def keys(self):
        """Get the attribute names.

        Equivalent to attrib.keys().

        """
        return self.attrib.keys()

    def items(self):
        """Get element attributes as (name, value) pairs.

        Equivalent to attrib.items().

        """
        return self.attrib.items()

    def iter(self, tag=None):
        """Create tree iterator.

        The iterator loops over the element and all subelements in document
        order, returning all elements with a matching tag.

        If the tree structure is modified during iteration, new or removed
        elements may or may not be included.  To get a stable set, use the
        list() function on the iterator, and loop over the resulting list.

        *tag* is what tags to look for (default is to return all elements);
        "*" is treated the same as None.

        Return an iterator containing all the matching elements.

        """
        if tag == "*":
            tag = None
        if tag is None or self.tag == tag:
            yield self
        for e in self._children:
            yield from e.iter(tag)

    # compatibility
    def getiterator(self, tag=None):
        """(Deprecated) Return list(self.iter(tag))."""
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'elem.iter()' or 'list(elem.iter())' instead.",
            DeprecationWarning, stacklevel=2
        )
        return list(self.iter(tag))

    def itertext(self):
        """Create text iterator.

        The iterator loops over the element and all subelements in document
        order, returning all inner text.

        """
        tag = self.tag
        # Elements whose tag is neither a string nor None (the Comment and
        # ProcessingInstruction factories use a function as tag) yield no text.
        if not isinstance(tag, str) and tag is not None:
            return
        t = self.text
        if t:
            yield t
        for e in self:
            yield from e.itertext()
            t = e.tail
            if t:
                yield t
jpayne@68 439
jpayne@68 440
def SubElement(parent, tag, attrib={}, **extra):
    """Create a child element and append it to *parent*.

    The element tag, attribute names, and attribute values can be either
    bytes or Unicode strings.

    *parent* is the parent element, *tag* is the subelement's name, *attrib*
    is an optional dictionary containing element attributes, *extra* are
    additional attributes given as keyword arguments.

    The new element is built with parent.makeelement() and returned after
    being appended to *parent*.

    """
    child = parent.makeelement(tag, {**attrib, **extra})
    parent.append(child)
    return child
jpayne@68 457
jpayne@68 458
def Comment(text=None):
    """Comment element factory.

    Build a special element, tagged with this very function, which the
    standard serializer writes out as an XML comment.

    *text* is a string containing the comment string.

    """
    node = Element(Comment)
    node.text = text
    return node
jpayne@68 471
jpayne@68 472
def ProcessingInstruction(target, text=None):
    """Processing Instruction element factory.

    Build a special element, tagged with this very function, which the
    standard serializer writes out as an XML processing instruction.

    *target* is a string containing the processing instruction, *text* is a
    string containing the processing instruction contents, if any.  The
    element's text is *target*, followed by a space and *text* when *text*
    is non-empty.

    """
    node = Element(ProcessingInstruction)
    node.text = target + " " + text if text else target
    return node

# Short alias for ProcessingInstruction.
PI = ProcessingInstruction
jpayne@68 490
jpayne@68 491
class QName:
    """Qualified name wrapper.

    This class can be used to wrap a QName attribute value in order to get
    proper namespace handling on output.

    *text_or_uri* is a string containing the QName value either in the form
    {uri}local, or if the tag argument is given, the URI part of a QName.

    *tag* is an optional argument which if given, will make the first
    argument (text_or_uri) be interpreted as a URI, and this argument (tag)
    be interpreted as a local name.

    Instances hash and compare by their text; comparison against a
    non-QName compares the text with that object directly.

    """

    def __init__(self, text_or_uri, tag=None):
        self.text = "{%s}%s" % (text_or_uri, tag) if tag else text_or_uri

    def __str__(self):
        return self.text

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, self.text)

    def __hash__(self):
        return hash(self.text)

    def __le__(self, other):
        return self.text <= (other.text if isinstance(other, QName) else other)

    def __lt__(self, other):
        return self.text < (other.text if isinstance(other, QName) else other)

    def __ge__(self, other):
        return self.text >= (other.text if isinstance(other, QName) else other)

    def __gt__(self, other):
        return self.text > (other.text if isinstance(other, QName) else other)

    def __eq__(self, other):
        return self.text == (other.text if isinstance(other, QName) else other)
jpayne@68 536
jpayne@68 537 # --------------------------------------------------------------------
jpayne@68 538
jpayne@68 539
jpayne@68 540 class ElementTree:
jpayne@68 541 """An XML element hierarchy.
jpayne@68 542
jpayne@68 543 This class also provides support for serialization to and from
jpayne@68 544 standard XML.
jpayne@68 545
jpayne@68 546 *element* is an optional root element node,
jpayne@68 547 *file* is an optional file handle or file name of an XML file whose
jpayne@68 548 contents will be used to initialize the tree with.
jpayne@68 549
jpayne@68 550 """
jpayne@68 551 def __init__(self, element=None, file=None):
jpayne@68 552 # assert element is None or iselement(element)
jpayne@68 553 self._root = element # first node
jpayne@68 554 if file:
jpayne@68 555 self.parse(file)
jpayne@68 556
jpayne@68 557 def getroot(self):
jpayne@68 558 """Return root element of this tree."""
jpayne@68 559 return self._root
jpayne@68 560
jpayne@68 561 def _setroot(self, element):
jpayne@68 562 """Replace root element of this tree.
jpayne@68 563
jpayne@68 564 This will discard the current contents of the tree and replace it
jpayne@68 565 with the given element. Use with care!
jpayne@68 566
jpayne@68 567 """
jpayne@68 568 # assert iselement(element)
jpayne@68 569 self._root = element
jpayne@68 570
jpayne@68 571 def parse(self, source, parser=None):
jpayne@68 572 """Load external XML document into element tree.
jpayne@68 573
jpayne@68 574 *source* is a file name or file object, *parser* is an optional parser
jpayne@68 575 instance that defaults to XMLParser.
jpayne@68 576
jpayne@68 577 ParseError is raised if the parser fails to parse the document.
jpayne@68 578
jpayne@68 579 Returns the root element of the given source document.
jpayne@68 580
jpayne@68 581 """
jpayne@68 582 close_source = False
jpayne@68 583 if not hasattr(source, "read"):
jpayne@68 584 source = open(source, "rb")
jpayne@68 585 close_source = True
jpayne@68 586 try:
jpayne@68 587 if parser is None:
jpayne@68 588 # If no parser was specified, create a default XMLParser
jpayne@68 589 parser = XMLParser()
jpayne@68 590 if hasattr(parser, '_parse_whole'):
jpayne@68 591 # The default XMLParser, when it comes from an accelerator,
jpayne@68 592 # can define an internal _parse_whole API for efficiency.
jpayne@68 593 # It can be used to parse the whole source without feeding
jpayne@68 594 # it with chunks.
jpayne@68 595 self._root = parser._parse_whole(source)
jpayne@68 596 return self._root
jpayne@68 597 while True:
jpayne@68 598 data = source.read(65536)
jpayne@68 599 if not data:
jpayne@68 600 break
jpayne@68 601 parser.feed(data)
jpayne@68 602 self._root = parser.close()
jpayne@68 603 return self._root
jpayne@68 604 finally:
jpayne@68 605 if close_source:
jpayne@68 606 source.close()
jpayne@68 607
jpayne@68 608 def iter(self, tag=None):
jpayne@68 609 """Create and return tree iterator for the root element.
jpayne@68 610
jpayne@68 611 The iterator loops over all elements in this tree, in document order.
jpayne@68 612
jpayne@68 613 *tag* is a string with the tag name to iterate over
jpayne@68 614 (default is to return all elements).
jpayne@68 615
jpayne@68 616 """
jpayne@68 617 # assert self._root is not None
jpayne@68 618 return self._root.iter(tag)
jpayne@68 619
jpayne@68 620 # compatibility
jpayne@68 621 def getiterator(self, tag=None):
jpayne@68 622 warnings.warn(
jpayne@68 623 "This method will be removed in future versions. "
jpayne@68 624 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
jpayne@68 625 DeprecationWarning, stacklevel=2
jpayne@68 626 )
jpayne@68 627 return list(self.iter(tag))
jpayne@68 628
jpayne@68 629 def find(self, path, namespaces=None):
jpayne@68 630 """Find first matching element by tag name or path.
jpayne@68 631
jpayne@68 632 Same as getroot().find(path), which is Element.find()
jpayne@68 633
jpayne@68 634 *path* is a string having either an element tag or an XPath,
jpayne@68 635 *namespaces* is an optional mapping from namespace prefix to full name.
jpayne@68 636
jpayne@68 637 Return the first matching element, or None if no element was found.
jpayne@68 638
jpayne@68 639 """
jpayne@68 640 # assert self._root is not None
jpayne@68 641 if path[:1] == "/":
jpayne@68 642 path = "." + path
jpayne@68 643 warnings.warn(
jpayne@68 644 "This search is broken in 1.3 and earlier, and will be "
jpayne@68 645 "fixed in a future version. If you rely on the current "
jpayne@68 646 "behaviour, change it to %r" % path,
jpayne@68 647 FutureWarning, stacklevel=2
jpayne@68 648 )
jpayne@68 649 return self._root.find(path, namespaces)
jpayne@68 650
jpayne@68 651 def findtext(self, path, default=None, namespaces=None):
jpayne@68 652 """Find first matching element by tag name or path.
jpayne@68 653
jpayne@68 654 Same as getroot().findtext(path), which is Element.findtext()
jpayne@68 655
jpayne@68 656 *path* is a string having either an element tag or an XPath,
jpayne@68 657 *namespaces* is an optional mapping from namespace prefix to full name.
jpayne@68 658
jpayne@68 659 Return the first matching element, or None if no element was found.
jpayne@68 660
jpayne@68 661 """
jpayne@68 662 # assert self._root is not None
jpayne@68 663 if path[:1] == "/":
jpayne@68 664 path = "." + path
jpayne@68 665 warnings.warn(
jpayne@68 666 "This search is broken in 1.3 and earlier, and will be "
jpayne@68 667 "fixed in a future version. If you rely on the current "
jpayne@68 668 "behaviour, change it to %r" % path,
jpayne@68 669 FutureWarning, stacklevel=2
jpayne@68 670 )
jpayne@68 671 return self._root.findtext(path, default, namespaces)
jpayne@68 672
jpayne@68 673 def findall(self, path, namespaces=None):
jpayne@68 674 """Find all matching subelements by tag name or path.
jpayne@68 675
jpayne@68 676 Same as getroot().findall(path), which is Element.findall().
jpayne@68 677
jpayne@68 678 *path* is a string having either an element tag or an XPath,
jpayne@68 679 *namespaces* is an optional mapping from namespace prefix to full name.
jpayne@68 680
jpayne@68 681 Return list containing all matching elements in document order.
jpayne@68 682
jpayne@68 683 """
jpayne@68 684 # assert self._root is not None
jpayne@68 685 if path[:1] == "/":
jpayne@68 686 path = "." + path
jpayne@68 687 warnings.warn(
jpayne@68 688 "This search is broken in 1.3 and earlier, and will be "
jpayne@68 689 "fixed in a future version. If you rely on the current "
jpayne@68 690 "behaviour, change it to %r" % path,
jpayne@68 691 FutureWarning, stacklevel=2
jpayne@68 692 )
jpayne@68 693 return self._root.findall(path, namespaces)
jpayne@68 694
jpayne@68 695 def iterfind(self, path, namespaces=None):
jpayne@68 696 """Find all matching subelements by tag name or path.
jpayne@68 697
jpayne@68 698 Same as getroot().iterfind(path), which is element.iterfind()
jpayne@68 699
jpayne@68 700 *path* is a string having either an element tag or an XPath,
jpayne@68 701 *namespaces* is an optional mapping from namespace prefix to full name.
jpayne@68 702
jpayne@68 703 Return an iterable yielding all matching elements in document order.
jpayne@68 704
jpayne@68 705 """
jpayne@68 706 # assert self._root is not None
jpayne@68 707 if path[:1] == "/":
jpayne@68 708 path = "." + path
jpayne@68 709 warnings.warn(
jpayne@68 710 "This search is broken in 1.3 and earlier, and will be "
jpayne@68 711 "fixed in a future version. If you rely on the current "
jpayne@68 712 "behaviour, change it to %r" % path,
jpayne@68 713 FutureWarning, stacklevel=2
jpayne@68 714 )
jpayne@68 715 return self._root.iterfind(path, namespaces)
jpayne@68 716
def write(self, file_or_filename,
          encoding=None,
          xml_declaration=None,
          default_namespace=None,
          method=None, *,
          short_empty_elements=True):
    """Serialize the element tree to a file.

    Arguments:
      *file_or_filename* -- a file name, or a file object opened for
                            writing

      *encoding* -- the output encoding; when omitted, "utf-8" is used
                    for the "c14n" method and "us-ascii" otherwise

      *xml_declaration* -- controls whether an XML declaration is
                           written (only for the "xml" method).  If
                           None, it is written when the encoding is
                           not US-ASCII, UTF-8 or "unicode"

      *default_namespace* -- sets the default XML namespace (for "xmlns")

      *method* -- either "xml" (default), "html", "text", or "c14n"

      *short_empty_elements* -- controls the formatting of elements
                                without content: if True (default) they
                                are written as a single self-closed tag,
                                otherwise as a start/end tag pair

    Raises ValueError if *method* is not a known serialization method.

    """
    if not method:
        method = "xml"
    elif method not in _serialize:
        raise ValueError("unknown method %r" % method)
    if not encoding:
        encoding = "utf-8" if method == "c14n" else "us-ascii"
    enc_lower = encoding.lower()
    with _get_writer(file_or_filename, enc_lower) as write:
        if method == "xml":
            if xml_declaration is None:
                emit_declaration = enc_lower not in (
                    "utf-8", "us-ascii", "unicode")
            else:
                emit_declaration = xml_declaration
            if emit_declaration:
                if enc_lower == "unicode":
                    # "unicode" is not a real encoding name; declare the
                    # locale's preferred encoding instead.
                    import locale
                    declared_encoding = locale.getpreferredencoding()
                else:
                    declared_encoding = encoding
                write("<?xml version='1.0' encoding='%s'?>\n" % (
                    declared_encoding,))
        if method == "text":
            _serialize_text(write, self._root)
        else:
            qnames, namespaces = _namespaces(self._root, default_namespace)
            _serialize[method](write, self._root, qnames, namespaces,
                               short_empty_elements=short_empty_elements)
jpayne@68 774
def write_c14n(self, file):
    """Write the tree to *file* with the "c14n" output method.

    Provided for lxml.etree compatibility; calling write() with
    method="c14n" is the preferred spelling.
    """
    return self.write(file, method="c14n")
jpayne@68 778
jpayne@68 779 # --------------------------------------------------------------------
jpayne@68 780 # serialization support
jpayne@68 781
jpayne@68 782 @contextlib.contextmanager
jpayne@68 783 def _get_writer(file_or_filename, encoding):
jpayne@68 784 # returns text write method and release all resources after using
jpayne@68 785 try:
jpayne@68 786 write = file_or_filename.write
jpayne@68 787 except AttributeError:
jpayne@68 788 # file_or_filename is a file name
jpayne@68 789 if encoding == "unicode":
jpayne@68 790 file = open(file_or_filename, "w")
jpayne@68 791 else:
jpayne@68 792 file = open(file_or_filename, "w", encoding=encoding,
jpayne@68 793 errors="xmlcharrefreplace")
jpayne@68 794 with file:
jpayne@68 795 yield file.write
jpayne@68 796 else:
jpayne@68 797 # file_or_filename is a file-like object
jpayne@68 798 # encoding determines if it is a text or binary writer
jpayne@68 799 if encoding == "unicode":
jpayne@68 800 # use a text writer as is
jpayne@68 801 yield write
jpayne@68 802 else:
jpayne@68 803 # wrap a binary writer with TextIOWrapper
jpayne@68 804 with contextlib.ExitStack() as stack:
jpayne@68 805 if isinstance(file_or_filename, io.BufferedIOBase):
jpayne@68 806 file = file_or_filename
jpayne@68 807 elif isinstance(file_or_filename, io.RawIOBase):
jpayne@68 808 file = io.BufferedWriter(file_or_filename)
jpayne@68 809 # Keep the original file open when the BufferedWriter is
jpayne@68 810 # destroyed
jpayne@68 811 stack.callback(file.detach)
jpayne@68 812 else:
jpayne@68 813 # This is to handle passed objects that aren't in the
jpayne@68 814 # IOBase hierarchy, but just have a write method
jpayne@68 815 file = io.BufferedIOBase()
jpayne@68 816 file.writable = lambda: True
jpayne@68 817 file.write = write
jpayne@68 818 try:
jpayne@68 819 # TextIOWrapper uses this methods to determine
jpayne@68 820 # if BOM (for UTF-16, etc) should be added
jpayne@68 821 file.seekable = file_or_filename.seekable
jpayne@68 822 file.tell = file_or_filename.tell
jpayne@68 823 except AttributeError:
jpayne@68 824 pass
jpayne@68 825 file = io.TextIOWrapper(file,
jpayne@68 826 encoding=encoding,
jpayne@68 827 errors="xmlcharrefreplace",
jpayne@68 828 newline="\n")
jpayne@68 829 # Keep the original file open when the TextIOWrapper is
jpayne@68 830 # destroyed
jpayne@68 831 stack.callback(file.detach)
jpayne@68 832 yield file.write
jpayne@68 833
def _namespaces(elem, default_namespace=None):
    """Collect the qualified names and namespaces used in a tree.

    Walks *elem* and everything below it and returns a
    ``(qnames, namespaces)`` pair.  *qnames* maps every tag, attribute
    name and QName value found to its serialized ``prefix:local`` form;
    *namespaces* maps namespace URIs to prefixes.  If
    *default_namespace* is given it is mapped to the empty prefix, and
    ValueError is raised for any non-qualified name found in the tree.
    """
    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {default_namespace: ""} if default_namespace else {}

    def encode(qname):
        # Work out the serialized form of *qname* and record it.
        try:
            if qname[:1] != "{":
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = qname
                return
            uri, local = qname[1:].rsplit("}", 1)
            prefix = namespaces.get(uri)
            if prefix is None:
                prefix = _namespace_map.get(uri)
                if prefix is None:
                    prefix = "ns%d" % len(namespaces)
                if prefix != "xml":
                    namespaces[uri] = prefix
            # an empty prefix means the default namespace
            qnames[qname] = "%s:%s" % (prefix, local) if prefix else local
        except TypeError:
            _raise_serialization_error(qname)

    def ensure(qname):
        # Encode *qname* unless it has been seen already.
        if qname not in qnames:
            encode(qname)

    # populate the qname and namespaces tables
    for node in elem.iter():
        tag = node.tag
        if isinstance(tag, QName):
            ensure(tag.text)
        elif isinstance(tag, str):
            ensure(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            ensure(key)
            if isinstance(value, QName):
                ensure(value.text)
        text = node.text
        if isinstance(text, QName):
            ensure(text.text)
    return qnames, namespaces
jpayne@68 894
def _serialize_xml(write, elem, qnames, namespaces,
                   short_empty_elements, **kwargs):
    """Serialize *elem* and its subtree as XML through *write*.

    *qnames* maps tags and attribute names to their serialized form and
    *namespaces* maps namespace URIs to prefixes.  The namespace
    declarations are written on this element only; children are
    serialized with ``namespaces=None``.  When *short_empty_elements* is
    true, an element with no text and no children is written as a
    self-closed tag.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % text)
    elif tag is ProcessingInstruction:
        write("<?%s?>" % text)
    else:
        name = qnames[tag]
        if name is None:
            # No tag of its own: only the content is written.
            if text:
                write(_escape_cdata(text))
            for child in elem:
                _serialize_xml(write, child, qnames, None,
                               short_empty_elements=short_empty_elements)
        else:
            write("<" + name)
            attributes = list(elem.items())
            if namespaces:
                # declarations are sorted on prefix
                declarations = sorted(namespaces.items(),
                                      key=lambda item: item[1])
                for uri, prefix in declarations:
                    if prefix:
                        prefix = ":" + prefix
                    write(" xmlns%s=\"%s\"" % (
                        prefix,
                        _escape_attrib(uri)
                        ))
            for key, value in attributes:
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib(value)
                write(" %s=\"%s\"" % (qnames[key], value))
            if short_empty_elements and not text and not len(elem):
                write(" />")
            else:
                write(">")
                if text:
                    write(_escape_cdata(text))
                for child in elem:
                    _serialize_xml(write, child, qnames, None,
                                   short_empty_elements=short_empty_elements)
                write("</" + name + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail))
jpayne@68 944
# Names of the HTML "void" elements.  The html serializer looks tags up
# here (lower-cased) and writes no end tag for the ones it finds.  Besides
# the HTML 4 empty elements this includes the HTML5 void elements "embed",
# "source", "track" and "wbr", for which an end tag is invalid markup.
HTML_EMPTY = {
    "area", "base", "basefont", "br", "col", "embed", "frame", "hr",
    "img", "input", "isindex", "link", "meta", "param", "source",
    "track", "wbr",
}
jpayne@68 952
def _serialize_html(write, elem, qnames, namespaces, **kwargs):
    """Serialize *elem* and its subtree as HTML through *write*.

    *qnames* maps tags and attribute names to their serialized form and
    *namespaces* maps namespace URIs to prefixes.  The namespace
    declarations are written on this element only; children are
    serialized with ``namespaces=None``.  The text of "script" and
    "style" elements is written unescaped, and no end tag is written for
    tags listed in HTML_EMPTY.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    else:
        name = qnames[tag]
        if name is None:
            # No tag of its own: only the content is written.
            if text:
                write(_escape_cdata(text))
            for child in elem:
                _serialize_html(write, child, qnames, None)
        else:
            write("<" + name)
            attributes = list(elem.items())
            if namespaces:
                # declarations are sorted on prefix
                declarations = sorted(namespaces.items(),
                                      key=lambda item: item[1])
                for uri, prefix in declarations:
                    if prefix:
                        prefix = ":" + prefix
                    write(" xmlns%s=\"%s\"" % (
                        prefix,
                        _escape_attrib(uri)
                        ))
            for key, value in attributes:
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib_html(value)
                # FIXME: handle boolean attributes
                write(" %s=\"%s\"" % (qnames[key], value))
            write(">")
            lowered = name.lower()
            if text:
                if lowered in ("script", "style"):
                    write(text)
                else:
                    write(_escape_cdata(text))
            for child in elem:
                _serialize_html(write, child, qnames, None)
            if lowered not in HTML_EMPTY:
                write("</" + name + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail))
jpayne@68 1002
jpayne@68 1003 def _serialize_text(write, elem):
jpayne@68 1004 for part in elem.itertext():
jpayne@68 1005 write(part)
jpayne@68 1006 if elem.tail:
jpayne@68 1007 write(elem.tail)
jpayne@68 1008
# Serializer functions keyed by output method name.
_serialize = dict(
    xml=_serialize_xml,
    html=_serialize_html,
    text=_serialize_text,
    # the optional "c14n" method (_serialize_c14n) is imported at the end
    # of the module
)
jpayne@68 1016
jpayne@68 1017
def register_namespace(prefix, uri):
    """Register a namespace prefix.

    *prefix* is the namespace prefix and *uri* the namespace URI.  The
    registry is global: any mapping that already exists for either the
    given prefix or the given URI is removed first.  Tags and attributes
    in this namespace will be serialized with the prefix if possible.

    ValueError is raised if *prefix* has the reserved form "ns<digits>".

    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Collect the conflicting entries first so that the registry is not
    # modified while it is being iterated.
    stale = [known_uri
             for known_uri, known_prefix in _namespace_map.items()
             if known_uri == uri or known_prefix == prefix]
    for known_uri in stale:
        del _namespace_map[known_uri]
    _namespace_map[uri] = prefix

_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}
# Exposed on the function for tests and troubleshooting
register_namespace._namespace_map = _namespace_map
jpayne@68 1051
jpayne@68 1052 def _raise_serialization_error(text):
jpayne@68 1053 raise TypeError(
jpayne@68 1054 "cannot serialize %r (type %s)" % (text, type(text).__name__)
jpayne@68 1055 )
jpayne@68 1056
jpayne@68 1057 def _escape_cdata(text):
jpayne@68 1058 # escape character data
jpayne@68 1059 try:
jpayne@68 1060 # it's worth avoiding do-nothing calls for strings that are
jpayne@68 1061 # shorter than 500 characters, or so. assume that's, by far,
jpayne@68 1062 # the most common case in most applications.
jpayne@68 1063 if "&" in text:
jpayne@68 1064 text = text.replace("&", "&amp;")
jpayne@68 1065 if "<" in text:
jpayne@68 1066 text = text.replace("<", "&lt;")
jpayne@68 1067 if ">" in text:
jpayne@68 1068 text = text.replace(">", "&gt;")
jpayne@68 1069 return text
jpayne@68 1070 except (TypeError, AttributeError):
jpayne@68 1071 _raise_serialization_error(text)
jpayne@68 1072
jpayne@68 1073 def _escape_attrib(text):
jpayne@68 1074 # escape attribute value
jpayne@68 1075 try:
jpayne@68 1076 if "&" in text:
jpayne@68 1077 text = text.replace("&", "&amp;")
jpayne@68 1078 if "<" in text:
jpayne@68 1079 text = text.replace("<", "&lt;")
jpayne@68 1080 if ">" in text:
jpayne@68 1081 text = text.replace(">", "&gt;")
jpayne@68 1082 if "\"" in text:
jpayne@68 1083 text = text.replace("\"", "&quot;")
jpayne@68 1084 # The following business with carriage returns is to satisfy
jpayne@68 1085 # Section 2.11 of the XML specification, stating that
jpayne@68 1086 # CR or CR LN should be replaced with just LN
jpayne@68 1087 # http://www.w3.org/TR/REC-xml/#sec-line-ends
jpayne@68 1088 if "\r\n" in text:
jpayne@68 1089 text = text.replace("\r\n", "\n")
jpayne@68 1090 if "\r" in text:
jpayne@68 1091 text = text.replace("\r", "\n")
jpayne@68 1092 #The following four lines are issue 17582
jpayne@68 1093 if "\n" in text:
jpayne@68 1094 text = text.replace("\n", "&#10;")
jpayne@68 1095 if "\t" in text:
jpayne@68 1096 text = text.replace("\t", "&#09;")
jpayne@68 1097 return text
jpayne@68 1098 except (TypeError, AttributeError):
jpayne@68 1099 _raise_serialization_error(text)
jpayne@68 1100
jpayne@68 1101 def _escape_attrib_html(text):
jpayne@68 1102 # escape attribute value
jpayne@68 1103 try:
jpayne@68 1104 if "&" in text:
jpayne@68 1105 text = text.replace("&", "&amp;")
jpayne@68 1106 if ">" in text:
jpayne@68 1107 text = text.replace(">", "&gt;")
jpayne@68 1108 if "\"" in text:
jpayne@68 1109 text = text.replace("\"", "&quot;")
jpayne@68 1110 return text
jpayne@68 1111 except (TypeError, AttributeError):
jpayne@68 1112 _raise_serialization_error(text)
jpayne@68 1113
jpayne@68 1114 # --------------------------------------------------------------------
jpayne@68 1115
def tostring(element, encoding=None, method=None, *,
             xml_declaration=None, default_namespace=None,
             short_empty_elements=True):
    """Generate a string representation of an XML element.

    *element* is an Element instance; all of its subelements are
    included.  *encoding* is the optional output encoding: with
    "unicode" a string is returned, otherwise a bytestring.  *method*,
    *xml_declaration*, *default_namespace* and *short_empty_elements*
    are passed on unchanged to ElementTree.write().

    Returns the (optionally encoded) string containing the XML data.

    """
    if encoding == 'unicode':
        stream = io.StringIO()
    else:
        stream = io.BytesIO()
    tree = ElementTree(element)
    tree.write(stream, encoding,
               xml_declaration=xml_declaration,
               default_namespace=default_namespace,
               method=method,
               short_empty_elements=short_empty_elements)
    return stream.getvalue()
jpayne@68 1139
jpayne@68 1140 class _ListDataStream(io.BufferedIOBase):
jpayne@68 1141 """An auxiliary stream accumulating into a list reference."""
jpayne@68 1142 def __init__(self, lst):
jpayne@68 1143 self.lst = lst
jpayne@68 1144
jpayne@68 1145 def writable(self):
jpayne@68 1146 return True
jpayne@68 1147
jpayne@68 1148 def seekable(self):
jpayne@68 1149 return True
jpayne@68 1150
jpayne@68 1151 def write(self, b):
jpayne@68 1152 self.lst.append(b)
jpayne@68 1153
jpayne@68 1154 def tell(self):
jpayne@68 1155 return len(self.lst)
jpayne@68 1156
def tostringlist(element, encoding=None, method=None, *,
                 xml_declaration=None, default_namespace=None,
                 short_empty_elements=True):
    """Serialize *element* and return the output as a list of chunks.

    Takes the same arguments as tostring(); they are passed on to
    ElementTree.write(), whose output is collected chunk by chunk in the
    returned list.
    """
    chunks = []
    stream = _ListDataStream(chunks)
    tree = ElementTree(element)
    tree.write(stream, encoding,
               xml_declaration=xml_declaration,
               default_namespace=default_namespace,
               method=method,
               short_empty_elements=short_empty_elements)
    return chunks
jpayne@68 1168
jpayne@68 1169
def dump(elem):
    """Write an element tree or element structure to sys.stdout.

    Intended for debugging only.

    *elem* is either an ElementTree or a single Element.  The exact
    output format is implementation dependent; this version writes an
    ordinary XML document and makes sure the output ends with a newline.

    """
    tree = elem if isinstance(elem, ElementTree) else ElementTree(elem)
    tree.write(sys.stdout, encoding="unicode")
    tail = tree.getroot().tail
    # Terminate the output unless the root's tail already did.
    if not (tail and tail[-1] == "\n"):
        sys.stdout.write("\n")
jpayne@68 1187
jpayne@68 1188 # --------------------------------------------------------------------
jpayne@68 1189 # parsing
jpayne@68 1190
jpayne@68 1191
def parse(source, parser=None):
    """Parse an XML document into an element tree.

    *source* is a filename or file object containing XML data, and
    *parser* is an optional parser instance; both are handed to
    ElementTree.parse().

    Returns the ElementTree instance that was populated.

    """
    document = ElementTree()
    document.parse(source, parser)
    return document
jpayne@68 1204
jpayne@68 1205
def iterparse(source, events=None, parser=None):
    """Incrementally parse an XML document, reporting parse events.

    *source* is a filename or file object containing XML data.  *events*
    is a sequence of the events to report back; the supported events are
    the strings "start", "end", "start-ns" and "end-ns" (the "ns" events
    give detailed namespace information).  If *events* is omitted, only
    "end" events are reported.  *parser* is an optional parser instance.

    Returns an iterator providing (event, elem) pairs.  Its ``root``
    attribute is None until the whole source has been parsed and then
    refers to the root element.

    """
    # Use the internal, undocumented _parser argument for now; When the
    # parser argument of iterparse is removed, this can be killed.
    pullparser = XMLPullParser(events=events, _parser=parser)

    # A file opened here is also closed here once the iterator finishes.
    close_source = not hasattr(source, "read")
    if close_source:
        source = open(source, "rb")

    def iterator():
        try:
            while True:
                yield from pullparser.read_events()
                # refill the event buffer with the next chunk of input
                chunk = source.read(16 * 1024)
                if not chunk:
                    break
                pullparser.feed(chunk)
            root = pullparser._close_and_return_root()
            yield from pullparser.read_events()
            it.root = root
        finally:
            if close_source:
                source.close()

    class IterParseIterator(collections.abc.Iterator):
        __next__ = iterator().__next__
    it = IterParseIterator()
    it.root = None
    del iterator, IterParseIterator

    return it
jpayne@68 1252
jpayne@68 1253
class XMLPullParser:
    """Parser that is fed data incrementally and queues its parse events.

    *events* is a sequence of the events to report; when omitted only
    "end" events are reported.  Queued events are retrieved with
    read_events().
    """

    def __init__(self, events=None, *, _parser=None):
        # The _parser argument is for internal use only and must not be relied
        # upon in user code. It will be removed in a future release.
        # See http://bugs.python.org/issue17741 for more details.

        self._events_queue = collections.deque()
        if not _parser:
            _parser = XMLParser(target=TreeBuilder())
        self._parser = _parser
        # hook the parser up so that it reports into the queue
        _parser._setevents(self._events_queue,
                           ("end",) if events is None else events)

    def feed(self, data):
        """Feed encoded data to parser."""
        parser = self._parser
        if parser is None:
            raise ValueError("feed() called after end of stream")
        if not data:
            return
        try:
            parser.feed(data)
        except SyntaxError as exc:
            # Queue the error: read_events() raises it when it is reached.
            self._events_queue.append(exc)

    def _close_and_return_root(self):
        # iterparse needs this to set its root attribute properly :(
        parser, self._parser = self._parser, None
        return parser.close()

    def close(self):
        """Finish feeding data to parser.

        Unlike XMLParser, does not return the root element.  Use
        read_events() to consume elements from XMLPullParser.
        """
        self._close_and_return_root()

    def read_events(self):
        """Return an iterator over currently available (event, elem) pairs.

        Events are removed from the internal event queue as they are
        retrieved from the iterator.  A queued exception is raised when
        it is reached.
        """
        queue = self._events_queue
        while queue:
            item = queue.popleft()
            if isinstance(item, Exception):
                raise item
            yield item
jpayne@68 1305
jpayne@68 1306
def XML(text, parser=None):
    """Parse an XML document from a string constant.

    This function can be used to embed "XML Literals" in Python code.

    *text* is a string containing XML data and *parser* is an optional
    parser instance; without one, an XMLParser feeding a TreeBuilder is
    used.

    Returns whatever the parser's close() returns (an Element instance
    for the default parser).

    """
    active = parser or XMLParser(target=TreeBuilder())
    active.feed(text)
    return active.close()
jpayne@68 1322
jpayne@68 1323
def XMLID(text, parser=None):
    """Parse an XML document from a string constant for its IDs.

    *text* is a string containing XML data and *parser* is an optional
    parser instance; without one, an XMLParser feeding a TreeBuilder is
    used.

    Returns an (Element, dict) tuple in which the dict maps each
    non-empty "id" attribute value to its element.  When an id occurs
    more than once, the last element carrying it wins.

    """
    active = parser or XMLParser(target=TreeBuilder())
    active.feed(text)
    tree = active.close()
    labelled = ((node.get("id"), node) for node in tree.iter())
    ids = {elem_id: node for elem_id, node in labelled if elem_id}
    return tree, ids
jpayne@68 1344
# fromstring() is an alias for XML(): parse an XML document from a
# string constant.
fromstring = XML
jpayne@68 1347
def fromstringlist(sequence, parser=None):
    """Parse an XML document from a sequence of string fragments.

    *sequence* is a list or other sequence of fragments, which are fed
    to the parser in order.  *parser* is an optional parser instance;
    without one, an XMLParser feeding a TreeBuilder is used.

    Returns whatever the parser's close() returns (an Element instance
    for the default parser).

    """
    active = parser or XMLParser(target=TreeBuilder())
    for fragment in sequence:
        active.feed(fragment)
    return active.close()
jpayne@68 1362
jpayne@68 1363 # --------------------------------------------------------------------
jpayne@68 1364
jpayne@68 1365
class TreeBuilder:
    """Generic element structure builder.

    Converts a sequence of start(), data() and end() method calls into
    an element structure.  It can be driven by a custom XML parser, or
    by a parser for some other XML-like format.

    *element_factory* is an optional factory called with (tag, attrs) to
    create each new element; it defaults to Element.

    *comment_factory* is an optional factory used to create comments
    instead of the standard one.  Comments are only inserted into the
    tree when *insert_comments* is true (it is false by default).

    *pi_factory* is an optional factory used to create processing
    instructions instead of the standard one.  They are only inserted
    into the tree when *insert_pis* is true (it is false by default).
    """

    def __init__(self, element_factory=None, *,
                 comment_factory=None, pi_factory=None,
                 insert_comments=False, insert_pis=False):
        self._data = []     # text chunks not yet attached to an element
        self._elem = []     # stack of the elements that are still open
        self._last = None   # element most recently opened or closed
        self._root = None   # first top-level element
        self._tail = None   # true if we're after an end tag
        self._comment_factory = (
            Comment if comment_factory is None else comment_factory)
        self.insert_comments = insert_comments
        self._pi_factory = (
            ProcessingInstruction if pi_factory is None else pi_factory)
        self.insert_pis = insert_pis
        self._factory = (
            Element if element_factory is None else element_factory)

    def close(self):
        """Flush builder buffers and return toplevel document Element."""
        assert len(self._elem) == 0, "missing end tags"
        assert self._root is not None, "missing toplevel element"
        return self._root

    def _flush(self):
        # Attach the collected text to the last element: as its tail if
        # that element has been closed, as its text otherwise.
        if not self._data:
            return
        target = self._last
        if target is not None:
            joined = "".join(self._data)
            if self._tail:
                assert target.tail is None, "internal error (tail)"
                target.tail = joined
            else:
                assert target.text is None, "internal error (text)"
                target.text = joined
        self._data = []

    def data(self, data):
        """Add text to current element."""
        self._data.append(data)

    def start(self, tag, attrs):
        """Open new element and return it.

        *tag* is the element name, *attrs* is a dict containing element
        attributes.

        """
        self._flush()
        opened = self._factory(tag, attrs)
        self._last = opened
        if self._elem:
            self._elem[-1].append(opened)
        elif self._root is None:
            self._root = opened
        self._elem.append(opened)
        self._tail = 0
        return opened

    def end(self, tag):
        """Close and return current Element.

        *tag* is the element name.

        """
        self._flush()
        closed = self._elem.pop()
        self._last = closed
        assert closed.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   closed.tag, tag)
        self._tail = 1
        return closed

    def comment(self, text):
        """Create a comment using the comment_factory.

        *text* is the text of the comment.
        """
        return self._handle_single(
            self._comment_factory, self.insert_comments, text)

    def pi(self, target, text=None):
        """Create a processing instruction using the pi_factory.

        *target* is the target name of the processing instruction.
        *text* is the data of the processing instruction, or ''.
        """
        return self._handle_single(
            self._pi_factory, self.insert_pis, target, text)

    def _handle_single(self, factory, insert, *args):
        # Create a childless node (comment or PI) and, if *insert* is
        # true, add it to the tree like an element closed right away.
        node = factory(*args)
        if insert:
            self._flush()
            self._last = node
            if self._elem:
                self._elem[-1].append(node)
            self._tail = 1
        return node
jpayne@68 1485
jpayne@68 1486
jpayne@68 1487 # also see ElementTree and TreeBuilder
jpayne@68 1488 class XMLParser:
jpayne@68 1489 """Element structure builder for XML source data based on the expat parser.
jpayne@68 1490
jpayne@68 1491 *target* is an optional target object which defaults to an instance of the
jpayne@68 1492 standard TreeBuilder class, *encoding* is an optional encoding string
jpayne@68 1493 which if given, overrides the encoding specified in the XML file:
jpayne@68 1494 http://www.iana.org/assignments/character-sets
jpayne@68 1495
jpayne@68 1496 """
jpayne@68 1497
jpayne@68 1498 def __init__(self, *, target=None, encoding=None):
jpayne@68 1499 try:
jpayne@68 1500 from xml.parsers import expat
jpayne@68 1501 except ImportError:
jpayne@68 1502 try:
jpayne@68 1503 import pyexpat as expat
jpayne@68 1504 except ImportError:
jpayne@68 1505 raise ImportError(
jpayne@68 1506 "No module named expat; use SimpleXMLTreeBuilder instead"
jpayne@68 1507 )
jpayne@68 1508 parser = expat.ParserCreate(encoding, "}")
jpayne@68 1509 if target is None:
jpayne@68 1510 target = TreeBuilder()
jpayne@68 1511 # underscored names are provided for compatibility only
jpayne@68 1512 self.parser = self._parser = parser
jpayne@68 1513 self.target = self._target = target
jpayne@68 1514 self._error = expat.error
jpayne@68 1515 self._names = {} # name memo cache
jpayne@68 1516 # main callbacks
jpayne@68 1517 parser.DefaultHandlerExpand = self._default
jpayne@68 1518 if hasattr(target, 'start'):
jpayne@68 1519 parser.StartElementHandler = self._start
jpayne@68 1520 if hasattr(target, 'end'):
jpayne@68 1521 parser.EndElementHandler = self._end
jpayne@68 1522 if hasattr(target, 'start_ns'):
jpayne@68 1523 parser.StartNamespaceDeclHandler = self._start_ns
jpayne@68 1524 if hasattr(target, 'end_ns'):
jpayne@68 1525 parser.EndNamespaceDeclHandler = self._end_ns
jpayne@68 1526 if hasattr(target, 'data'):
jpayne@68 1527 parser.CharacterDataHandler = target.data
jpayne@68 1528 # miscellaneous callbacks
jpayne@68 1529 if hasattr(target, 'comment'):
jpayne@68 1530 parser.CommentHandler = target.comment
jpayne@68 1531 if hasattr(target, 'pi'):
jpayne@68 1532 parser.ProcessingInstructionHandler = target.pi
jpayne@68 1533 # Configure pyexpat: buffering, new-style attribute handling.
jpayne@68 1534 parser.buffer_text = 1
jpayne@68 1535 parser.ordered_attributes = 1
jpayne@68 1536 parser.specified_attributes = 1
jpayne@68 1537 self._doctype = None
jpayne@68 1538 self.entity = {}
jpayne@68 1539 try:
jpayne@68 1540 self.version = "Expat %d.%d.%d" % expat.version_info
jpayne@68 1541 except AttributeError:
jpayne@68 1542 pass # unknown
jpayne@68 1543
jpayne@68 1544 def _setevents(self, events_queue, events_to_report):
jpayne@68 1545 # Internal API for XMLPullParser
jpayne@68 1546 # events_to_report: a list of events to report during parsing (same as
jpayne@68 1547 # the *events* of XMLPullParser's constructor.
jpayne@68 1548 # events_queue: a list of actual parsing events that will be populated
jpayne@68 1549 # by the underlying parser.
jpayne@68 1550 #
jpayne@68 1551 parser = self._parser
jpayne@68 1552 append = events_queue.append
jpayne@68 1553 for event_name in events_to_report:
jpayne@68 1554 if event_name == "start":
jpayne@68 1555 parser.ordered_attributes = 1
jpayne@68 1556 parser.specified_attributes = 1
jpayne@68 1557 def handler(tag, attrib_in, event=event_name, append=append,
jpayne@68 1558 start=self._start):
jpayne@68 1559 append((event, start(tag, attrib_in)))
jpayne@68 1560 parser.StartElementHandler = handler
jpayne@68 1561 elif event_name == "end":
jpayne@68 1562 def handler(tag, event=event_name, append=append,
jpayne@68 1563 end=self._end):
jpayne@68 1564 append((event, end(tag)))
jpayne@68 1565 parser.EndElementHandler = handler
jpayne@68 1566 elif event_name == "start-ns":
jpayne@68 1567 # TreeBuilder does not implement .start_ns()
jpayne@68 1568 if hasattr(self.target, "start_ns"):
jpayne@68 1569 def handler(prefix, uri, event=event_name, append=append,
jpayne@68 1570 start_ns=self._start_ns):
jpayne@68 1571 append((event, start_ns(prefix, uri)))
jpayne@68 1572 else:
jpayne@68 1573 def handler(prefix, uri, event=event_name, append=append):
jpayne@68 1574 append((event, (prefix or '', uri or '')))
jpayne@68 1575 parser.StartNamespaceDeclHandler = handler
jpayne@68 1576 elif event_name == "end-ns":
jpayne@68 1577 # TreeBuilder does not implement .end_ns()
jpayne@68 1578 if hasattr(self.target, "end_ns"):
jpayne@68 1579 def handler(prefix, event=event_name, append=append,
jpayne@68 1580 end_ns=self._end_ns):
jpayne@68 1581 append((event, end_ns(prefix)))
jpayne@68 1582 else:
jpayne@68 1583 def handler(prefix, event=event_name, append=append):
jpayne@68 1584 append((event, None))
jpayne@68 1585 parser.EndNamespaceDeclHandler = handler
jpayne@68 1586 elif event_name == 'comment':
jpayne@68 1587 def handler(text, event=event_name, append=append, self=self):
jpayne@68 1588 append((event, self.target.comment(text)))
jpayne@68 1589 parser.CommentHandler = handler
jpayne@68 1590 elif event_name == 'pi':
jpayne@68 1591 def handler(pi_target, data, event=event_name, append=append,
jpayne@68 1592 self=self):
jpayne@68 1593 append((event, self.target.pi(pi_target, data)))
jpayne@68 1594 parser.ProcessingInstructionHandler = handler
jpayne@68 1595 else:
jpayne@68 1596 raise ValueError("unknown event %r" % event_name)
jpayne@68 1597
def _raiseerror(self, value):
    """Raise a ParseError built from the parser error *value*.

    The raised exception gets the ``code`` of *value* and a ``position``
    tuple made of its ``lineno`` and ``offset``.
    """
    parse_error = ParseError(value)
    parse_error.code = value.code
    parse_error.position = (value.lineno, value.offset)
    raise parse_error
jpayne@68 1603
jpayne@68 1604 def _fixname(self, key):
jpayne@68 1605 # expand qname, and convert name string to ascii, if possible
jpayne@68 1606 try:
jpayne@68 1607 name = self._names[key]
jpayne@68 1608 except KeyError:
jpayne@68 1609 name = key
jpayne@68 1610 if "}" in name:
jpayne@68 1611 name = "{" + name
jpayne@68 1612 self._names[key] = name
jpayne@68 1613 return name
jpayne@68 1614
jpayne@68 1615 def _start_ns(self, prefix, uri):
jpayne@68 1616 return self.target.start_ns(prefix or '', uri or '')
jpayne@68 1617
jpayne@68 1618 def _end_ns(self, prefix):
jpayne@68 1619 return self.target.end_ns(prefix or '')
jpayne@68 1620
jpayne@68 1621 def _start(self, tag, attr_list):
jpayne@68 1622 # Handler for expat's StartElementHandler. Since ordered_attributes
jpayne@68 1623 # is set, the attributes are reported as a list of alternating
jpayne@68 1624 # attribute name,value.
jpayne@68 1625 fixname = self._fixname
jpayne@68 1626 tag = fixname(tag)
jpayne@68 1627 attrib = {}
jpayne@68 1628 if attr_list:
jpayne@68 1629 for i in range(0, len(attr_list), 2):
jpayne@68 1630 attrib[fixname(attr_list[i])] = attr_list[i+1]
jpayne@68 1631 return self.target.start(tag, attrib)
jpayne@68 1632
jpayne@68 1633 def _end(self, tag):
jpayne@68 1634 return self.target.end(self._fixname(tag))
jpayne@68 1635
jpayne@68 1636 def _default(self, text):
jpayne@68 1637 prefix = text[:1]
jpayne@68 1638 if prefix == "&":
jpayne@68 1639 # deal with undefined entities
jpayne@68 1640 try:
jpayne@68 1641 data_handler = self.target.data
jpayne@68 1642 except AttributeError:
jpayne@68 1643 return
jpayne@68 1644 try:
jpayne@68 1645 data_handler(self.entity[text[1:-1]])
jpayne@68 1646 except KeyError:
jpayne@68 1647 from xml.parsers import expat
jpayne@68 1648 err = expat.error(
jpayne@68 1649 "undefined entity %s: line %d, column %d" %
jpayne@68 1650 (text, self.parser.ErrorLineNumber,
jpayne@68 1651 self.parser.ErrorColumnNumber)
jpayne@68 1652 )
jpayne@68 1653 err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
jpayne@68 1654 err.lineno = self.parser.ErrorLineNumber
jpayne@68 1655 err.offset = self.parser.ErrorColumnNumber
jpayne@68 1656 raise err
jpayne@68 1657 elif prefix == "<" and text[:9] == "<!DOCTYPE":
jpayne@68 1658 self._doctype = [] # inside a doctype declaration
jpayne@68 1659 elif self._doctype is not None:
jpayne@68 1660 # parse doctype contents
jpayne@68 1661 if prefix == ">":
jpayne@68 1662 self._doctype = None
jpayne@68 1663 return
jpayne@68 1664 text = text.strip()
jpayne@68 1665 if not text:
jpayne@68 1666 return
jpayne@68 1667 self._doctype.append(text)
jpayne@68 1668 n = len(self._doctype)
jpayne@68 1669 if n > 2:
jpayne@68 1670 type = self._doctype[1]
jpayne@68 1671 if type == "PUBLIC" and n == 4:
jpayne@68 1672 name, type, pubid, system = self._doctype
jpayne@68 1673 if pubid:
jpayne@68 1674 pubid = pubid[1:-1]
jpayne@68 1675 elif type == "SYSTEM" and n == 3:
jpayne@68 1676 name, type, system = self._doctype
jpayne@68 1677 pubid = None
jpayne@68 1678 else:
jpayne@68 1679 return
jpayne@68 1680 if hasattr(self.target, "doctype"):
jpayne@68 1681 self.target.doctype(name, pubid, system[1:-1])
jpayne@68 1682 elif hasattr(self, "doctype"):
jpayne@68 1683 warnings.warn(
jpayne@68 1684 "The doctype() method of XMLParser is ignored. "
jpayne@68 1685 "Define doctype() method on the TreeBuilder target.",
jpayne@68 1686 RuntimeWarning)
jpayne@68 1687
jpayne@68 1688 self._doctype = None
jpayne@68 1689
def feed(self, data):
    """Feed encoded data to parser.

    Errors of the underlying parser are re-raised via _raiseerror().
    """
    parse_chunk = self.parser.Parse
    try:
        parse_chunk(data, False)  # more data may follow
    except self._error as exc:
        self._raiseerror(exc)
jpayne@68 1696
def close(self):
    """Finish feeding data to parser and return element structure.

    Returns whatever the target's close() returns, or None when the
    target has no close().  Parser errors are re-raised via
    _raiseerror().
    """
    try:
        self.parser.Parse("", True)  # end of data
    except self._error as exc:
        self._raiseerror(exc)

    try:
        try:
            finish = self.target.close
        except AttributeError:
            return None
        return finish()
    finally:
        # get rid of circular references
        del self.parser, self._parser
        del self.target, self._target
jpayne@68 1713
jpayne@68 1714
jpayne@68 1715 # --------------------------------------------------------------------
jpayne@68 1716 # C14N 2.0
jpayne@68 1717
def canonicalize(xml_data=None, *, out=None, from_file=None, **options):
    """Convert XML to its C14N 2.0 serialised form.

    If *out* is provided, it must be a file or file-like object that receives
    the serialised canonical XML output (text, not bytes) through its ``.write()``
    method.  To write to a file, open it in text mode with encoding "utf-8".
    If *out* is not provided, this function returns the output as text string.

    Either *xml_data* (an XML string) or *from_file* (a file path or
    file-like object) must be provided as input; *xml_data* takes
    precedence when both are given.  ValueError is raised when neither is.

    The configuration options are the same as for the ``C14NWriterTarget``.
    """
    if xml_data is None and from_file is None:
        raise ValueError("Either 'xml_data' or 'from_file' must be provided as input")

    # Without a caller-supplied sink, collect the output in memory.
    collect = out is None
    if collect:
        out = io.StringIO()

    parser = XMLParser(target=C14NWriterTarget(out.write, **options))

    if xml_data is not None:
        parser.feed(xml_data)
        parser.close()
    else:
        # from_file cannot be None here, see the check at the top.
        parse(from_file, parser=parser)

    if collect:
        return out.getvalue()
    return None
jpayne@68 1746
jpayne@68 1747
# Matcher for text of the form "prefix:name" (word characters on both sides
# of a single colon).  C14NWriterTarget uses it to decide whether text or an
# attribute value should be treated as a prefixed QName.
_looks_like_prefix_name = re.compile(r'^\w+:\w+$', re.UNICODE).match
jpayne@68 1749
jpayne@68 1750
class C14NWriterTarget:
    """
    Canonicalization writer target for the XMLParser.

    Serialises parse events to XML C14N 2.0.

    The *write* function is used for writing out the resulting data stream
    as text (not bytes).  To write to a file, open it in text mode with encoding
    "utf-8" and pass its ``.write`` method.

    Configuration options:

    - *with_comments*: set to true to include comments
    - *strip_text*: set to true to strip whitespace before and after text content
    - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}"
    - *qname_aware_tags*: a set of qname aware tag names in which prefixes
                          should be replaced in text content
    - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes
                           should be replaced in text content
    - *exclude_attrs*: a set of attribute names that should not be serialised
    - *exclude_tags*: a set of tag names that should not be serialised
    """
    def __init__(self, write, *,
                 with_comments=False, strip_text=False, rewrite_prefixes=False,
                 qname_aware_tags=None, qname_aware_attrs=None,
                 exclude_attrs=None, exclude_tags=None):
        self._write = write
        self._data = []  # text chunks collected since the last flush
        self._with_comments = with_comments
        self._strip_text = strip_text
        self._exclude_attrs = set(exclude_attrs) if exclude_attrs else None
        self._exclude_tags = set(exclude_tags) if exclude_tags else None

        self._rewrite_prefixes = rewrite_prefixes
        if qname_aware_tags:
            self._qname_aware_tags = set(qname_aware_tags)
        else:
            self._qname_aware_tags = None
        if qname_aware_attrs:
            # Bound method: called with the attribute dict, it returns the
            # qname aware attribute names that are actually present.
            self._find_qname_aware_attrs = set(qname_aware_attrs).intersection
        else:
            self._find_qname_aware_attrs = None

        # Stack with globally and newly declared namespaces as (uri, prefix) pairs.
        self._declared_ns_stack = [[
            ("http://www.w3.org/XML/1998/namespace", "xml"),
        ]]
        # Stack with user declared namespace prefixes as (uri, prefix) pairs.
        self._ns_stack = []
        if not rewrite_prefixes:
            self._ns_stack.append(list(_namespace_map.items()))
        self._ns_stack.append([])
        self._prefix_map = {}  # uri -> generated "n{number}" prefix
        self._preserve_space = [False]  # one entry per written open element
        # (tag, attrs, new_namespaces) of a qname aware element whose start
        # tag is held back until its text has been seen, else None.
        self._pending_start = None
        self._root_seen = False
        self._root_done = False
        self._ignored_depth = 0  # nesting depth inside an excluded element

    def _iter_namespaces(self, ns_stack, _reversed=reversed):
        """Yield the (uri, prefix) pairs of *ns_stack*, innermost level first."""
        for namespaces in _reversed(ns_stack):
            if namespaces:  # almost no element declares new namespaces
                yield from namespaces

    def _resolve_prefix_name(self, prefixed_name):
        """Turn "prefix:name" into "{uri}name" using the namespaces in scope.

        Raises ValueError when the prefix is not declared.
        """
        prefix, name = prefixed_name.split(':', 1)
        for uri, p in self._iter_namespaces(self._ns_stack):
            if p == prefix:
                return f'{{{uri}}}{name}'
        raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope')

    def _qname(self, qname, uri=None):
        """Return (serialised name, local name, uri) for *qname*.

        *qname* is in "{uri}local" or plain "local" form unless *uri* is
        given separately.  A namespace that has not been written out yet
        is recorded on the current level of the declared-namespace stack.
        Raises ValueError when the namespace is not declared in scope.
        """
        if uri is None:
            uri, tag = qname[1:].rsplit('}', 1) if qname[:1] == '{' else ('', qname)
        else:
            tag = qname

        prefixes_seen = set()
        for u, prefix in self._iter_namespaces(self._declared_ns_stack):
            # A prefix seen on an inner level hides the same prefix further out.
            if u == uri and prefix not in prefixes_seen:
                return f'{prefix}:{tag}' if prefix else tag, tag, uri
            prefixes_seen.add(prefix)

        # Not declared yet => add new declaration.
        if self._rewrite_prefixes:
            if uri in self._prefix_map:
                prefix = self._prefix_map[uri]
            else:
                prefix = self._prefix_map[uri] = f'n{len(self._prefix_map)}'
            self._declared_ns_stack[-1].append((uri, prefix))
            return f'{prefix}:{tag}', tag, uri

        if not uri and '' not in prefixes_seen:
            # No default namespace declared => no prefix needed.
            return tag, tag, uri

        for u, prefix in self._iter_namespaces(self._ns_stack):
            if u == uri:
                self._declared_ns_stack[-1].append((uri, prefix))
                return f'{prefix}:{tag}' if prefix else tag, tag, uri

        raise ValueError(f'Namespace "{uri}" is not declared in scope')

    def data(self, data):
        """Collect character data (dropped inside excluded elements)."""
        if not self._ignored_depth:
            self._data.append(data)

    def _flush(self, _join_text=''.join):
        """Write out a held-back start tag, if any, and the collected text."""
        data = _join_text(self._data)
        del self._data[:]
        if self._strip_text and not self._preserve_space[-1]:
            data = data.strip()
        if self._pending_start is not None:
            args, self._pending_start = self._pending_start, None
            qname_text = data if data and _looks_like_prefix_name(data) else None
            self._start(*args, qname_text)
            if qname_text is not None:
                # _start() has already written the resolved QName text.
                return
        if data and self._root_seen:
            self._write(_escape_cdata_c14n(data))

    def _flush_if_needed(self):
        """Flush when there is collected text or a held-back start tag.

        Checking ``self._data`` alone is not enough: a qname aware element
        without text content has no data but still has a start tag that
        must be written before anything that follows it.
        """
        if self._data or self._pending_start is not None:
            self._flush()

    def start_ns(self, prefix, uri):
        """Record the namespace declaration *prefix* -> *uri*."""
        if self._ignored_depth:
            return
        # we may have to resolve qnames in text content
        self._flush_if_needed()
        self._ns_stack[-1].append((uri, prefix))

    def start(self, tag, attrs):
        """Handle an element start; excluded elements are only counted."""
        if self._exclude_tags is not None and (
                self._ignored_depth or tag in self._exclude_tags):
            self._ignored_depth += 1
            return
        self._flush_if_needed()

        new_namespaces = []
        self._declared_ns_stack.append(new_namespaces)

        if self._qname_aware_tags is not None and tag in self._qname_aware_tags:
            # Need to parse text first to see if it requires a prefix declaration.
            self._pending_start = (tag, attrs, new_namespaces)
            return
        self._start(tag, attrs, new_namespaces)

    def _start(self, tag, attrs, new_namespaces, qname_text=None):
        """Write the start tag for *tag*.

        *new_namespaces* is the list that receives the namespaces first
        declared on this element.  *qname_text*, if given, is
        "prefix:name" text content that is resolved and written right
        after the start tag.
        """
        if self._exclude_attrs is not None and attrs:
            attrs = {k: v for k, v in attrs.items() if k not in self._exclude_attrs}

        qnames = {tag, *attrs}
        resolved_names = {}

        # Resolve prefixes in attribute and tag text.
        if qname_text is not None:
            qname = resolved_names[qname_text] = self._resolve_prefix_name(qname_text)
            qnames.add(qname)
        if self._find_qname_aware_attrs is not None and attrs:
            qattrs = self._find_qname_aware_attrs(attrs)
            if qattrs:
                for attr_name in qattrs:
                    value = attrs[attr_name]
                    if _looks_like_prefix_name(value):
                        qname = resolved_names[value] = self._resolve_prefix_name(value)
                        qnames.add(qname)
            else:
                qattrs = None
        else:
            qattrs = None

        # Assign prefixes in lexicographical order of used URIs.
        parse_qname = self._qname
        parsed_qnames = {n: parse_qname(n) for n in sorted(
            qnames, key=lambda n: n.split('}', 1))}

        # Write namespace declarations in prefix order ...
        if new_namespaces:
            attr_list = [
                ('xmlns:' + prefix if prefix else 'xmlns', uri)
                for uri, prefix in new_namespaces
            ]
            attr_list.sort()
        else:
            # almost always empty
            attr_list = []

        # ... followed by attributes in URI+name order
        if attrs:
            for k, v in sorted(attrs.items()):
                if qattrs is not None and k in qattrs and v in resolved_names:
                    v = parsed_qnames[resolved_names[v]][0]
                attr_qname, attr_name, uri = parsed_qnames[k]
                # No prefix for attributes in default ('') namespace.
                attr_list.append((attr_qname if uri else attr_name, v))

        # Honour xml:space attributes.
        space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space')
        self._preserve_space.append(
            space_behaviour == 'preserve' if space_behaviour
            else self._preserve_space[-1])

        # Write the tag.
        write = self._write
        write('<' + parsed_qnames[tag][0])
        if attr_list:
            write(''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list]))
        write('>')

        # Write the resolved qname text content.
        if qname_text is not None:
            write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0]))

        self._root_seen = True
        self._ns_stack.append([])

    def end(self, tag):
        """Handle an element end: write the end tag and leave its scope."""
        if self._ignored_depth:
            self._ignored_depth -= 1
            return
        # Also emits the start tag of a qname aware element that had no
        # text, so that the pops below match the pushes done by _start().
        self._flush_if_needed()
        self._write(f'</{self._qname(tag)[0]}>')
        self._preserve_space.pop()
        self._root_done = len(self._preserve_space) == 1
        self._declared_ns_stack.pop()
        self._ns_stack.pop()

    def comment(self, text):
        """Write a comment, if comments were requested."""
        if not self._with_comments:
            return
        if self._ignored_depth:
            return
        if self._root_done:
            self._write('\n')
        elif self._pending_start is not None or (self._root_seen and self._data):
            # A held-back start tag must come before the comment it contains.
            self._flush()
        self._write(f'<!--{_escape_cdata_c14n(text)}-->')
        if not self._root_seen:
            self._write('\n')

    def pi(self, target, data):
        """Write a processing instruction."""
        if self._ignored_depth:
            return
        if self._root_done:
            self._write('\n')
        elif self._pending_start is not None or (self._root_seen and self._data):
            # A held-back start tag must come before the PI it contains.
            self._flush()
        self._write(
            f'<?{target} {_escape_cdata_c14n(data)}?>' if data else f'<?{target}?>')
        if not self._root_seen:
            self._write('\n')
jpayne@68 2002
jpayne@68 2003
jpayne@68 2004 def _escape_cdata_c14n(text):
jpayne@68 2005 # escape character data
jpayne@68 2006 try:
jpayne@68 2007 # it's worth avoiding do-nothing calls for strings that are
jpayne@68 2008 # shorter than 500 character, or so. assume that's, by far,
jpayne@68 2009 # the most common case in most applications.
jpayne@68 2010 if '&' in text:
jpayne@68 2011 text = text.replace('&', '&amp;')
jpayne@68 2012 if '<' in text:
jpayne@68 2013 text = text.replace('<', '&lt;')
jpayne@68 2014 if '>' in text:
jpayne@68 2015 text = text.replace('>', '&gt;')
jpayne@68 2016 if '\r' in text:
jpayne@68 2017 text = text.replace('\r', '&#xD;')
jpayne@68 2018 return text
jpayne@68 2019 except (TypeError, AttributeError):
jpayne@68 2020 _raise_serialization_error(text)
jpayne@68 2021
jpayne@68 2022
jpayne@68 2023 def _escape_attrib_c14n(text):
jpayne@68 2024 # escape attribute value
jpayne@68 2025 try:
jpayne@68 2026 if '&' in text:
jpayne@68 2027 text = text.replace('&', '&amp;')
jpayne@68 2028 if '<' in text:
jpayne@68 2029 text = text.replace('<', '&lt;')
jpayne@68 2030 if '"' in text:
jpayne@68 2031 text = text.replace('"', '&quot;')
jpayne@68 2032 if '\t' in text:
jpayne@68 2033 text = text.replace('\t', '&#x9;')
jpayne@68 2034 if '\n' in text:
jpayne@68 2035 text = text.replace('\n', '&#xA;')
jpayne@68 2036 if '\r' in text:
jpayne@68 2037 text = text.replace('\r', '&#xD;')
jpayne@68 2038 return text
jpayne@68 2039 except (TypeError, AttributeError):
jpayne@68 2040 _raise_serialization_error(text)
jpayne@68 2041
jpayne@68 2042
jpayne@68 2043 # --------------------------------------------------------------------
jpayne@68 2044
# Import the C accelerators
try:
    # Element is going to be shadowed by the C implementation. We need to keep
    # the Python version of it accessible for some "creative" uses by external
    # code (see tests)
    _Element_Py = Element

    # Element, SubElement, ParseError, TreeBuilder, XMLParser, _set_factories
    from _elementtree import *
    from _elementtree import _set_factories
except ImportError:
    # The C module could not be imported: the definitions made in this
    # module stay in effect.
    pass
else:
    # Pass this module's Comment and ProcessingInstruction factories to the
    # C module.
    _set_factories(Comment, ProcessingInstruction)