comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/xml/dom/minidom.py @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 """Simple implementation of the Level 1 DOM.
2
3 Namespaces and other minor Level 2 features are also supported.
4
5 parse("foo.xml")
6
7 parseString("<foo><bar/></foo>")
8
9 Todo:
10 =====
11 * convenience methods for getting elements and text.
12 * more testing
13 * bring some of the writer and linearizer code into conformance with this
14 interface
15 * SAX 2 namespaces
16 """
17
18 import io
19 import xml.dom
20
21 from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg
22 from xml.dom.minicompat import *
23 from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS
24
25 # This is used by the ID-cache invalidation checks; the list isn't
26 # actually complete, since the nodes being checked will never be the
27 # DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE. (The node being checked is
28 # the node being added or removed, not the node being modified.)
29 #
30 _nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE,
31 xml.dom.Node.ENTITY_REFERENCE_NODE)
32
33
class Node(xml.dom.Node):
    """Common base class of the minidom node types.

    Implements the child-list manipulation methods, the serialisation
    helpers toxml()/toprettyxml(), the DOM Level 3 user-data functions and
    the minidom-specific unlink() / context-manager protocol.

    The methods here read ``nodeType``, ``childNodes``,
    ``_child_node_types`` and ``writexml()`` from the instance; this class
    does not define them itself.
    """
    namespaceURI = None  # this is non-null only for elements and attributes
    parentNode = None
    ownerDocument = None
    nextSibling = None
    previousSibling = None

    prefix = EMPTY_PREFIX  # non-null only for NS elements and attributes

    def __bool__(self):
        """Nodes are always truthy."""
        return True

    def toxml(self, encoding=None):
        """Serialise the node without any added indentation or newlines."""
        return self.toprettyxml("", "", encoding)

    def toprettyxml(self, indent="\t", newl="\n", encoding=None):
        """Serialise the node using *indent* per level and *newl* as newline.

        Returns a str when *encoding* is None, otherwise bytes encoded
        with *encoding* (unencodable characters become character
        references).
        """
        if encoding is None:
            writer = io.StringIO()
        else:
            writer = io.TextIOWrapper(io.BytesIO(),
                                      encoding=encoding,
                                      errors="xmlcharrefreplace",
                                      newline='\n')
        if self.nodeType == Node.DOCUMENT_NODE:
            # Can pass encoding only to document, to put it into XML header
            self.writexml(writer, "", indent, newl, encoding)
        else:
            self.writexml(writer, "", indent, newl)
        if encoding is None:
            return writer.getvalue()
        else:
            # detach() flushes the text layer and hands back the BytesIO.
            return writer.detach().getvalue()

    def hasChildNodes(self):
        """Return True if the node has at least one child."""
        return bool(self.childNodes)

    def _get_childNodes(self):
        return self.childNodes

    def _get_firstChild(self):
        if self.childNodes:
            return self.childNodes[0]

    def _get_lastChild(self):
        if self.childNodes:
            return self.childNodes[-1]

    def insertBefore(self, newChild, refChild):
        """Insert *newChild* before *refChild* (append if *refChild* is None).

        A document fragment has each of its children inserted in turn.
        Raises HierarchyRequestErr if *newChild* is of a type this node
        cannot contain and NotFoundErr if *refChild* is not a child of
        this node.  Returns *newChild*.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for c in tuple(newChild.childNodes):
                self.insertBefore(c, refChild)
            ### The DOM does not clearly specify what to return in this case
            return newChild
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if refChild is not None:
            # Validate the reference node *before* touching newChild, so a
            # failing call leaves every tree exactly as it was.
            if refChild not in self.childNodes:
                raise xml.dom.NotFoundErr()
            if newChild is refChild:
                # Inserting a node before itself: it is already in place.
                return newChild
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        if refChild is None:
            self.appendChild(newChild)
        else:
            # Looked up after the detach: removing newChild may have
            # shifted refChild's position when both were children of self.
            index = self.childNodes.index(refChild)
            if newChild.nodeType in _nodeTypes_with_children:
                _clear_id_cache(self)
            self.childNodes.insert(index, newChild)
            newChild.nextSibling = refChild
            refChild.previousSibling = newChild
            if index:
                node = self.childNodes[index-1]
                node.nextSibling = newChild
                newChild.previousSibling = node
            else:
                newChild.previousSibling = None
            newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Append *node* (or the children of a fragment) and return it.

        Raises HierarchyRequestErr if *node* is of a type this node cannot
        contain.
        """
        if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for c in tuple(node.childNodes):
                self.appendChild(c)
            ### The DOM does not clearly specify what to return in this case
            return node
        if node.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        elif node.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)
        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        _append_child(self, node)
        node.nextSibling = None
        return node

    def replaceChild(self, newChild, oldChild):
        """Replace *oldChild* with *newChild* and return *oldChild*.

        Returns None (and does nothing) when both arguments are the same
        node.  Raises HierarchyRequestErr if *newChild* is of a type this
        node cannot contain and NotFoundErr if *oldChild* is not a child
        of this node.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            refChild = oldChild.nextSibling
            self.removeChild(oldChild)
            return self.insertBefore(newChild, refChild)
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild is oldChild:
            return
        # Check membership before detaching newChild so that a failing
        # call does not rip newChild out of its current parent.
        if oldChild not in self.childNodes:
            raise xml.dom.NotFoundErr()
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        index = self.childNodes.index(oldChild)
        self.childNodes[index] = newChild
        newChild.parentNode = self
        oldChild.parentNode = None
        if (newChild.nodeType in _nodeTypes_with_children
            or oldChild.nodeType in _nodeTypes_with_children):
            _clear_id_cache(self)
        newChild.nextSibling = oldChild.nextSibling
        newChild.previousSibling = oldChild.previousSibling
        oldChild.nextSibling = None
        oldChild.previousSibling = None
        if newChild.previousSibling:
            newChild.previousSibling.nextSibling = newChild
        if newChild.nextSibling:
            newChild.nextSibling.previousSibling = newChild
        return oldChild

    def removeChild(self, oldChild):
        """Detach *oldChild* from this node and return it.

        Raises NotFoundErr if *oldChild* is not a child of this node.
        """
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        if oldChild.nextSibling is not None:
            oldChild.nextSibling.previousSibling = oldChild.previousSibling
        if oldChild.previousSibling is not None:
            oldChild.previousSibling.nextSibling = oldChild.nextSibling
        oldChild.nextSibling = oldChild.previousSibling = None
        if oldChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)

        oldChild.parentNode = None
        return oldChild

    def normalize(self):
        """Drop empty text children and merge adjacent text children.

        Child elements are normalised recursively.
        """
        L = []  # the children that survive, in order
        for child in self.childNodes:
            if child.nodeType == Node.TEXT_NODE:
                if not child.data:
                    # empty text node; discard
                    if L:
                        L[-1].nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = child.previousSibling
                    child.unlink()
                elif L and L[-1].nodeType == child.nodeType:
                    # collapse text node
                    node = L[-1]
                    node.data = node.data + child.data
                    node.nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = node
                    child.unlink()
                else:
                    L.append(child)
            else:
                L.append(child)
                if child.nodeType == Node.ELEMENT_NODE:
                    child.normalize()
        self.childNodes[:] = L

    def cloneNode(self, deep):
        """Return a copy of this node; *deep* is passed on to _clone_node()."""
        return _clone_node(self, deep, self.ownerDocument or self)

    def isSupported(self, feature, version):
        """Ask the owner document's implementation about *feature*."""
        return self.ownerDocument.implementation.hasFeature(feature, version)

    def _get_localName(self):
        # Overridden in Element and Attr where localName can be Non-Null
        return None

    # Node interfaces from Level 3 (WD 9 April 2002)

    def isSameNode(self, other):
        """Return True if *other* is this very object."""
        return self is other

    def getInterface(self, feature):
        """Return self if *feature* is supported, otherwise None."""
        if self.isSupported(feature, None):
            return self
        else:
            return None

    # The "user data" functions use a dictionary that is only present
    # if some user data has been set, so be careful not to assume it
    # exists.

    def getUserData(self, key):
        """Return the data stored under *key*, or None if there is none."""
        try:
            return self._user_data[key][0]
        except (AttributeError, KeyError):
            return None

    def setUserData(self, key, data, handler):
        """Store *data* and *handler* under *key*; return the previous data.

        Passing None as *data* removes an existing entry.
        """
        old = None
        try:
            d = self._user_data
        except AttributeError:
            d = {}
            self._user_data = d
        if key in d:
            old = d[key][0]
        if data is None:
            # ignore handlers passed for None
            handler = None
            if old is not None:
                del d[key]
        else:
            d[key] = (data, handler)
        return old

    def _call_user_data_handler(self, operation, src, dst):
        """Invoke handle() on every registered user-data handler."""
        if hasattr(self, "_user_data"):
            # Iterate over a snapshot of the items.
            for key, (data, handler) in list(self._user_data.items()):
                if handler is not None:
                    handler.handle(operation, key, data, src, dst)

    # minidom-specific API:

    def unlink(self):
        """Clear the links to parent, document, siblings and children.

        Children are unlinked recursively.
        """
        self.parentNode = self.ownerDocument = None
        if self.childNodes:
            for child in self.childNodes:
                child.unlink()
            self.childNodes = NodeList()
        self.previousSibling = None
        self.nextSibling = None

    # A Node is its own context manager, to ensure that an unlink() call occurs.
    # This is similar to how a file object works.
    def __enter__(self):
        return self

    def __exit__(self, et, ev, tb):
        self.unlink()

defproperty(Node, "firstChild", doc="First child node, or None.")
defproperty(Node, "lastChild",  doc="Last child node, or None.")
defproperty(Node, "localName",  doc="Namespace-local name of this node.")
282
283
284 def _append_child(self, node):
285 # fast path with less checks; usable by DOM builders if careful
286 childNodes = self.childNodes
287 if childNodes:
288 last = childNodes[-1]
289 node.previousSibling = last
290 last.nextSibling = node
291 childNodes.append(node)
292 node.parentNode = self
293
294 def _in_document(node):
295 # return True iff node is part of a document tree
296 while node is not None:
297 if node.nodeType == Node.DOCUMENT_NODE:
298 return True
299 node = node.parentNode
300 return False
301
302 def _write_data(writer, data):
303 "Writes datachars to writer."
304 if data:
305 data = data.replace("&", "&amp;").replace("<", "&lt;"). \
306 replace("\"", "&quot;").replace(">", "&gt;")
307 writer.write(data)
308
309 def _get_elements_by_tagName_helper(parent, name, rc):
310 for node in parent.childNodes:
311 if node.nodeType == Node.ELEMENT_NODE and \
312 (name == "*" or node.tagName == name):
313 rc.append(node)
314 _get_elements_by_tagName_helper(node, name, rc)
315 return rc
316
317 def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc):
318 for node in parent.childNodes:
319 if node.nodeType == Node.ELEMENT_NODE:
320 if ((localName == "*" or node.localName == localName) and
321 (nsURI == "*" or node.namespaceURI == nsURI)):
322 rc.append(node)
323 _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc)
324 return rc
325
class DocumentFragment(Node):
    """Parentless container node holding an ordered list of children."""

    nodeName = "#document-fragment"
    nodeType = Node.DOCUMENT_FRAGMENT_NODE
    nodeValue = None
    parentNode = None
    attributes = None

    # Node types accepted as children of a fragment.
    _child_node_types = (
        Node.ELEMENT_NODE,
        Node.TEXT_NODE,
        Node.CDATA_SECTION_NODE,
        Node.ENTITY_REFERENCE_NODE,
        Node.PROCESSING_INSTRUCTION_NODE,
        Node.COMMENT_NODE,
        Node.NOTATION_NODE,
    )

    def __init__(self):
        # Every fragment starts out with its own, empty child list.
        self.childNodes = NodeList()
342
343
class Attr(Node):
    """An attribute node.

    The value is kept both in ``_value`` and in the data of a single Text
    child.  ``ownerElement`` is the element the attribute is currently
    attached to, or None.
    """
    __slots__=('_name', '_value', 'namespaceURI',
               '_prefix', 'childNodes', '_localName', 'ownerDocument', 'ownerElement')
    nodeType = Node.ATTRIBUTE_NODE
    attributes = None
    specified = False
    _is_id = False

    _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
                 prefix=None):
        """Create an unattached attribute named *qName*.

        *localName*, when given, is stored and reported by ``localName``;
        otherwise the local name is derived from the qualified name.
        """
        self.ownerElement = None
        # The 'ownerDocument' slot shadows Node.ownerDocument, so it must
        # be given a value here or reading it (e.g. from isId) would raise
        # AttributeError on a free-standing attribute.
        self.ownerDocument = None
        self._name = qName
        self.namespaceURI = namespaceURI
        self._prefix = prefix
        if localName is not None:
            self._localName = localName
        self.childNodes = NodeList()

        # Add the single child node that represents the value of the attr
        self.childNodes.append(Text())

        # nodeValue and value are set elsewhere

    def _get_localName(self):
        try:
            return self._localName
        except AttributeError:
            # No explicit local name: use the part after the first colon.
            return self.nodeName.split(":", 1)[-1]

    def _get_specified(self):
        return self.specified

    def _get_name(self):
        return self._name

    def _set_name(self, value):
        self._name = value
        if self.ownerElement is not None:
            _clear_id_cache(self.ownerElement)

    nodeName = name = property(_get_name, _set_name)

    def _get_value(self):
        return self._value

    def _set_value(self, value):
        self._value = value
        # Keep the Text child in step with the stored value.
        self.childNodes[0].data = value
        if self.ownerElement is not None:
            _clear_id_cache(self.ownerElement)

    nodeValue = value = property(_get_value, _set_value)

    def _get_prefix(self):
        return self._prefix

    def _set_prefix(self, prefix):
        nsuri = self.namespaceURI
        if prefix == "xmlns":
            if nsuri and nsuri != XMLNS_NAMESPACE:
                raise xml.dom.NamespaceErr(
                    "illegal use of 'xmlns' prefix for the wrong namespace")
        self._prefix = prefix
        if prefix is None:
            newName = self.localName
        else:
            newName = "%s:%s" % (prefix, self.localName)
        if self.ownerElement:
            _clear_id_cache(self.ownerElement)
        self.name = newName

    prefix = property(_get_prefix, _set_prefix)

    def unlink(self):
        """Detach the attribute from its element and drop its children.

        Safe to call more than once.
        """
        # This implementation does not call the base implementation
        # since most of that is not needed, and the expense of the
        # method call is not warranted.  We duplicate the removal of
        # children, but that's all we needed from the base class.
        elem = self.ownerElement
        if elem is not None:
            del elem._attrs[self.nodeName]
            del elem._attrsNS[(self.namespaceURI, self.localName)]
            if self._is_id:
                self._is_id = False
                elem._magic_id_nodes -= 1
                self.ownerDocument._magic_id_count -= 1
            # The element no longer refers to this attribute; forget it so
            # that a repeated unlink() is harmless and the node can be
            # attached to another element.
            self.ownerElement = None
        for child in self.childNodes:
            child.unlink()
        del self.childNodes[:]

    def _get_isId(self):
        if self._is_id:
            return True
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return False

        info = doc._get_elem_info(elem)
        if info is None:
            return False
        if self.namespaceURI:
            return info.isIdNS(self.namespaceURI, self.localName)
        else:
            return info.isId(self.nodeName)

    def _get_schemaType(self):
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return _no_type

        info = doc._get_elem_info(elem)
        if info is None:
            return _no_type
        if self.namespaceURI:
            return info.getAttributeTypeNS(self.namespaceURI, self.localName)
        else:
            return info.getAttributeType(self.nodeName)

defproperty(Attr, "isId",       doc="True if this attribute is an ID.")
defproperty(Attr, "localName",  doc="Namespace-local name of this attribute.")
defproperty(Attr, "schemaType", doc="Schema type for this attribute.")
468
469
class NamedNodeMap(object):
    """The attribute list is a transient interface to the underlying
    dictionaries.  Mutations here will change the underlying element's
    dictionary.

    Ordering is imposed artificially and does not reflect the order of
    attributes as found in an input document.
    """

    __slots__ = ('_attrs', '_attrsNS', '_ownerElement')

    def __init__(self, attrs, attrsNS, ownerElement):
        # attrs maps names, attrsNS maps (namespaceURI, localName) pairs.
        self._ownerElement = ownerElement
        self._attrs = attrs
        self._attrsNS = attrsNS

    def _get_length(self):
        return len(self._attrs)

    def item(self, index):
        """Return the attribute at position *index*, or None if out of range."""
        names = list(self._attrs.keys())
        try:
            return self[names[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (nodeName, value) pairs."""
        return [(attr.nodeName, attr.value) for attr in self._attrs.values()]

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        return [((attr.namespaceURI, attr.localName), attr.value)
                for attr in self._attrs.values()]

    def __contains__(self, key):
        # Strings are looked up by name, anything else by namespace pair.
        table = self._attrs if isinstance(key, str) else self._attrsNS
        return key in table

    def keys(self):
        return self._attrs.keys()

    def keysNS(self):
        return self._attrsNS.keys()

    def values(self):
        return self._attrs.values()

    def get(self, name, value=None):
        return self._attrs.get(name, value)

    __len__ = _get_length

    def _cmp(self, other):
        """Three-way comparison.

        Maps sharing the same name dictionary compare equal; all others
        are ordered by object identity.
        """
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        mine, theirs = id(self), id(other)
        return (mine > theirs) - (mine < theirs)

    def __eq__(self, other):
        return self._cmp(other) == 0

    def __ge__(self, other):
        return self._cmp(other) >= 0

    def __gt__(self, other):
        return self._cmp(other) > 0

    def __le__(self, other):
        return self._cmp(other) <= 0

    def __lt__(self, other):
        return self._cmp(other) < 0

    def __getitem__(self, attname_or_tuple):
        if isinstance(attname_or_tuple, tuple):
            return self._attrsNS[attname_or_tuple]
        return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Assign a string value to attribute *attname*, or store an Attr."""
        if not isinstance(value, str):
            if not isinstance(value, Attr):
                raise TypeError("value must be a string or Attr object")
            self.setNamedItem(value)
            return
        try:
            node = self._attrs[attname]
        except KeyError:
            # No such attribute yet: create one and attach it first.
            node = Attr(attname)
            node.ownerDocument = self._ownerElement.ownerDocument
            self.setNamedItem(node)
        node.value = value

    def getNamedItem(self, name):
        try:
            return self._attrs[name]
        except KeyError:
            return None

    def getNamedItemNS(self, namespaceURI, localName):
        try:
            return self._attrsNS[(namespaceURI, localName)]
        except KeyError:
            return None

    def removeNamedItem(self, name):
        """Remove and return the attribute called *name*.

        Raises NotFoundErr if there is no such attribute.
        """
        found = self.getNamedItem(name)
        if found is None:
            raise xml.dom.NotFoundErr()
        _clear_id_cache(self._ownerElement)
        del self._attrs[found.nodeName]
        del self._attrsNS[(found.namespaceURI, found.localName)]
        if hasattr(found, 'ownerElement'):
            found.ownerElement = None
        return found

    def removeNamedItemNS(self, namespaceURI, localName):
        """Remove and return the attribute with the given namespace and name.

        Raises NotFoundErr if there is no such attribute.
        """
        found = self.getNamedItemNS(namespaceURI, localName)
        if found is None:
            raise xml.dom.NotFoundErr()
        _clear_id_cache(self._ownerElement)
        del self._attrsNS[(found.namespaceURI, found.localName)]
        del self._attrs[found.nodeName]
        if hasattr(found, 'ownerElement'):
            found.ownerElement = None
        return found

    def setNamedItem(self, node):
        """Store *node* under its name; return the attribute it displaced."""
        if not isinstance(node, Attr):
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        previous = self._attrs.get(node.name)
        if previous:
            previous.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        node.ownerElement = self._ownerElement
        _clear_id_cache(node.ownerElement)
        return previous

    def setNamedItemNS(self, node):
        return self.setNamedItem(node)

    def __delitem__(self, attname_or_tuple):
        target = self[attname_or_tuple]
        _clear_id_cache(target.ownerElement)
        target.unlink()

    def __getstate__(self):
        return self._attrs, self._attrsNS, self._ownerElement

    def __setstate__(self, state):
        self._attrs, self._attrsNS, self._ownerElement = state

defproperty(NamedNodeMap, "length",
            doc="Number of nodes in the NamedNodeMap.")

AttributeList = NamedNodeMap
637
638
639 class TypeInfo(object):
640 __slots__ = 'namespace', 'name'
641
642 def __init__(self, namespace, name):
643 self.namespace = namespace
644 self.name = name
645
646 def __repr__(self):
647 if self.namespace:
648 return "<%s %r (from %r)>" % (self.__class__.__name__, self.name,
649 self.namespace)
650 else:
651 return "<%s %r>" % (self.__class__.__name__, self.name)
652
653 def _get_name(self):
654 return self.name
655
656 def _get_namespace(self):
657 return self.namespace
658
659 _no_type = TypeInfo(None, None)
660
class Element(Node):
    """An element node.

    Attributes are kept in two dictionaries that are created on first
    use: ``_attrs`` is keyed by attribute name and ``_attrsNS`` by
    (namespaceURI, localName).
    """
    __slots__=('ownerDocument', 'parentNode', 'tagName', 'nodeName', 'prefix',
               'namespaceURI', '_localName', 'childNodes', '_attrs', '_attrsNS',
               'nextSibling', 'previousSibling')
    nodeType = Node.ELEMENT_NODE
    nodeValue = None
    schemaType = _no_type

    # Number of attributes marked as IDs through setIdAttributeNode().
    _magic_id_nodes = 0

    _child_node_types = (Node.ELEMENT_NODE,
                         Node.PROCESSING_INSTRUCTION_NODE,
                         Node.COMMENT_NODE,
                         Node.TEXT_NODE,
                         Node.CDATA_SECTION_NODE,
                         Node.ENTITY_REFERENCE_NODE)

    def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
                 localName=None):
        """Create an unattached element called *tagName*.

        *localName*, when given, is stored and reported by ``localName``;
        otherwise the local name is derived from the tag name.
        """
        self.parentNode = None
        # The 'ownerDocument' slot shadows Node.ownerDocument, so without
        # this assignment reading it on an element created directly (for
        # instance from setAttribute()) raises AttributeError.
        self.ownerDocument = None
        self.tagName = self.nodeName = tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        if localName is not None:
            self._localName = localName
        self.childNodes = NodeList()
        self.nextSibling = self.previousSibling = None

        # Attribute dictionaries are lazily created
        # attributes are double-indexed:
        #    tagName -> Attribute
        #    URI,localName -> Attribute
        # in the future: consider lazy generation
        # of attribute objects this is too tricky
        # for now because of headaches with
        # namespaces.
        self._attrs = None
        self._attrsNS = None

    def _ensure_attributes(self):
        if self._attrs is None:
            self._attrs = {}
            self._attrsNS = {}

    def _get_localName(self):
        try:
            return self._localName
        except AttributeError:
            # No explicit local name: use the part after the first colon.
            return self.tagName.split(":", 1)[-1]

    def _get_tagName(self):
        return self.tagName

    def unlink(self):
        """Unlink all attributes, then the element itself."""
        if self._attrs is not None:
            # Attr.unlink() deletes from _attrs, so iterate over a copy.
            for attr in list(self._attrs.values()):
                attr.unlink()
        self._attrs = None
        self._attrsNS = None
        Node.unlink(self)

    def getAttribute(self, attname):
        """Return the value of attribute *attname*, or "" if it is absent."""
        if self._attrs is None:
            return ""
        try:
            return self._attrs[attname].value
        except KeyError:
            return ""

    def getAttributeNS(self, namespaceURI, localName):
        """Namespace-aware variant of getAttribute()."""
        if self._attrsNS is None:
            return ""
        try:
            return self._attrsNS[(namespaceURI, localName)].value
        except KeyError:
            return ""

    def setAttribute(self, attname, value):
        """Set attribute *attname* to *value*, creating it if necessary."""
        attr = self.getAttributeNode(attname)
        if attr is None:
            attr = Attr(attname)
            attr.value = value # also sets nodeValue
            attr.ownerDocument = self.ownerDocument
            self.setAttributeNode(attr)
        elif value != attr.value:
            attr.value = value
            if attr.isId:
                _clear_id_cache(self)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        """Namespace-aware variant of setAttribute()."""
        prefix, localname = _nssplit(qualifiedName)
        attr = self.getAttributeNodeNS(namespaceURI, localname)
        if attr is None:
            attr = Attr(qualifiedName, namespaceURI, localname, prefix)
            attr.value = value
            attr.ownerDocument = self.ownerDocument
            self.setAttributeNode(attr)
        else:
            if value != attr.value:
                attr.value = value
                if attr.isId:
                    _clear_id_cache(self)
            if attr.prefix != prefix:
                attr.prefix = prefix
                attr.nodeName = qualifiedName

    def getAttributeNode(self, attrname):
        if self._attrs is None:
            return None
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        if self._attrsNS is None:
            return None
        return self._attrsNS.get((namespaceURI, localName))

    def setAttributeNode(self, attr):
        """Attach *attr*; return the attribute node it replaced, if any.

        Raises InuseAttributeErr if *attr* belongs to another element.
        """
        if attr.ownerElement not in (None, self):
            raise xml.dom.InuseAttributeErr("attribute node already owned")
        self._ensure_attributes()
        old1 = self._attrs.get(attr.name, None)
        if old1 is not None:
            self.removeAttributeNode(old1)
        old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None)
        if old2 is not None and old2 is not old1:
            self.removeAttributeNode(old2)
        _set_attribute_node(self, attr)

        if old1 is not attr:
            # It might have already been part of this node, in which case
            # it doesn't represent a change, and should not be returned.
            return old1
        if old2 is not attr:
            return old2

    setAttributeNodeNS = setAttributeNode

    def removeAttribute(self, name):
        """Remove attribute *name*; raise NotFoundErr if it is absent."""
        if self._attrsNS is None:
            raise xml.dom.NotFoundErr()
        try:
            attr = self._attrs[name]
        except KeyError:
            raise xml.dom.NotFoundErr()
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Namespace-aware variant of removeAttribute()."""
        if self._attrsNS is None:
            raise xml.dom.NotFoundErr()
        try:
            attr = self._attrsNS[(namespaceURI, localName)]
        except KeyError:
            raise xml.dom.NotFoundErr()
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        """Detach and return *node*.

        Raises NotFoundErr if *node* is None or no attribute is stored
        under its name.
        """
        if node is None:
            raise xml.dom.NotFoundErr()
        try:
            self._attrs[node.name]
        except KeyError:
            raise xml.dom.NotFoundErr()
        _clear_id_cache(self)
        node.unlink()
        # Restore this since the node is still useful and otherwise
        # unlinked
        node.ownerDocument = self.ownerDocument
        return node

    removeAttributeNodeNS = removeAttributeNode

    def hasAttribute(self, name):
        if self._attrs is None:
            return False
        return name in self._attrs

    def hasAttributeNS(self, namespaceURI, localName):
        if self._attrsNS is None:
            return False
        return (namespaceURI, localName) in self._attrsNS

    def getElementsByTagName(self, name):
        return _get_elements_by_tagName_helper(self, name, NodeList())

    def getElementsByTagNameNS(self, namespaceURI, localName):
        return _get_elements_by_tagName_ns_helper(
            self, namespaceURI, localName, NodeList())

    def __repr__(self):
        return "<DOM Element: %s at %#x>" % (self.tagName, id(self))

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the element, its attributes and its children to *writer*."""
        # indent = current indentation
        # addindent = indentation to add to higher levels
        # newl = newline string
        writer.write(indent+"<" + self.tagName)

        attrs = self._get_attributes()

        for a_name in attrs.keys():
            writer.write(" %s=\"" % a_name)
            _write_data(writer, attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            if (len(self.childNodes) == 1 and
                self.childNodes[0].nodeType in (
                        Node.TEXT_NODE, Node.CDATA_SECTION_NODE)):
                # A lone text/CDATA child is written inline, without any
                # indentation or newlines around it.
                self.childNodes[0].writexml(writer, '', '', '')
            else:
                writer.write(newl)
                for node in self.childNodes:
                    node.writexml(writer, indent+addindent, addindent, newl)
                writer.write(indent)
            writer.write("</%s>%s" % (self.tagName, newl))
        else:
            writer.write("/>%s"%(newl))

    def _get_attributes(self):
        self._ensure_attributes()
        return NamedNodeMap(self._attrs, self._attrsNS, self)

    def hasAttributes(self):
        """Return True if the element has at least one attribute."""
        return bool(self._attrs)

    # DOM Level 3 attributes, based on the 22 Oct 2002 draft

    def setIdAttribute(self, name):
        idAttr = self.getAttributeNode(name)
        self.setIdAttributeNode(idAttr)

    def setIdAttributeNS(self, namespaceURI, localName):
        idAttr = self.getAttributeNodeNS(namespaceURI, localName)
        self.setIdAttributeNode(idAttr)

    def setIdAttributeNode(self, idAttr):
        """Mark *idAttr* as an ID attribute of this element.

        Raises NotFoundErr if *idAttr* is None or not owned by this
        element, and NoModificationAllowedErr if the element has a
        containing entity reference.
        """
        if idAttr is None or not self.isSameNode(idAttr.ownerElement):
            raise xml.dom.NotFoundErr()
        if _get_containing_entref(self) is not None:
            raise xml.dom.NoModificationAllowedErr()
        if not idAttr._is_id:
            idAttr._is_id = True
            self._magic_id_nodes += 1
            self.ownerDocument._magic_id_count += 1
            _clear_id_cache(self)

defproperty(Element, "attributes",
            doc="NamedNodeMap of attributes on the element.")
defproperty(Element, "localName",
            doc="Namespace-local name of this element.")
912
913
914 def _set_attribute_node(element, attr):
915 _clear_id_cache(element)
916 element._ensure_attributes()
917 element._attrs[attr.name] = attr
918 element._attrsNS[(attr.namespaceURI, attr.localName)] = attr
919
920 # This creates a circular reference, but Element.unlink()
921 # breaks the cycle since the references to the attribute
922 # dictionaries are tossed.
923 attr.ownerElement = element
924
class Childless:
    """Mixin that makes childless-ness easy to implement and avoids
    the complexity of the Node methods that deal with children.
    """
    __slots__ = ()

    attributes = None
    childNodes = EmptyNodeList()
    firstChild = None
    lastChild = None

    def _get_firstChild(self):
        return None

    def _get_lastChild(self):
        return None

    def hasChildNodes(self):
        """A childless node never has children."""
        return False

    def normalize(self):
        # For childless nodes, normalize() has nothing to do.
        pass

    def appendChild(self, node):
        """Always raises HierarchyRequestErr."""
        message = self.nodeName + " nodes cannot have children"
        raise xml.dom.HierarchyRequestErr(message)

    def insertBefore(self, newChild, refChild):
        """Always raises HierarchyRequestErr."""
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)

    def replaceChild(self, newChild, oldChild):
        """Always raises HierarchyRequestErr."""
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        """Always raises NotFoundErr."""
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.NotFoundErr(message)
964
965
class ProcessingInstruction(Childless, Node):
    """A processing instruction: a target name plus its data string."""

    __slots__ = ('target', 'data')
    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        self.data = data
        self.target = target

    # nodeName is an alias for target
    def _get_nodeName(self):
        return self.target
    def _set_nodeName(self, value):
        self.target = value
    nodeName = property(_get_nodeName, _set_nodeName)

    # nodeValue is an alias for data
    def _get_nodeValue(self):
        return self.data
    def _set_nodeValue(self, value):
        self.data = value
    nodeValue = property(_get_nodeValue, _set_nodeValue)

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write ``<?target data?>`` preceded by *indent*, followed by *newl*."""
        pieces = (indent, self.target, self.data, newl)
        writer.write("%s<?%s %s?>%s" % pieces)
990
991
class CharacterData(Childless, Node):
    """Base class for childless nodes that carry a string in ``data``.

    Offsets accepted by the editing methods range from 0 up to and
    including ``len(data)``; the latter addresses the position just past
    the last character.
    """
    __slots__=('_data', 'ownerDocument','parentNode', 'previousSibling', 'nextSibling')

    def __init__(self):
        self.ownerDocument = self.parentNode = None
        self.previousSibling = self.nextSibling = None
        self._data = ''
        Node.__init__(self)

    def _get_length(self):
        return len(self.data)
    __len__ = _get_length

    def _get_data(self):
        return self._data
    def _set_data(self, data):
        self._data = data

    data = nodeValue = property(_get_data, _set_data)

    def __repr__(self):
        data = self.data
        if len(data) > 10:
            dotdotdot = "..."
        else:
            dotdotdot = ""
        return '<DOM %s node "%r%s">' % (
            self.__class__.__name__, data[0:10], dotdotdot)

    def _check_offset(self, offset):
        """Raise IndexSizeErr unless 0 <= offset <= len(self.data)."""
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        # offset == len(data) is legal: the DOM only rejects offsets that
        # are greater than the length of the data.
        if offset > len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")

    def substringData(self, offset, count):
        """Return up to *count* characters of data starting at *offset*.

        Raises IndexSizeErr for a negative or too large *offset* or a
        negative *count*.
        """
        self._check_offset(offset)
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        return self.data[offset:offset+count]

    def appendData(self, arg):
        """Append *arg* to the data."""
        self.data = self.data + arg

    def insertData(self, offset, arg):
        """Insert *arg* at *offset*.

        Raises IndexSizeErr for a negative or too large *offset*.
        """
        self._check_offset(offset)
        if arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset:])

    def deleteData(self, offset, count):
        """Delete up to *count* characters starting at *offset*.

        Raises IndexSizeErr for a negative or too large *offset* or a
        negative *count*.
        """
        self._check_offset(offset)
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        if count:
            self.data = self.data[:offset] + self.data[offset+count:]

    def replaceData(self, offset, count, arg):
        """Replace up to *count* characters starting at *offset* with *arg*.

        A *count* of zero inserts *arg* at *offset*.  Raises IndexSizeErr
        for a negative or too large *offset* or a negative *count*.
        """
        self._check_offset(offset)
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        # Performed even when count is 0, so that arg is not silently
        # dropped in that case.
        self.data = "%s%s%s" % (
            self.data[:offset], arg, self.data[offset+count:])

defproperty(CharacterData, "length", doc="Length of the string data.")
1064
1065
class Text(CharacterData):
    """Character data node holding the textual content of an element."""

    __slots__ = ()

    nodeType = Node.TEXT_NODE
    nodeName = "#text"
    attributes = None

    def splitText(self, offset):
        """Split this node in two at *offset* and return the new tail node.

        This node keeps the data before *offset*; the returned node holds
        the remainder and, when this node is attached to a parent, is
        inserted right after it.  Raises xml.dom.IndexSizeErr if *offset*
        is negative or greater than the length of the data.
        """
        if offset < 0 or offset > len(self.data):
            raise xml.dom.IndexSizeErr("illegal offset value")
        newText = self.__class__()
        newText.data = self.data[offset:]
        newText.ownerDocument = self.ownerDocument
        following = self.nextSibling
        if self.parentNode and self in self.parentNode.childNodes:
            if following is None:
                self.parentNode.appendChild(newText)
            else:
                self.parentNode.insertBefore(newText, following)
        self.data = self.data[:offset]
        return newText

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the data to *writer*, wrapped in *indent* and *newl*."""
        _write_data(writer, "%s%s%s" % (indent, self.data, newl))

    # DOM Level 3 (WD 9 April 2002)

    def _get_wholeText(self):
        """Return the joined data of this node and all adjacent text nodes."""
        text_types = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)
        # Walk backwards collecting in reverse order, then flip once instead
        # of paying for an insert at the front of the list per sibling.
        before = []
        n = self.previousSibling
        while n is not None and n.nodeType in text_types:
            before.append(n.data)
            n = n.previousSibling
        before.reverse()
        after = []
        n = self.nextSibling
        while n is not None and n.nodeType in text_types:
            after.append(n.data)
            n = n.nextSibling
        return ''.join(before + [self.data] + after)

    def replaceWholeText(self, content):
        """Replace this node and all adjacent text nodes with *content*.

        Adjacent Text/CDATASection siblings are removed from the parent.
        With non-empty *content* this node receives the text and is
        returned; with empty *content* this node is removed as well and
        None is returned.  A node without a parent is handled too.
        """
        # XXX This needs to be seriously changed if minidom ever
        # supports EntityReference nodes.
        text_types = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)
        parent = self.parentNode
        n = self.previousSibling
        while n is not None and n.nodeType in text_types:
            earlier = n.previousSibling
            parent.removeChild(n)
            n = earlier
        # Remember the follower before this node is possibly unlinked.
        n = self.nextSibling
        if not content and parent is not None:
            # A detached node has nothing to be removed from.
            parent.removeChild(self)
        while n is not None and n.nodeType in text_types:
            later = n.nextSibling
            parent.removeChild(n)
            n = later
        if content:
            self.data = content
            return self
        return None

    def _get_isWhitespaceInElementContent(self):
        """Tell whether this is whitespace inside element-only content.

        False when the data holds non-whitespace, when there is no
        containing element, or when the owner document has no content
        model information for that element.
        """
        if self.data.strip():
            return False
        elem = _get_containing_element(self)
        if elem is None:
            return False
        info = self.ownerDocument._get_elem_info(elem)
        if info is None:
            return False
        return info.isElementContent()


defproperty(Text, "isWhitespaceInElementContent",
            doc="True iff this text node contains only whitespace"
                " and is in element content.")
defproperty(Text, "wholeText",
            doc="The text of all logically-adjacent text nodes.")
1156
1157
def _get_containing_element(node):
    """Return the nearest ancestor of *node* that is an element, or None."""
    ancestor = node.parentNode
    while ancestor is not None and ancestor.nodeType != Node.ELEMENT_NODE:
        ancestor = ancestor.parentNode
    return ancestor
1165
def _get_containing_entref(node):
    """Return the nearest entity-reference ancestor of *node*, or None."""
    ancestor = node.parentNode
    while (ancestor is not None
           and ancestor.nodeType != Node.ENTITY_REFERENCE_NODE):
        ancestor = ancestor.parentNode
    return ancestor
1173
1174
class Comment(CharacterData):
    """Character data node representing an XML comment."""

    nodeType = Node.COMMENT_NODE
    nodeName = "#comment"

    def __init__(self, data):
        """Create a comment whose text is *data*."""
        super().__init__()
        self._data = data

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the comment markup to *writer*.

        Raises ValueError if the text contains "--", which cannot appear
        inside an XML comment.
        """
        forbidden = "--"
        if forbidden in self.data:
            raise ValueError("'--' is not allowed in a comment node")
        markup = "%s<!--%s-->%s" % (indent, self.data, newl)
        writer.write(markup)
1187
1188
class CDATASection(Text):
    """Text node that is serialized as a CDATA section."""

    __slots__ = ()

    nodeType = Node.CDATA_SECTION_NODE
    nodeName = "#cdata-section"

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the data to *writer* wrapped in CDATA markers.

        The indentation arguments are ignored.  Raises ValueError if the
        data contains the section terminator "]]>".
        """
        terminator_at = self.data.find("]]>")
        if terminator_at >= 0:
            raise ValueError("']]>' not allowed in a CDATA section")
        writer.write("<![CDATA[%s]]>" % self.data)
1199
1200
class ReadOnlySequentialNamedNodeMap(object):
    """Read-only NamedNodeMap that looks nodes up by scanning a sequence."""

    __slots__ = ('_seq',)

    def __init__(self, seq=()):
        # seq should be a list or tuple
        self._seq = seq

    def __len__(self):
        return len(self._seq)

    def _get_length(self):
        """Return the number of nodes held by the map."""
        return len(self._seq)

    def getNamedItem(self, name):
        """Return the first node whose nodeName equals *name*, or None."""
        return next((n for n in self._seq if n.nodeName == name), None)

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the first node matching namespace and local name, or None."""
        return next(
            (n for n in self._seq
             if n.namespaceURI == namespaceURI and n.localName == localName),
            None)

    def __getitem__(self, name_or_tuple):
        """Look a node up by name or by (namespaceURI, localName) tuple.

        Raises KeyError if no node matches.
        """
        if isinstance(name_or_tuple, tuple):
            found = self.getNamedItemNS(*name_or_tuple)
        else:
            found = self.getNamedItem(name_or_tuple)
        if found is not None:
            return found
        raise KeyError(name_or_tuple)

    def item(self, index):
        """Return the node at *index*, or None if *index* is out of range."""
        if index < 0:
            # Negative positions never wrap around to the end.
            return None
        try:
            node = self._seq[index]
        except IndexError:
            return None
        return node

    def removeNamedItem(self, name):
        """Always raise xml.dom.NoModificationAllowedErr."""
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def removeNamedItemNS(self, namespaceURI, localName):
        """Always raise xml.dom.NoModificationAllowedErr."""
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItem(self, node):
        """Always raise xml.dom.NoModificationAllowedErr."""
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItemNS(self, node):
        """Always raise xml.dom.NoModificationAllowedErr."""
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def __getstate__(self):
        # Pickle state is a one-element list holding the sequence.
        return [self._seq]

    def __setstate__(self, state):
        (self._seq,) = state[:1]


defproperty(ReadOnlySequentialNamedNodeMap, "length",
            doc="Number of entries in the NamedNodeMap.")
1265
1266
class Identified:
    """Mix-in that stores a public and a system identifier on a node."""

    __slots__ = ('publicId', 'systemId')

    def _identified_mixin_init(self, publicId, systemId):
        """Store both identifiers; meant to be called from __init__."""
        self.publicId, self.systemId = publicId, systemId

    def _get_publicId(self):
        """Return the stored public identifier."""
        return self.publicId

    def _get_systemId(self):
        """Return the stored system identifier."""
        return self.systemId
1281
class DocumentType(Identified, Childless, Node):
    """Node describing a document type declaration (<!DOCTYPE ...>)."""

    nodeType = Node.DOCUMENT_TYPE_NODE
    nodeValue = None
    name = None
    publicId = None
    systemId = None
    internalSubset = None

    def __init__(self, qualifiedName):
        """Create a doctype with empty entity and notation maps.

        When *qualifiedName* is non-empty its local part (the text after
        an optional "prefix:") becomes both ``name`` and ``nodeName``.
        """
        self.entities = ReadOnlySequentialNamedNodeMap()
        self.notations = ReadOnlySequentialNamedNodeMap()
        if qualifiedName:
            prefix, localname = _nssplit(qualifiedName)
            self.name = localname
            self.nodeName = self.name

    def _get_internalSubset(self):
        """Return the internal subset text, or None if there is none."""
        return self.internalSubset

    def cloneNode(self, deep):
        """Return a copy of this doctype, or None if a document owns it.

        The copy carries the name, the public and system identifiers and
        the internal subset.  With *deep* true the notations and entities
        are duplicated as well.  User data handlers are notified with
        NODE_CLONED for every duplicated node.
        """
        if self.ownerDocument is not None:
            # Only a doctype not yet attached to a document may be cloned.
            return None
        clone = DocumentType(None)
        clone.name = self.name
        clone.nodeName = self.name
        # The identifiers and the subset belong to the declaration; a clone
        # lacking them would not be a duplicate of this node.
        clone.publicId = self.publicId
        clone.systemId = self.systemId
        clone.internalSubset = self.internalSubset
        operation = xml.dom.UserDataHandler.NODE_CLONED
        if deep:
            clone.entities._seq = []
            clone.notations._seq = []
            for n in self.notations._seq:
                notation = Notation(n.nodeName, n.publicId, n.systemId)
                clone.notations._seq.append(notation)
                n._call_user_data_handler(operation, n, notation)
            for e in self.entities._seq:
                entity = Entity(e.nodeName, e.publicId, e.systemId,
                                e.notationName)
                entity.actualEncoding = e.actualEncoding
                entity.encoding = e.encoding
                entity.version = e.version
                clone.entities._seq.append(entity)
                e._call_user_data_handler(operation, e, entity)
        self._call_user_data_handler(operation, self, clone)
        return clone

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the <!DOCTYPE ...> declaration to *writer*.

        The external identifier is emitted as PUBLIC when a public
        identifier is set, otherwise as SYSTEM when a system identifier is
        set.  *indent* and *addindent* are not used.
        """
        writer.write("<!DOCTYPE ")
        writer.write(self.name)
        if self.publicId:
            writer.write("%s PUBLIC '%s'%s '%s'"
                         % (newl, self.publicId, newl, self.systemId))
        elif self.systemId:
            writer.write("%s SYSTEM '%s'" % (newl, self.systemId))
        if self.internalSubset is not None:
            writer.write(" [")
            writer.write(self.internalSubset)
            writer.write("]")
        writer.write(">"+newl)
1341
class Entity(Identified, Node):
    """Node for an entity declaration; its child list cannot be changed."""

    attributes = None
    nodeType = Node.ENTITY_NODE
    nodeValue = None

    actualEncoding = None
    encoding = None
    version = None

    def __init__(self, name, publicId, systemId, notation):
        """Create an entity named *name* with the given identifiers.

        *notation* is stored as ``notationName``.
        """
        self.nodeName = name
        self.notationName = notation
        self.childNodes = NodeList()
        self._identified_mixin_init(publicId, systemId)

    def _get_actualEncoding(self):
        """Return the ``actualEncoding`` attribute."""
        return self.actualEncoding

    def _get_encoding(self):
        """Return the ``encoding`` attribute."""
        return self.encoding

    def _get_version(self):
        """Return the ``version`` attribute."""
        return self.version

    # Every structural mutator is rejected with HierarchyRequestErr.

    def appendChild(self, newChild):
        """Always raise xml.dom.HierarchyRequestErr."""
        raise xml.dom.HierarchyRequestErr(
            "cannot append children to an entity node")

    def insertBefore(self, newChild, refChild):
        """Always raise xml.dom.HierarchyRequestErr."""
        raise xml.dom.HierarchyRequestErr(
            "cannot insert children below an entity node")

    def removeChild(self, oldChild):
        """Always raise xml.dom.HierarchyRequestErr."""
        raise xml.dom.HierarchyRequestErr(
            "cannot remove children from an entity node")

    def replaceChild(self, newChild, oldChild):
        """Always raise xml.dom.HierarchyRequestErr."""
        raise xml.dom.HierarchyRequestErr(
            "cannot replace children of an entity node")
1381
class Notation(Identified, Childless, Node):
    """Node for a notation declaration."""

    nodeType = Node.NOTATION_NODE
    nodeValue = None

    def __init__(self, name, publicId, systemId):
        """Create a notation named *name* with the given identifiers."""
        self._identified_mixin_init(publicId, systemId)
        self.nodeName = name
1389
1390
class DOMImplementation(DOMImplementationLS):
    """Factory for documents and doctypes, plus feature queries."""

    _features = [("core", "1.0"),
                 ("core", "2.0"),
                 ("core", None),
                 ("xml", "1.0"),
                 ("xml", "2.0"),
                 ("xml", None),
                 ("ls-load", "3.0"),
                 ("ls-load", None),
                 ]

    def hasFeature(self, feature, version):
        """Tell whether *feature* is supported at *version*.

        The feature name is matched case-insensitively; an empty version
        string is treated like None.
        """
        wanted = None if version == "" else version
        return (feature.lower(), wanted) in self._features

    def createDocument(self, namespaceURI, qualifiedName, doctype):
        """Create a document, optionally with a doctype and root element.

        With all three arguments None an empty document is returned.
        Otherwise a root element named *qualifiedName* is created in
        *namespaceURI* and *doctype*, if given, is attached before it.

        Raises xml.dom.WrongDocumentErr if *doctype* already has a parent,
        xml.dom.InvalidCharacterErr if a root element is required but no
        name was given, and xml.dom.NamespaceErr for an illegal prefix.
        """
        if doctype and doctype.parentNode is not None:
            raise xml.dom.WrongDocumentErr(
                "doctype object owned by another DOM tree")
        doc = self._create_document()

        needs_root = (namespaceURI is not None
                      or qualifiedName is not None
                      or doctype is not None)

        if needs_root and not qualifiedName:
            # The spec is unclear what to raise here; SyntaxErr
            # would be the other obvious candidate. Since Xerces raises
            # InvalidCharacterErr, and since SyntaxErr is not listed
            # for createDocument, that seems to be the better choice.
            # XXX: need to check for illegal characters here and in
            # createElement.

            # DOM Level III clears this up when talking about the return value
            # of this function.  If namespaceURI, qName and DocType are
            # Null the document is returned without a document element
            # Otherwise if doctype or namespaceURI are not None
            # Then we go back to the above problem
            raise xml.dom.InvalidCharacterErr("Element with no name")

        if needs_root:
            prefix, localname = _nssplit(qualifiedName)
            xml_ns = "http://www.w3.org/XML/1998/namespace"
            if prefix == "xml" and namespaceURI != xml_ns:
                raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
            if prefix and not namespaceURI:
                raise xml.dom.NamespaceErr(
                    "illegal use of prefix without namespaces")
            root = doc.createElementNS(namespaceURI, qualifiedName)
            if doctype:
                doc.appendChild(doctype)
            doc.appendChild(root)

        if doctype:
            doctype.parentNode = doctype.ownerDocument = doc

        doc.doctype = doctype
        doc.implementation = self
        return doc

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Create a detached doctype carrying the given identifiers."""
        created = DocumentType(qualifiedName)
        created.publicId = publicId
        created.systemId = systemId
        return created

    # DOM Level 3 (WD 9 April 2002)

    def getInterface(self, feature):
        """Return this object if *feature* is supported, else None."""
        return self if self.hasFeature(feature, None) else None

    # internal
    def _create_document(self):
        return Document()
1469
class ElementInfo(object):
    """Default content-model information for one element type.

    Every query gives the "nothing is known" answer; DOM builders are
    expected to supply subclasses that consult real declarations.
    """

    __slots__ = ('tagName',)

    def __init__(self, name):
        self.tagName = name

    def getAttributeType(self, aname):
        """Return the type information for attribute *aname*."""
        return _no_type

    def getAttributeTypeNS(self, namespaceURI, localName):
        """Return the type information for a namespaced attribute."""
        return _no_type

    def isElementContent(self):
        """Return False: the element is not known to have element content."""
        return False

    def isEmpty(self):
        """Return False: the element is not known to be declared EMPTY."""
        return False

    def isId(self, aname):
        """Return False: attribute *aname* is not a DTD-style ID."""
        return False

    def isIdNS(self, namespaceURI, localName):
        """Return False: the identified attribute is not a DTD-style ID."""
        return False

    def __getstate__(self):
        # The tag name is the whole pickled state.
        return self.tagName

    def __setstate__(self, state):
        self.tagName = state
1511
def _clear_id_cache(node):
    """Reset the ID lookup cache of the document that *node* belongs to.

    *node* may be the document itself or a node for which _in_document()
    is true; for any other node nothing happens.
    """
    if node.nodeType == Node.DOCUMENT_NODE:
        document = node
    elif _in_document(node):
        document = node.ownerDocument
    else:
        return
    document._id_cache.clear()
    # None makes the next lookup restart its search.
    document._id_search_stack = None
1519
class Document(Node, DocumentLS):
    """Root node of a DOM tree and factory for all other node types."""

    __slots__ = ('_elem_info', 'doctype',
                 '_id_search_stack', 'childNodes', '_id_cache')
    _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
                         Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)

    implementation = DOMImplementation()
    nodeType = Node.DOCUMENT_NODE
    nodeName = "#document"
    nodeValue = None
    attributes = None
    parentNode = None
    previousSibling = nextSibling = None


    # Document attributes from Level 3 (WD 9 April 2002)

    actualEncoding = None
    encoding = None
    standalone = None
    version = None
    strictErrorChecking = False
    errorHandler = None
    documentURI = None

    _magic_id_count = 0

    def __init__(self):
        self.doctype = None
        self.childNodes = NodeList()
        # mapping of (namespaceURI, localName) -> ElementInfo
        #        and tagName -> ElementInfo
        self._elem_info = {}
        self._id_cache = {}
        self._id_search_stack = None

    def _get_elem_info(self, element):
        """Return the ElementInfo registered for *element*, or None."""
        if element.namespaceURI:
            key = element.namespaceURI, element.localName
        else:
            key = element.tagName
        return self._elem_info.get(key)

    def _get_actualEncoding(self):
        return self.actualEncoding

    def _get_doctype(self):
        return self.doctype

    def _get_documentURI(self):
        return self.documentURI

    def _get_encoding(self):
        return self.encoding

    def _get_errorHandler(self):
        return self.errorHandler

    def _get_standalone(self):
        return self.standalone

    def _get_strictErrorChecking(self):
        return self.strictErrorChecking

    def _get_version(self):
        return self.version

    def appendChild(self, node):
        """Append *node* as the last child of the document.

        Raises xml.dom.HierarchyRequestErr if the node type is not allowed
        at document level or if a second document element would result.
        """
        if node.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        if node.parentNode is not None:
            # This needs to be done before the next test since this
            # may *be* the document element, in which case it should
            # end up re-ordered to the end.
            node.parentNode.removeChild(node)

        if node.nodeType == Node.ELEMENT_NODE \
           and self._get_documentElement():
            raise xml.dom.HierarchyRequestErr(
                "two document elements disallowed")
        return Node.appendChild(self, node)

    def removeChild(self, oldChild):
        """Detach *oldChild* from the document and return it.

        The sibling links of the neighbouring nodes are closed over the
        gap, and the ID cache is invalidated when the removed node can
        contain elements.  Raises xml.dom.NotFoundErr if *oldChild* is not
        a child of the document.
        """
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        # Re-link the neighbours; otherwise they keep pointing at the
        # removed node.
        following = oldChild.nextSibling
        preceding = oldChild.previousSibling
        if following is not None:
            following.previousSibling = preceding
        if preceding is not None:
            preceding.nextSibling = following
        oldChild.nextSibling = oldChild.previousSibling = None
        oldChild.parentNode = None
        if oldChild.nodeType in _nodeTypes_with_children:
            # Cached getElementById() answers may refer into the subtree
            # that just left the document.
            _clear_id_cache(self)
        # documentElement is computed from childNodes, so there is nothing
        # to reset once the child has left the list.
        return oldChild

    def _get_documentElement(self):
        """Return the first element child, or None if there is none."""
        for node in self.childNodes:
            if node.nodeType == Node.ELEMENT_NODE:
                return node

    def unlink(self):
        """Unlink the doctype (if any) and then the rest of the tree."""
        if self.doctype is not None:
            self.doctype.unlink()
            self.doctype = None
        Node.unlink(self)

    def cloneNode(self, deep):
        """Return a deep copy of the document; None unless *deep* is true.

        The copy gets the encoding, standalone and version values and
        clones of all children with their sibling links set up.  User data
        handlers are notified with NODE_CLONED.
        """
        if not deep:
            return None
        clone = self.implementation.createDocument(None, None, None)
        clone.encoding = self.encoding
        clone.standalone = self.standalone
        clone.version = self.version
        previous = None
        for n in self.childNodes:
            childclone = _clone_node(n, deep, clone)
            assert childclone.ownerDocument.isSameNode(clone)
            clone.childNodes.append(childclone)
            if childclone.nodeType == Node.DOCUMENT_NODE:
                assert clone.documentElement is None
            elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE:
                assert clone.doctype is None
                clone.doctype = childclone
            childclone.parentNode = clone
            # The children are appended to the list directly, so the
            # sibling chain has to be maintained here.
            if previous is not None:
                previous.nextSibling = childclone
                childclone.previousSibling = previous
            previous = childclone
        self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED,
                                     self, clone)
        return clone

    def createDocumentFragment(self):
        """Create an empty document fragment owned by this document."""
        d = DocumentFragment()
        d.ownerDocument = self
        return d

    def createElement(self, tagName):
        """Create an element named *tagName* owned by this document."""
        e = Element(tagName)
        e.ownerDocument = self
        return e

    def createTextNode(self, data):
        """Create a text node; raises TypeError if *data* is not a str."""
        if not isinstance(data, str):
            raise TypeError("node contents must be a string")
        t = Text()
        t.data = data
        t.ownerDocument = self
        return t

    def createCDATASection(self, data):
        """Create a CDATA section; raises TypeError if *data* is not a str."""
        if not isinstance(data, str):
            raise TypeError("node contents must be a string")
        c = CDATASection()
        c.data = data
        c.ownerDocument = self
        return c

    def createComment(self, data):
        """Create a comment node owned by this document."""
        c = Comment(data)
        c.ownerDocument = self
        return c

    def createProcessingInstruction(self, target, data):
        """Create a processing instruction owned by this document."""
        p = ProcessingInstruction(target, data)
        p.ownerDocument = self
        return p

    def createAttribute(self, qName):
        """Create an attribute named *qName* with an empty value."""
        a = Attr(qName)
        a.ownerDocument = self
        a.value = ""
        return a

    def createElementNS(self, namespaceURI, qualifiedName):
        """Create a namespaced element owned by this document."""
        prefix, localName = _nssplit(qualifiedName)
        e = Element(qualifiedName, namespaceURI, prefix)
        e.ownerDocument = self
        return e

    def createAttributeNS(self, namespaceURI, qualifiedName):
        """Create a namespaced attribute with an empty value."""
        prefix, localName = _nssplit(qualifiedName)
        a = Attr(qualifiedName, namespaceURI, localName, prefix)
        a.ownerDocument = self
        a.value = ""
        return a

    # A couple of implementation-specific helpers to create node types
    # not supported by the W3C DOM specs:

    def _create_entity(self, name, publicId, systemId, notationName):
        e = Entity(name, publicId, systemId, notationName)
        e.ownerDocument = self
        return e

    def _create_notation(self, name, publicId, systemId):
        n = Notation(name, publicId, systemId)
        n.ownerDocument = self
        return n

    def getElementById(self, id):
        """Return the element whose ID attribute equals *id*, or None.

        Results are cached in ``_id_cache``.  The tree walk is resumable:
        the stack of nodes still to visit is kept in ``_id_search_stack``
        so a later call continues where the previous one stopped.
        """
        if id in self._id_cache:
            return self._id_cache[id]
        if not (self._elem_info or self._magic_id_count):
            return None

        stack = self._id_search_stack
        if stack is None:
            # we never searched before, or the cache has been cleared
            root = self.documentElement
            # Without a document element there is nothing to search;
            # seeding the stack with None would fail on .childNodes below.
            stack = [root] if root is not None else []
            self._id_search_stack = stack
        elif not stack:
            # Previous search was completed and cache is still valid;
            # no matching node.
            return None

        result = None
        while stack:
            node = stack.pop()
            # add child elements to stack for continued searching
            stack.extend([child for child in node.childNodes
                          if child.nodeType in _nodeTypes_with_children])
            # check this node
            info = self._get_elem_info(node)
            if info:
                # We have to process all ID attributes before
                # returning in order to get all the attributes set to
                # be IDs using Element.setIdAttribute*().
                for attr in node.attributes.values():
                    if attr.namespaceURI:
                        if info.isIdNS(attr.namespaceURI, attr.localName):
                            self._id_cache[attr.value] = node
                            if attr.value == id:
                                result = node
                            elif not node._magic_id_nodes:
                                break
                    elif info.isId(attr.name):
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
                        elif not node._magic_id_nodes:
                            break
                    elif attr._is_id:
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
                        elif node._magic_id_nodes == 1:
                            break
            elif node._magic_id_nodes:
                for attr in node.attributes.values():
                    if attr._is_id:
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
            if result is not None:
                break
        return result

    def getElementsByTagName(self, name):
        """Return a NodeList of the elements with tag name *name*."""
        return _get_elements_by_tagName_helper(self, name, NodeList())

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a NodeList of the elements matching namespace and name."""
        return _get_elements_by_tagName_ns_helper(
            self, namespaceURI, localName, NodeList())

    def isSupported(self, feature, version):
        """Delegate the feature query to the DOM implementation."""
        return self.implementation.hasFeature(feature, version)

    def importNode(self, node, deep):
        """Return a copy of *node* that is owned by this document.

        Raises xml.dom.NotSupportedErr for document and doctype nodes.
        """
        if node.nodeType == Node.DOCUMENT_NODE:
            raise xml.dom.NotSupportedErr("cannot import document nodes")
        elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
            raise xml.dom.NotSupportedErr("cannot import document type nodes")
        return _clone_node(node, deep, self)

    def writexml(self, writer, indent="", addindent="", newl="", encoding=None):
        """Write the XML declaration and then every child to *writer*.

        *encoding* only controls the text of the declaration.
        """
        if encoding is None:
            writer.write('<?xml version="1.0" ?>'+newl)
        else:
            writer.write('<?xml version="1.0" encoding="%s"?>%s' % (
                encoding, newl))
        for node in self.childNodes:
            node.writexml(writer, indent, addindent, newl)

    # DOM Level 3 (WD 9 April 2002)

    def renameNode(self, n, namespaceURI, name):
        """Rename element or attribute *n* in place and return it.

        Raises xml.dom.WrongDocumentErr if *n* belongs to another document,
        xml.dom.NotSupportedErr for other node types and
        xml.dom.NamespaceErr for an illegal use of "xmlns".
        """
        if n.ownerDocument is not self:
            raise xml.dom.WrongDocumentErr(
                "cannot rename nodes from other documents;\n"
                "expected %s,\nfound %s" % (self, n.ownerDocument))
        if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE):
            raise xml.dom.NotSupportedErr(
                "renameNode() only applies to element and attribute nodes")
        if namespaceURI != EMPTY_NAMESPACE:
            if ':' in name:
                prefix, localName = name.split(':', 1)
                if (  prefix == "xmlns"
                      and namespaceURI != xml.dom.XMLNS_NAMESPACE):
                    raise xml.dom.NamespaceErr(
                        "illegal use of 'xmlns' prefix")
            else:
                if (  name == "xmlns"
                      and namespaceURI != xml.dom.XMLNS_NAMESPACE
                      and n.nodeType == Node.ATTRIBUTE_NODE):
                    raise xml.dom.NamespaceErr(
                        "illegal use of the 'xmlns' attribute")
                prefix = None
                localName = name
        else:
            prefix = None
            localName = None
        if n.nodeType == Node.ATTRIBUTE_NODE:
            element = n.ownerElement
            if element is not None:
                # Take the attribute off its element while it is renamed
                # and remember whether it has to be an ID again afterwards.
                is_id = n._is_id
                element.removeAttributeNode(n)
        else:
            element = None
        n.prefix = prefix
        n._localName = localName
        n.namespaceURI = namespaceURI
        n.nodeName = name
        if n.nodeType == Node.ELEMENT_NODE:
            n.tagName = name
        else:
            # attribute node
            n.name = name
            if element is not None:
                element.setAttributeNode(n)
                if is_id:
                    element.setIdAttributeNode(n)
        # It's not clear from a semantic perspective whether we should
        # call the user data handlers for the NODE_RENAMED event since
        # we're re-using the existing node.  The draft spec has been
        # interpreted as meaning "no, don't call the handler unless a
        # new node is created."
        return n


defproperty(Document, "documentElement",
            doc="Top-level element of this document.")
1856
1857
def _clone_node(node, deep, newOwnerDocument):
    """
    Return a copy of *node* that belongs to *newOwnerDocument*.

    Used by Node.cloneNode and Document.importNode.  Children are copied
    only when *deep* is true.  User data handlers are notified with
    NODE_CLONED when the owner document stays the same and with
    NODE_IMPORTED otherwise.  Raises xml.dom.NotSupportedErr for node
    types that cannot be copied here.
    """
    target = newOwnerDocument
    handlers = xml.dom.UserDataHandler
    if node.ownerDocument.isSameNode(target):
        operation = handlers.NODE_CLONED
    else:
        operation = handlers.NODE_IMPORTED

    kind = node.nodeType
    if kind == Node.ELEMENT_NODE:
        clone = target.createElementNS(node.namespaceURI, node.nodeName)
        for source_attr in node.attributes.values():
            clone.setAttributeNS(source_attr.namespaceURI,
                                 source_attr.nodeName, source_attr.value)
            copied = clone.getAttributeNodeNS(source_attr.namespaceURI,
                                              source_attr.localName)
            copied.specified = source_attr.specified

        if deep:
            for child in node.childNodes:
                clone.appendChild(_clone_node(child, deep, target))

    elif kind == Node.DOCUMENT_FRAGMENT_NODE:
        clone = target.createDocumentFragment()
        if deep:
            for child in node.childNodes:
                clone.appendChild(_clone_node(child, deep, target))

    elif kind == Node.TEXT_NODE:
        clone = target.createTextNode(node.data)
    elif kind == Node.CDATA_SECTION_NODE:
        clone = target.createCDATASection(node.data)
    elif kind == Node.PROCESSING_INSTRUCTION_NODE:
        clone = target.createProcessingInstruction(node.target, node.data)
    elif kind == Node.COMMENT_NODE:
        clone = target.createComment(node.data)
    elif kind == Node.ATTRIBUTE_NODE:
        clone = target.createAttributeNS(node.namespaceURI, node.nodeName)
        clone.specified = True
        clone.value = node.value
    elif kind == Node.DOCUMENT_TYPE_NODE:
        assert node.ownerDocument is not target
        # A doctype always comes from another document here.
        operation = handlers.NODE_IMPORTED
        clone = target.implementation.createDocumentType(
            node.name, node.publicId, node.systemId)
        clone.ownerDocument = target
        if deep:
            clone.entities._seq = []
            clone.notations._seq = []
            for source in node.notations._seq:
                notation = Notation(source.nodeName, source.publicId,
                                    source.systemId)
                notation.ownerDocument = target
                clone.notations._seq.append(notation)
                if hasattr(source, '_call_user_data_handler'):
                    source._call_user_data_handler(operation, source,
                                                   notation)
            for source in node.entities._seq:
                entity = Entity(source.nodeName, source.publicId,
                                source.systemId, source.notationName)
                entity.actualEncoding = source.actualEncoding
                entity.encoding = source.encoding
                entity.version = source.version
                entity.ownerDocument = target
                clone.entities._seq.append(entity)
                if hasattr(source, '_call_user_data_handler'):
                    source._call_user_data_handler(operation, source, entity)
    else:
        # Note the cloning of Document and DocumentType nodes is
        # implementation specific.  minidom handles those cases
        # directly in the cloneNode() methods.
        raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node))

    # Check for _call_user_data_handler() since this could conceivably
    # used with other DOM implementations (one of the FourThought
    # DOMs, perhaps?).
    if hasattr(node, '_call_user_data_handler'):
        node._call_user_data_handler(operation, node, clone)
    return clone
1938
1939
def _nssplit(qualifiedName):
    """Split a qualified name at its first colon.

    Returns the list [prefix, localName] when a colon is present and the
    tuple (None, qualifiedName) otherwise.
    """
    prefix, colon, local = qualifiedName.partition(':')
    if colon:
        return [prefix, local]
    return (None, prefix)
1946
1947
def _do_pulldom_parse(func, args, kwargs):
    """Build a complete DOM tree using a pulldom event stream.

    *func* is called with *args* and *kwargs* to obtain the stream.  The
    node of its first event is expanded into a full tree, the stream is
    cleared, and the node is returned.
    """
    stream = func(*args, **kwargs)
    _, root = stream.getEvent()
    stream.expandNode(root)
    stream.clear()
    return root
1954
def parse(file, parser=None, bufsize=None):
    """Parse a file, given by name or as a file object, into a DOM.

    Without an explicit *parser* and without a *bufsize* the expat based
    builder is used; otherwise the work is done through pulldom with the
    given parser and buffer size.
    """
    if parser is not None or bufsize:
        from xml.dom import pulldom
        options = {'parser': parser, 'bufsize': bufsize}
        return _do_pulldom_parse(pulldom.parse, (file,), options)
    from xml.dom import expatbuilder
    return expatbuilder.parse(file)
1964
def parseString(string, parser=None):
    """Parse XML held in a string into a DOM.

    Without an explicit *parser* the expat based builder is used;
    otherwise the work is done through pulldom with the given parser.
    """
    if parser is not None:
        from xml.dom import pulldom
        options = {'parser': parser}
        return _do_pulldom_parse(pulldom.parseString, (string,), options)
    from xml.dom import expatbuilder
    return expatbuilder.parseString(string)
1974
def getDOMImplementation(features=None):
    """Return the minidom DOM implementation if it has all *features*.

    *features* may be a feature string or an iterable of
    (feature, version) pairs; a false value asks for nothing.  Returns
    None as soon as a requested feature is found to be unsupported.
    """
    if features:
        if isinstance(features, str):
            features = domreg._parse_feature_string(features)
        supported = all(Document.implementation.hasFeature(f, v)
                        for f, v in features)
        if not supported:
            return None
    return Document.implementation