# Reposted from http://hg.python.org/cpython/file/2.7/Lib/xml/dom/minidom.py

 

"""\

minidom.py -- a lightweight DOM implementation.

parse("foo.xml")

parseString("<foo><bar/></foo>")

Todo:
=====
 * convenience methods for getting elements and text.
 * more testing
 * bring some of the writer and linearizer code into conformance with this
        interface
 * SAX 2 namespaces
"""
16
import xml.dom

from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg
from xml.dom.minicompat import *
from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS
22
# This is used by the ID-cache invalidation checks; the list isn't
# actually complete, since the nodes being checked will never be the
# DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE.  (The node being checked is
# the node being added or removed, not the node being modified.)
#
_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE,
                            xml.dom.Node.ENTITY_REFERENCE_NODE)
30
31
class Node(xml.dom.Node):
    """Common base class for minidom nodes.

    Implements child-list manipulation, serialisation helpers and the
    DOM Level 3 user-data API.  The methods here rely on the concrete
    class (or instance) providing ``nodeType``, ``childNodes``,
    ``_child_node_types`` and ``writexml()``.
    """

    namespaceURI = None  # this is non-null only for elements and attributes
    parentNode = None
    ownerDocument = None
    nextSibling = None
    previousSibling = None

    prefix = EMPTY_PREFIX  # non-null only for NS elements and attributes

    def __nonzero__(self):
        # A node is always true, regardless of any __len__ a subclass adds.
        return True

    def toxml(self, encoding=None):
        """Return the node serialised as XML without added whitespace."""
        return self.toprettyxml("", "", encoding)

    def toprettyxml(self, indent="\t", newl="\n", encoding=None):
        """Return the node serialised as XML.

        indent -- the indentation string to prepend, per level
        newl -- the newline string to append
        encoding -- optional codec name; when given the output is written
            through that codec's stream writer
        """
        writer = _get_StringIO()
        if encoding is not None:
            import codecs
            # Can't use codecs.getwriter to preserve 2.0 compatibility
            writer = codecs.lookup(encoding)[3](writer)
        if self.nodeType == Node.DOCUMENT_NODE:
            # Can pass encoding only to document, to put it into XML header
            self.writexml(writer, "", indent, newl, encoding)
        else:
            self.writexml(writer, "", indent, newl)
        return writer.getvalue()

    def hasChildNodes(self):
        """Return True if this node has at least one child."""
        return bool(self.childNodes)

    def _get_childNodes(self):
        return self.childNodes

    def _get_firstChild(self):
        # Falls through to an implicit None when there are no children.
        if self.childNodes:
            return self.childNodes[0]

    def _get_lastChild(self):
        # Falls through to an implicit None when there are no children.
        if self.childNodes:
            return self.childNodes[-1]

    def insertBefore(self, newChild, refChild):
        """Insert newChild before refChild (append when refChild is None).

        A document fragment has each of its children inserted in turn.
        Raises HierarchyRequestErr if newChild's type is not allowed here
        and NotFoundErr if refChild is not a child of this node.
        Returns newChild.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            # Iterate over a copy: inserting a child removes it from the
            # fragment's own child list.
            for c in tuple(newChild.childNodes):
                self.insertBefore(c, refChild)
            ### The DOM does not clearly specify what to return in this case
            return newChild
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        if refChild is None:
            self.appendChild(newChild)
        else:
            try:
                index = self.childNodes.index(refChild)
            except ValueError:
                raise xml.dom.NotFoundErr()
            if newChild.nodeType in _nodeTypes_with_children:
                _clear_id_cache(self)
            self.childNodes.insert(index, newChild)
            newChild.nextSibling = refChild
            refChild.previousSibling = newChild
            if index:
                node = self.childNodes[index-1]
                node.nextSibling = newChild
                newChild.previousSibling = node
            else:
                newChild.previousSibling = None
            newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Append node as the last child and return it.

        A document fragment has each of its children appended in turn.
        Raises HierarchyRequestErr if the node type is not allowed here.
        """
        if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for c in tuple(node.childNodes):
                self.appendChild(c)
            ### The DOM does not clearly specify what to return in this case
            return node
        if node.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        elif node.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)
        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        _append_child(self, node)
        node.nextSibling = None
        return node

    def replaceChild(self, newChild, oldChild):
        """Replace oldChild with newChild and return oldChild.

        Returns None when both arguments are the same node.  Raises
        HierarchyRequestErr for a disallowed node type and NotFoundErr
        if oldChild is not a child of this node.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            refChild = oldChild.nextSibling
            self.removeChild(oldChild)
            return self.insertBefore(newChild, refChild)
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild is oldChild:
            return
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        try:
            index = self.childNodes.index(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        self.childNodes[index] = newChild
        newChild.parentNode = self
        oldChild.parentNode = None
        if (newChild.nodeType in _nodeTypes_with_children
            or oldChild.nodeType in _nodeTypes_with_children):
            _clear_id_cache(self)
        newChild.nextSibling = oldChild.nextSibling
        newChild.previousSibling = oldChild.previousSibling
        oldChild.nextSibling = None
        oldChild.previousSibling = None
        if newChild.previousSibling:
            newChild.previousSibling.nextSibling = newChild
        if newChild.nextSibling:
            newChild.nextSibling.previousSibling = newChild
        return oldChild

    def removeChild(self, oldChild):
        """Detach oldChild from this node and return it.

        Raises NotFoundErr if oldChild is not a child of this node.
        """
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        if oldChild.nextSibling is not None:
            oldChild.nextSibling.previousSibling = oldChild.previousSibling
        if oldChild.previousSibling is not None:
            oldChild.previousSibling.nextSibling = oldChild.nextSibling
        oldChild.nextSibling = oldChild.previousSibling = None
        if oldChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)

        oldChild.parentNode = None
        return oldChild

    def normalize(self):
        """Drop empty text children and merge adjacent text children.

        Element children are normalized recursively.
        """
        L = []
        for child in self.childNodes:
            if child.nodeType == Node.TEXT_NODE:
                if not child.data:
                    # empty text node; discard
                    if L:
                        L[-1].nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = child.previousSibling
                    child.unlink()
                elif L and L[-1].nodeType == child.nodeType:
                    # collapse text node
                    node = L[-1]
                    node.data = node.data + child.data
                    node.nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = node
                    child.unlink()
                else:
                    L.append(child)
            else:
                L.append(child)
                if child.nodeType == Node.ELEMENT_NODE:
                    child.normalize()
        self.childNodes[:] = L

    def cloneNode(self, deep):
        """Return a clone of this node produced by _clone_node()."""
        return _clone_node(self, deep, self.ownerDocument or self)

    def isSupported(self, feature, version):
        """Ask the owner document's implementation about a feature."""
        return self.ownerDocument.implementation.hasFeature(feature, version)

    def _get_localName(self):
        # Overridden in Element and Attr where localName can be Non-Null
        return None

    # Node interfaces from Level 3 (WD 9 April 2002)

    def isSameNode(self, other):
        """Return True if other is this very object."""
        return self is other

    def getInterface(self, feature):
        """Return self if the feature is supported, otherwise None."""
        if self.isSupported(feature, None):
            return self
        else:
            return None

    # The "user data" functions use a dictionary that is only present
    # if some user data has been set, so be careful not to assume it
    # exists.

    def getUserData(self, key):
        """Return the data stored under key, or None if there is none."""
        try:
            return self._user_data[key][0]
        except (AttributeError, KeyError):
            return None

    def setUserData(self, key, data, handler):
        """Store (data, handler) under key and return the previous data.

        Passing data=None removes an existing entry.
        """
        old = None
        try:
            d = self._user_data
        except AttributeError:
            d = {}
            self._user_data = d
        if key in d:
            old = d[key][0]
        if data is None:
            # ignore handlers passed for None
            handler = None
            if old is not None:
                del d[key]
        else:
            d[key] = (data, handler)
        return old

    def _call_user_data_handler(self, operation, src, dst):
        # Notify every registered handler about the given operation.
        if hasattr(self, "_user_data"):
            for key, (data, handler) in self._user_data.items():
                if handler is not None:
                    handler.handle(operation, key, data, src, dst)

    # minidom-specific API:

    def unlink(self):
        """Clear this node's parent, owner-document, sibling and child
        references, unlinking the children recursively."""
        self.parentNode = self.ownerDocument = None
        if self.childNodes:
            for child in self.childNodes:
                child.unlink()
            self.childNodes = NodeList()
        self.previousSibling = None
        self.nextSibling = None

defproperty(Node, "firstChild", doc="First child node, or None.")
defproperty(Node, "lastChild",  doc="Last child node, or None.")
defproperty(Node, "localName",  doc="Namespace-local name of this node.")
272
273
def _append_child(self, node):
    """Append node to self.childNodes and wire up its sibling/parent links.

    Writes straight into the instances' ``__dict__`` so that no
    ``__setattr__`` hook runs.
    """
    # fast path with less checks; usable by DOM builders if careful
    childNodes = self.childNodes
    if childNodes:
        last = childNodes[-1]
        node.__dict__["previousSibling"] = last
        last.__dict__["nextSibling"] = node
    childNodes.append(node)
    node.__dict__["parentNode"] = self
283
def _in_document(node):
    """Return True iff node, or one of its ancestors, is a document node."""
    # return True iff node is part of a document tree
    while node is not None:
        if node.nodeType == Node.DOCUMENT_NODE:
            return True
        node = node.parentNode
    return False
291
def _write_data(writer, data):
    """Write data to writer with &, <, " and > replaced by entities.

    Nothing is written when data is empty.
    """
    if data:
        # "&" must be replaced first so the entities added by the later
        # replacements are not escaped again.
        data = data.replace("&", "&amp;").replace("<", "&lt;"). \
                    replace("\"", "&quot;").replace(">", "&gt;")
        writer.write(data)
298
def _get_elements_by_tagName_helper(parent, name, rc):
    """Append to rc, in document order, every descendant element of parent
    whose tagName equals name ("*" matches every element); return rc."""
    for node in parent.childNodes:
        if node.nodeType == Node.ELEMENT_NODE and \
            (name == "*" or node.tagName == name):
            rc.append(node)
        # Recurse into every child, whatever its type.
        _get_elements_by_tagName_helper(node, name, rc)
    return rc
306
def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc):
    """Append to rc, in document order, every descendant element of parent
    matching nsURI and localName ("*" is a wildcard for either); return rc.
    """
    for node in parent.childNodes:
        if node.nodeType == Node.ELEMENT_NODE:
            if ((localName == "*" or node.localName == localName) and
                (nsURI == "*" or node.namespaceURI == nsURI)):
                rc.append(node)
            # Only elements are descended into.
            _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc)
    return rc
315
class DocumentFragment(Node):
    """A parentless container node holding a list of child nodes."""

    nodeType = Node.DOCUMENT_FRAGMENT_NODE
    nodeName = "#document-fragment"
    nodeValue = None
    attributes = None
    parentNode = None
    # Node types accepted as children of a fragment.
    _child_node_types = (Node.ELEMENT_NODE,
                         Node.TEXT_NODE,
                         Node.CDATA_SECTION_NODE,
                         Node.ENTITY_REFERENCE_NODE,
                         Node.PROCESSING_INSTRUCTION_NODE,
                         Node.COMMENT_NODE,
                         Node.NOTATION_NODE)

    def __init__(self):
        self.childNodes = NodeList()
332
333
class Attr(Node):
    """An attribute node.

    The value is mirrored in the ``value``/``nodeValue`` attributes and
    in the ``data`` of a single Text child.
    """

    nodeType = Node.ATTRIBUTE_NODE
    attributes = None
    ownerElement = None
    specified = False
    _is_id = False

    _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
                 prefix=None):
        # skip setattr for performance
        d = self.__dict__
        d["nodeName"] = d["name"] = qName
        d["namespaceURI"] = namespaceURI
        d["prefix"] = prefix
        d['childNodes'] = NodeList()

        # Add the single child node that represents the value of the attr
        self.childNodes.append(Text())

        # nodeValue and value are set elsewhere

    def _get_localName(self):
        # The part of the qualified name after the first ":", or the
        # whole name when there is no prefix.
        return self.nodeName.split(":", 1)[-1]

    def _get_name(self):
        return self.name

    def _get_specified(self):
        return self.specified

    def __setattr__(self, name, value):
        """Keep the aliased attributes (value/nodeValue, name/nodeName)
        in step and invalidate the owner's ID cache when they change."""
        d = self.__dict__
        if name in ("value", "nodeValue"):
            d["value"] = d["nodeValue"] = value
            d2 = self.childNodes[0].__dict__
            d2["data"] = d2["nodeValue"] = value
            if self.ownerElement is not None:
                _clear_id_cache(self.ownerElement)
        elif name in ("name", "nodeName"):
            d["name"] = d["nodeName"] = value
            if self.ownerElement is not None:
                _clear_id_cache(self.ownerElement)
        else:
            d[name] = value

    def _set_prefix(self, prefix):
        # Change the prefix and rebuild the qualified name from it.
        nsuri = self.namespaceURI
        if prefix == "xmlns":
            if nsuri and nsuri != XMLNS_NAMESPACE:
                raise xml.dom.NamespaceErr(
                    "illegal use of 'xmlns' prefix for the wrong namespace")
        d = self.__dict__
        d['prefix'] = prefix
        if prefix is None:
            newName = self.localName
        else:
            newName = "%s:%s" % (prefix, self.localName)
        if self.ownerElement is not None:
            _clear_id_cache(self.ownerElement)
        d['nodeName'] = d['name'] = newName

    def _set_value(self, value):
        d = self.__dict__
        d['value'] = d['nodeValue'] = value
        if self.ownerElement is not None:
            _clear_id_cache(self.ownerElement)
        self.childNodes[0].data = value

    def unlink(self):
        """Remove this attribute from its owner element's attribute
        dictionaries and unlink its children."""
        # This implementation does not call the base implementation
        # since most of that is not needed, and the expense of the
        # method call is not warranted.  We duplicate the removal of
        # children, but that's all we needed from the base class.
        elem = self.ownerElement
        if elem is not None:
            del elem._attrs[self.nodeName]
            del elem._attrsNS[(self.namespaceURI, self.localName)]
            if self._is_id:
                self._is_id = False
                elem._magic_id_nodes -= 1
                self.ownerDocument._magic_id_count -= 1
        for child in self.childNodes:
            child.unlink()
        del self.childNodes[:]

    def _get_isId(self):
        # Explicitly flagged IDs win; otherwise consult the document's
        # element info, if any is available.
        if self._is_id:
            return True
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return False

        info = doc._get_elem_info(elem)
        if info is None:
            return False
        if self.namespaceURI:
            return info.isIdNS(self.namespaceURI, self.localName)
        else:
            return info.isId(self.nodeName)

    def _get_schemaType(self):
        # Falls back to the _no_type placeholder when no element info
        # can be looked up.
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return _no_type

        info = doc._get_elem_info(elem)
        if info is None:
            return _no_type
        if self.namespaceURI:
            return info.getAttributeTypeNS(self.namespaceURI, self.localName)
        else:
            return info.getAttributeType(self.nodeName)

defproperty(Attr, "isId",       doc="True if this attribute is an ID.")
defproperty(Attr, "localName",  doc="Namespace-local name of this attribute.")
defproperty(Attr, "schemaType", doc="Schema type for this attribute.")
454
455
class NamedNodeMap(object):
    """The attribute list is a transient interface to the underlying
    dictionaries.  Mutations here will change the underlying element's
    dictionary.

    Ordering is imposed artificially and does not reflect the order of
    attributes as found in an input document.
    """

    __slots__ = ('_attrs', '_attrsNS', '_ownerElement')

    def __init__(self, attrs, attrsNS, ownerElement):
        self._attrs = attrs
        self._attrsNS = attrsNS
        self._ownerElement = ownerElement

    def _get_length(self):
        return len(self._attrs)

    def item(self, index):
        """Return the index-th attribute node, or None if out of range."""
        try:
            # list() so this also works where keys() is not indexable.
            return self[list(self._attrs.keys())[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (nodeName, value) pairs."""
        L = []
        for node in self._attrs.values():
            L.append((node.nodeName, node.value))
        return L

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        L = []
        for node in self._attrs.values():
            L.append(((node.namespaceURI, node.localName), node.value))
        return L

    def has_key(self, key):
        """Test for a qualified name (string key) or a
        (namespaceURI, localName) pair (any other key)."""
        if isinstance(key, StringTypes):
            return key in self._attrs
        else:
            return key in self._attrsNS

    def keys(self):
        return self._attrs.keys()

    def keysNS(self):
        return self._attrsNS.keys()

    def values(self):
        return self._attrs.values()

    def get(self, name, value=None):
        return self._attrs.get(name, value)

    __len__ = _get_length

    __hash__ = None # Mutable type can't be correctly hashed
    def __cmp__(self, other):
        # Maps sharing the same underlying dictionary compare equal.
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return cmp(id(self), id(other))

    def __getitem__(self, attname_or_tuple):
        if isinstance(attname_or_tuple, tuple):
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Set an attribute from a string value or an Attr node.

        Raises TypeError for any other kind of value.
        """
        if isinstance(value, StringTypes):
            try:
                node = self._attrs[attname]
            except KeyError:
                node = Attr(attname)
                node.ownerDocument = self._ownerElement.ownerDocument
                self.setNamedItem(node)
            node.value = value
        else:
            if not isinstance(value, Attr):
                raise TypeError("value must be a string or Attr object")
            node = value
            self.setNamedItem(node)

    def getNamedItem(self, name):
        """Return the attribute node called name, or None."""
        try:
            return self._attrs[name]
        except KeyError:
            return None

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the attribute node for the pair, or None."""
        try:
            return self._attrsNS[(namespaceURI, localName)]
        except KeyError:
            return None

    def removeNamedItem(self, name):
        """Remove and return the attribute called name.

        Raises NotFoundErr if there is no such attribute.
        """
        n = self.getNamedItem(name)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrs[n.nodeName]
            del self._attrsNS[(n.namespaceURI, n.localName)]
            if 'ownerElement' in n.__dict__:
                n.__dict__['ownerElement'] = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def removeNamedItemNS(self, namespaceURI, localName):
        """Remove and return the attribute for the pair.

        Raises NotFoundErr if there is no such attribute.
        """
        n = self.getNamedItemNS(namespaceURI, localName)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrsNS[(n.namespaceURI, n.localName)]
            del self._attrs[n.nodeName]
            if 'ownerElement' in n.__dict__:
                n.__dict__['ownerElement'] = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def setNamedItem(self, node):
        """Store node, unlinking and returning any attribute it replaces.

        Raises HierarchyRequestErr if node is not an Attr.
        """
        if not isinstance(node, Attr):
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        old = self._attrs.get(node.name)
        if old is not None:
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        node.ownerElement = self._ownerElement
        _clear_id_cache(node.ownerElement)
        return old

    def setNamedItemNS(self, node):
        return self.setNamedItem(node)

    def __delitem__(self, attname_or_tuple):
        node = self[attname_or_tuple]
        _clear_id_cache(node.ownerElement)
        node.unlink()

    def __getstate__(self):
        return self._attrs, self._attrsNS, self._ownerElement

    def __setstate__(self, state):
        self._attrs, self._attrsNS, self._ownerElement = state

defproperty(NamedNodeMap, "length",
            doc="Number of nodes in the NamedNodeMap.")
607
608 AttributeList = NamedNodeMap
609
610
class TypeInfo(object):
    """A (namespace, name) pair describing a schema type."""

    __slots__ = 'namespace', 'name'

    def __init__(self, namespace, name):
        self.namespace = namespace
        self.name = name

    def __repr__(self):
        # The namespace is only shown when it is non-empty.
        if self.namespace:
            return "<TypeInfo %r (from %r)>" % (self.name, self.namespace)
        else:
            return "<TypeInfo %r>" % self.name

    def _get_name(self):
        return self.name

    def _get_namespace(self):
        return self.namespace

# Shared placeholder used wherever no type information is available.
_no_type = TypeInfo(None, None)
631
class Element(Node):
    """An element node.

    Attributes are kept in two dictionaries, one keyed by qualified
    name and one keyed by (namespaceURI, localName).
    """

    nodeType = Node.ELEMENT_NODE
    nodeValue = None
    schemaType = _no_type

    _magic_id_nodes = 0

    _child_node_types = (Node.ELEMENT_NODE,
                         Node.PROCESSING_INSTRUCTION_NODE,
                         Node.COMMENT_NODE,
                         Node.TEXT_NODE,
                         Node.CDATA_SECTION_NODE,
                         Node.ENTITY_REFERENCE_NODE)

    def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
                 localName=None):
        self.tagName = self.nodeName = tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.childNodes = NodeList()

        self._attrs = {}   # attributes are double-indexed:
        self._attrsNS = {} #    tagName -> Attribute
                           #    URI,localName -> Attribute
                           # in the future: consider lazy generation
                           # of attribute objects this is too tricky
                           # for now because of headaches with
                           # namespaces.

    def _get_localName(self):
        # The part of the tag name after the first ":", or the whole
        # name when there is no prefix.
        return self.tagName.split(":", 1)[-1]

    def _get_tagName(self):
        return self.tagName

    def unlink(self):
        """Unlink every attribute, drop the attribute dictionaries, then
        unlink the node itself."""
        # Iterate over a snapshot: Attr.unlink() deletes entries from
        # self._attrs while we loop.
        for attr in list(self._attrs.values()):
            attr.unlink()
        self._attrs = None
        self._attrsNS = None
        Node.unlink(self)

    def getAttribute(self, attname):
        """Return the attribute's value, or "" if it is not set."""
        try:
            return self._attrs[attname].value
        except KeyError:
            return ""

    def getAttributeNS(self, namespaceURI, localName):
        """Return the namespaced attribute's value, or "" if not set."""
        try:
            return self._attrsNS[(namespaceURI, localName)].value
        except KeyError:
            return ""

    def setAttribute(self, attname, value):
        """Set the attribute, creating its node if needed."""
        attr = self.getAttributeNode(attname)
        if attr is None:
            attr = Attr(attname)
            # for performance
            d = attr.__dict__
            d["value"] = d["nodeValue"] = value
            d["ownerDocument"] = self.ownerDocument
            self.setAttributeNode(attr)
        elif value != attr.value:
            d = attr.__dict__
            d["value"] = d["nodeValue"] = value
            if attr.isId:
                _clear_id_cache(self)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        """Set a namespaced attribute, creating its node if needed.

        An existing node also has its prefix and qualified name updated
        when the prefix differs.
        """
        prefix, localname = _nssplit(qualifiedName)
        attr = self.getAttributeNodeNS(namespaceURI, localname)
        if attr is None:
            # for performance
            attr = Attr(qualifiedName, namespaceURI, localname, prefix)
            d = attr.__dict__
            d["prefix"] = prefix
            d["nodeName"] = qualifiedName
            d["value"] = d["nodeValue"] = value
            d["ownerDocument"] = self.ownerDocument
            self.setAttributeNode(attr)
        else:
            d = attr.__dict__
            if value != attr.value:
                d["value"] = d["nodeValue"] = value
                if attr.isId:
                    _clear_id_cache(self)
            if attr.prefix != prefix:
                d["prefix"] = prefix
                d["nodeName"] = qualifiedName

    def getAttributeNode(self, attrname):
        """Return the attribute node called attrname, or None."""
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        """Return the attribute node for the pair, or None."""
        return self._attrsNS.get((namespaceURI, localName))

    def setAttributeNode(self, attr):
        """Attach attr, first removing any attribute it clashes with.

        Raises InuseAttributeErr if attr belongs to another element.
        Returns the replaced node, if a different node was replaced.
        """
        if attr.ownerElement not in (None, self):
            raise xml.dom.InuseAttributeErr("attribute node already owned")
        old1 = self._attrs.get(attr.name, None)
        if old1 is not None:
            self.removeAttributeNode(old1)
        old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None)
        if old2 is not None and old2 is not old1:
            self.removeAttributeNode(old2)
        _set_attribute_node(self, attr)

        if old1 is not attr:
            # It might have already been part of this node, in which case
            # it doesn't represent a change, and should not be returned.
            return old1
        if old2 is not attr:
            return old2

    setAttributeNodeNS = setAttributeNode

    def removeAttribute(self, name):
        """Remove the named attribute; raise NotFoundErr if absent."""
        try:
            attr = self._attrs[name]
        except KeyError:
            raise xml.dom.NotFoundErr()
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Remove the namespaced attribute; raise NotFoundErr if absent."""
        try:
            attr = self._attrsNS[(namespaceURI, localName)]
        except KeyError:
            raise xml.dom.NotFoundErr()
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        """Detach node from this element.

        Raises NotFoundErr if node is None or its name is not one of
        this element's attributes.
        """
        if node is None:
            raise xml.dom.NotFoundErr()
        try:
            self._attrs[node.name]
        except KeyError:
            raise xml.dom.NotFoundErr()
        _clear_id_cache(self)
        node.unlink()
        # Restore this since the node is still useful and otherwise
        # unlinked
        node.ownerDocument = self.ownerDocument

    removeAttributeNodeNS = removeAttributeNode

    def hasAttribute(self, name):
        return name in self._attrs

    def hasAttributeNS(self, namespaceURI, localName):
        return (namespaceURI, localName) in self._attrsNS

    def getElementsByTagName(self, name):
        """Return a NodeList of descendant elements with the tag name."""
        return _get_elements_by_tagName_helper(self, name, NodeList())

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a NodeList of descendant elements matching the pair."""
        return _get_elements_by_tagName_ns_helper(
            self, namespaceURI, localName, NodeList())

    def __repr__(self):
        return "<DOM Element: %s at %#x>" % (self.tagName, id(self))

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the element, its attributes (sorted by name) and its
        children to writer.

        indent -- current indentation
        addindent -- indentation to add to higher levels
        newl -- newline string
        """
        writer.write(indent+"<" + self.tagName)

        attrs = self._get_attributes()
        a_names = sorted(attrs.keys())

        for a_name in a_names:
            writer.write(" %s=\"" % a_name)
            _write_data(writer, attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            if (len(self.childNodes) == 1 and
                self.childNodes[0].nodeType == Node.TEXT_NODE):
                # A lone text child is written inline, with no added
                # whitespace.
                self.childNodes[0].writexml(writer, '', '', '')
            else:
                writer.write(newl)
                for node in self.childNodes:
                    node.writexml(writer, indent+addindent, addindent, newl)
                writer.write(indent)
            writer.write("</%s>%s" % (self.tagName, newl))
        else:
            writer.write("/>%s"%(newl))

    def _get_attributes(self):
        return NamedNodeMap(self._attrs, self._attrsNS, self)

    def hasAttributes(self):
        """Return True if the element has at least one attribute."""
        return bool(self._attrs)

    # DOM Level 3 attributes, based on the 22 Oct 2002 draft

    def setIdAttribute(self, name):
        idAttr = self.getAttributeNode(name)
        self.setIdAttributeNode(idAttr)

    def setIdAttributeNS(self, namespaceURI, localName):
        idAttr = self.getAttributeNodeNS(namespaceURI, localName)
        self.setIdAttributeNode(idAttr)

    def setIdAttributeNode(self, idAttr):
        """Flag idAttr as an ID attribute of this element.

        Raises NotFoundErr if idAttr is None or owned by another
        element, and NoModificationAllowedErr if this element sits
        inside an entity reference.
        """
        if idAttr is None or not self.isSameNode(idAttr.ownerElement):
            raise xml.dom.NotFoundErr()
        if _get_containing_entref(self) is not None:
            raise xml.dom.NoModificationAllowedErr()
        if not idAttr._is_id:
            idAttr.__dict__['_is_id'] = True
            self._magic_id_nodes += 1
            self.ownerDocument._magic_id_count += 1
            _clear_id_cache(self)

defproperty(Element, "attributes",
            doc="NamedNodeMap of attributes on the element.")
defproperty(Element, "localName",
            doc="Namespace-local name of this element.")
856
857
def _set_attribute_node(element, attr):
    """Register attr in both of element's attribute dictionaries and
    make element its owner."""
    _clear_id_cache(element)
    element._attrs[attr.name] = attr
    element._attrsNS[(attr.namespaceURI, attr.localName)] = attr

    # This creates a circular reference, but Element.unlink()
    # breaks the cycle since the references to the attribute
    # dictionaries are tossed.
    attr.__dict__['ownerElement'] = element
867
868
class Childless:
    """Mixin that makes childless-ness easy to implement and avoids
    the complexity of the Node methods that deal with children.
    """

    attributes = None
    childNodes = EmptyNodeList()
    firstChild = None
    lastChild = None

    def _get_firstChild(self):
        return None

    def _get_lastChild(self):
        return None

    def appendChild(self, node):
        """Always raises HierarchyRequestErr."""
        raise xml.dom.HierarchyRequestErr(
            self.nodeName + " nodes cannot have children")

    def hasChildNodes(self):
        return False

    def insertBefore(self, newChild, refChild):
        """Always raises HierarchyRequestErr."""
        raise xml.dom.HierarchyRequestErr(
            self.nodeName + " nodes do not have children")

    def removeChild(self, oldChild):
        """Always raises NotFoundErr."""
        raise xml.dom.NotFoundErr(
            self.nodeName + " nodes do not have children")

    def normalize(self):
        # For childless nodes, normalize() has nothing to do.
        pass

    def replaceChild(self, newChild, oldChild):
        """Always raises HierarchyRequestErr."""
        raise xml.dom.HierarchyRequestErr(
            self.nodeName + " nodes do not have children")
907
908
class ProcessingInstruction(Childless, Node):
    """A processing instruction node.

    ``target`` is aliased to ``nodeName`` and ``data`` to ``nodeValue``;
    assigning to either name of a pair updates both.
    """

    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        self.target = self.nodeName = target
        self.data = self.nodeValue = data

    def _get_data(self):
        return self.data
    def _set_data(self, value):
        d = self.__dict__
        d['data'] = d['nodeValue'] = value

    def _get_target(self):
        return self.target
    def _set_target(self, value):
        d = self.__dict__
        d['target'] = d['nodeName'] = value

    def __setattr__(self, name, value):
        # Keep the two aliased attribute pairs in step.
        if name == "data" or name == "nodeValue":
            self.__dict__['data'] = self.__dict__['nodeValue'] = value
        elif name == "target" or name == "nodeName":
            self.__dict__['target'] = self.__dict__['nodeName'] = value
        else:
            self.__dict__[name] = value

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write ``<?target data?>`` preceded by indent and followed by
        newl; addindent is unused."""
        writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl))
938
939
class CharacterData(Childless, Node):
    """Base class for nodes carrying string data.

    ``data`` and ``nodeValue`` are aliases; assigning to either updates
    both.  For the offset-based methods an offset equal to the length
    of the data is valid (it addresses the end of the string), as the
    DOM specification requires; only a larger or negative offset raises
    IndexSizeErr.
    """

    def _get_length(self):
        return len(self.data)
    __len__ = _get_length

    def _get_data(self):
        return self.__dict__['data']
    def _set_data(self, data):
        d = self.__dict__
        d['data'] = d['nodeValue'] = data

    _get_nodeValue = _get_data
    _set_nodeValue = _set_data

    def __setattr__(self, name, value):
        # Keep data and nodeValue in step.
        if name == "data" or name == "nodeValue":
            self.__dict__['data'] = self.__dict__['nodeValue'] = value
        else:
            self.__dict__[name] = value

    def __repr__(self):
        # Only the first ten characters of the data are shown.
        data = self.data
        if len(data) > 10:
            dotdotdot = "..."
        else:
            dotdotdot = ""
        return '<DOM %s node "%r%s">' % (
            self.__class__.__name__, data[0:10], dotdotdot)

    def substringData(self, offset, count):
        """Return up to count characters starting at offset.

        Raises IndexSizeErr for a negative offset or count, or an
        offset past the end of the data.
        """
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        if offset > len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        return self.data[offset:offset+count]

    def appendData(self, arg):
        """Append arg to the data."""
        self.data = self.data + arg

    def insertData(self, offset, arg):
        """Insert arg at offset (offset == length appends).

        Raises IndexSizeErr for a negative offset or one past the end
        of the data.
        """
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        if offset > len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
        if arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset:])

    def deleteData(self, offset, count):
        """Delete up to count characters starting at offset.

        Raises IndexSizeErr for a negative offset or count, or an
        offset past the end of the data.
        """
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        if offset > len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        if count:
            self.data = self.data[:offset] + self.data[offset+count:]

    def replaceData(self, offset, count, arg):
        """Replace up to count characters starting at offset with arg.

        With count == 0 this inserts arg at offset.  Raises
        IndexSizeErr for a negative offset or count, or an offset past
        the end of the data.
        """
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        if offset > len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        # Skip the rebuild only when nothing is removed and nothing added.
        if count or arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset+count:])

defproperty(CharacterData, "length", doc="Length of the string data.")
1012
1013
class Text(CharacterData):
    """A text node."""

    # Make sure we don't add an instance __dict__ if we don't already
    # have one, at least when that's possible:
    # XXX this does not work, CharacterData is an old-style class
    # __slots__ = ()

    nodeType = Node.TEXT_NODE
    nodeName = "#text"
    attributes = None

    def splitText(self, offset):
        """Split this node at offset and return the new trailing node.

        When this node has a parent, the new node is inserted right
        after it.  Raises IndexSizeErr if offset is negative or greater
        than the length of the data.
        """
        if offset < 0 or offset > len(self.data):
            raise xml.dom.IndexSizeErr("illegal offset value")
        newText = self.__class__()
        newText.data = self.data[offset:]
        newText.ownerDocument = self.ownerDocument
        next_sibling = self.nextSibling
        if self.parentNode and self in self.parentNode.childNodes:
            if next_sibling is None:
                self.parentNode.appendChild(newText)
            else:
                self.parentNode.insertBefore(newText, next_sibling)
        self.data = self.data[:offset]
        return newText

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write indent + data + newl, escaped, to writer."""
        _write_data(writer, "%s%s%s" % (indent, self.data, newl))

    # DOM Level 3 (WD 9 April 2002)

    def _get_wholeText(self):
        # Concatenate the data of this node and of every directly
        # adjacent text or CDATA sibling, in document order.
        L = [self.data]
        n = self.previousSibling
        while n is not None:
            if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                L.insert(0, n.data)
                n = n.previousSibling
            else:
                break
        n = self.nextSibling
        while n is not None:
            if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                L.append(n.data)
                n = n.nextSibling
            else:
                break
        return ''.join(L)

    def replaceWholeText(self, content):
        """Replace this node and its adjacent text/CDATA siblings with
        content.

        The siblings are removed.  Returns self holding content, or
        None (with self removed as well) when content is empty.
        """
        # XXX This needs to be seriously changed if minidom ever
        # supports EntityReference nodes.
        parent = self.parentNode
        n = self.previousSibling
        while n is not None:
            if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                previous = n.previousSibling
                parent.removeChild(n)
                n = previous
            else:
                break
        # Remember the following sibling before self may be removed.
        n = self.nextSibling
        if not content:
            parent.removeChild(self)
        while n is not None:
            if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                following = n.nextSibling
                parent.removeChild(n)
                n = following
            else:
                break
        if content:
            d = self.__dict__
            d['data'] = content
            d['nodeValue'] = content
            return self
        else:
            return None

    def _get_isWhitespaceInElementContent(self):
        # False unless the data is whitespace-only and the document's
        # info for the containing element reports element content.
        if self.data.strip():
            return False
        elem = _get_containing_element(self)
        if elem is None:
            return False
        info = self.ownerDocument._get_elem_info(elem)
        if info is None:
            return False
        else:
            return info.isElementContent()

defproperty(Text, "isWhitespaceInElementContent",
            doc="True iff this text node contains only whitespace"
                " and is in element content.")
defproperty(Text, "wholeText",
            doc="The text of all logically-adjacent text nodes.")
1109
1110
def _get_containing_element(node):
    """Return the nearest ancestor of *node* that is an element, or None."""
    ancestor = node.parentNode
    while ancestor is not None and ancestor.nodeType != Node.ELEMENT_NODE:
        ancestor = ancestor.parentNode
    # Either the first element ancestor, or None once the chain ran out.
    return ancestor
1118
def _get_containing_entref(node):
    """Return the nearest entity-reference ancestor of *node*, or None."""
    ancestor = node.parentNode
    while (ancestor is not None
           and ancestor.nodeType != Node.ENTITY_REFERENCE_NODE):
        ancestor = ancestor.parentNode
    # Either the first entity-reference ancestor, or None at the top.
    return ancestor
1126
1127
class Comment(Childless, CharacterData):
    """Character-data node with node type COMMENT_NODE."""

    nodeType = Node.COMMENT_NODE
    nodeName = "#comment"

    def __init__(self, data):
        """Store *data* as both the data and the node value."""
        self.data = data
        self.nodeValue = data

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the comment as <!--data--> between *indent* and *newl*.

        Raises ValueError when the data contains "--".
        """
        if "--" in self.data:
            raise ValueError("'--' is not allowed in a comment node")
        markup = "%s<!--%s-->%s" % (indent, self.data, newl)
        writer.write(markup)
1139
1140
class CDATASection(Text):
    """Text node variant that is written out as a CDATA section."""

    # Make sure we don't add an instance __dict__ if we don't already
    # have one, at least when that's possible:
    # XXX this does not work, Text is an old-style class
    # __slots__ = ()

    nodeType = Node.CDATA_SECTION_NODE
    nodeName = "#cdata-section"

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the data wrapped in <![CDATA[ ... ]]>.

        *indent*, *addindent* and *newl* are accepted but not used.
        Raises ValueError when the data contains "]]>".
        """
        if "]]>" in self.data:
            raise ValueError("']]>' not allowed in a CDATA section")
        markup = "<![CDATA[%s]]>" % self.data
        writer.write(markup)
1154
1155
class ReadOnlySequentialNamedNodeMap(object):
    """Read-only NamedNodeMap backed by a plain sequence of nodes.

    Lookups scan the sequence linearly; every mutating method raises
    xml.dom.NoModificationAllowedErr.
    """

    __slots__ = '_seq',

    def __init__(self, seq=()):
        # seq should be a list or tuple
        self._seq = seq

    def __len__(self):
        return len(self._seq)

    def _get_length(self):
        """Return the number of nodes (getter behind the length property)."""
        return len(self._seq)

    def getNamedItem(self, name):
        """Return the first node whose nodeName equals *name*, else None."""
        for n in self._seq:
            if n.nodeName == name:
                return n
        return None

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the first node matching both arguments, else None."""
        for n in self._seq:
            if n.namespaceURI == namespaceURI and n.localName == localName:
                return n
        return None

    def __getitem__(self, name_or_tuple):
        """Look a node up by name or by a (namespaceURI, localName) tuple.

        Raises KeyError, carrying the key, when nothing matches.
        """
        if isinstance(name_or_tuple, tuple):
            node = self.getNamedItemNS(*name_or_tuple)
        else:
            node = self.getNamedItem(name_or_tuple)
        if node is None:
            # Call form instead of the "raise KeyError, key" statement:
            # same exception and args, but valid syntax beyond Python 2.
            raise KeyError(name_or_tuple)
        return node

    def item(self, index):
        """Return the node at *index*; None if negative or out of range."""
        if index < 0:
            # Negative indices must not wrap around like sequence indexing.
            return None
        try:
            return self._seq[index]
        except IndexError:
            return None

    def removeNamedItem(self, name):
        """Always raises xml.dom.NoModificationAllowedErr."""
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def removeNamedItemNS(self, namespaceURI, localName):
        """Always raises xml.dom.NoModificationAllowedErr."""
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItem(self, node):
        """Always raises xml.dom.NoModificationAllowedErr."""
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItemNS(self, node):
        """Always raises xml.dom.NoModificationAllowedErr."""
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def __getstate__(self):
        # __slots__ leaves no instance __dict__, so state is spelled out.
        return [self._seq]

    def __setstate__(self, state):
        self._seq = state[0]

defproperty(ReadOnlySequentialNamedNodeMap, "length",
            doc="Number of entries in the NamedNodeMap.")
1220
1221
class Identified:
    """Mix-in class that supports the publicId and systemId attributes."""

    # XXX this does not work, this is an old-style class
    # __slots__ = 'publicId', 'systemId'

    def _identified_mixin_init(self, publicId, systemId):
        """Record both identifiers on the instance."""
        self.publicId, self.systemId = publicId, systemId

    def _get_publicId(self):
        """Return the stored public identifier."""
        return self.publicId

    def _get_systemId(self):
        """Return the stored system identifier."""
        return self.systemId
1237
class DocumentType(Identified, Childless, Node):
    """Document type node: name, identifiers, entities and notations."""

    nodeType = Node.DOCUMENT_TYPE_NODE
    nodeValue = None
    name = None
    publicId = None
    systemId = None
    internalSubset = None

    def __init__(self, qualifiedName):
        """Start with empty entity/notation maps.

        When *qualifiedName* is given, the part after any prefix becomes
        the name; nodeName always mirrors name (None when unnamed).
        """
        self.entities = ReadOnlySequentialNamedNodeMap()
        self.notations = ReadOnlySequentialNamedNodeMap()
        if qualifiedName:
            prefix, localname = _nssplit(qualifiedName)
            self.name = localname
        self.nodeName = self.name

    def _get_internalSubset(self):
        return self.internalSubset

    def cloneNode(self, deep):
        """Clone this doctype if it has no owner document; else return None.

        With *deep*, notations and entities are copied into fresh lists on
        the clone.  User data handlers are invoked with NODE_CLONED for
        every copied notation and entity and for the doctype itself.
        """
        if self.ownerDocument is None:
            # it's ok
            clone = DocumentType(None)
            clone.name = self.name
            clone.nodeName = self.name
            operation = xml.dom.UserDataHandler.NODE_CLONED
            if deep:
                clone.entities._seq = []
                clone.notations._seq = []
                for n in self.notations._seq:
                    notation = Notation(n.nodeName, n.publicId, n.systemId)
                    clone.notations._seq.append(notation)
                    n._call_user_data_handler(operation, n, notation)
                for e in self.entities._seq:
                    entity = Entity(e.nodeName, e.publicId, e.systemId,
                                    e.notationName)
                    entity.actualEncoding = e.actualEncoding
                    entity.encoding = e.encoding
                    entity.version = e.version
                    clone.entities._seq.append(entity)
                    # The source node is the entity itself.  This used to
                    # pass ``n``, the leftover notation loop variable
                    # (unbound when there are no notations).
                    e._call_user_data_handler(operation, e, entity)
            self._call_user_data_handler(operation, self, clone)
            return clone
        else:
            return None

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the <!DOCTYPE ...> declaration to *writer*.

        A PUBLIC clause is written when publicId is set, otherwise a
        SYSTEM clause when systemId is set; the internal subset, if not
        None, follows in square brackets.  *indent* and *addindent* are
        accepted but not used.
        """
        writer.write("<!DOCTYPE ")
        writer.write(self.name)
        if self.publicId:
            writer.write("%s PUBLIC '%s'%s '%s'"
                         % (newl, self.publicId, newl, self.systemId))
        elif self.systemId:
            writer.write("%s SYSTEM '%s'" % (newl, self.systemId))
        if self.internalSubset is not None:
            writer.write(" [")
            writer.write(self.internalSubset)
            writer.write("]")
        writer.write(">"+newl)
1297
class Entity(Identified, Node):
    """Entity node; every child-modifying method is rejected."""

    attributes = None
    nodeType = Node.ENTITY_NODE
    nodeValue = None

    actualEncoding = None
    encoding = None
    version = None

    def __init__(self, name, publicId, systemId, notation):
        """Store the name, notation name and identifiers; no children yet."""
        self.nodeName = name
        self.notationName = notation
        self.childNodes = NodeList()
        self._identified_mixin_init(publicId, systemId)

    def _get_actualEncoding(self):
        """Return the actualEncoding attribute."""
        return self.actualEncoding

    def _get_encoding(self):
        """Return the encoding attribute."""
        return self.encoding

    def _get_version(self):
        """Return the version attribute."""
        return self.version

    # The four tree-modifying methods below always raise
    # xml.dom.HierarchyRequestErr.

    def appendChild(self, newChild):
        message = "cannot append children to an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def insertBefore(self, newChild, refChild):
        message = "cannot insert children below an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        message = "cannot remove children from an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def replaceChild(self, newChild, oldChild):
        message = "cannot replace children of an entity node"
        raise xml.dom.HierarchyRequestErr(message)
1337
class Notation(Identified, Childless, Node):
    """Notation node carrying a name plus public and system identifiers."""

    nodeType = Node.NOTATION_NODE
    nodeValue = None

    def __init__(self, name, publicId, systemId):
        """Store *name* as the node name and record both identifiers."""
        self.nodeName = name
        self._identified_mixin_init(publicId, systemId)
1345
1346
class DOMImplementation(DOMImplementationLS):
    """Factory for documents and doctypes, plus feature queries."""

    # (feature, version) pairs; a version of None means "any version".
    _features = [("core", "1.0"),
                 ("core", "2.0"),
                 ("core", None),
                 ("xml", "1.0"),
                 ("xml", "2.0"),
                 ("xml", None),
                 ("ls-load", "3.0"),
                 ("ls-load", None),
                 ]

    def hasFeature(self, feature, version):
        """Return whether (*feature*, *version*) is listed in _features.

        The feature name is lower-cased first, and an empty version
        string is treated like None.
        """
        if version == "":
            version = None
        wanted = (feature.lower(), version)
        return wanted in self._features

    def createDocument(self, namespaceURI, qualifiedName, doctype):
        """Create a document, optionally with a root element and doctype.

        No root element is added only when all three arguments are None.
        Raises xml.dom.WrongDocumentErr if *doctype* already has a parent,
        xml.dom.InvalidCharacterErr if a root element is required but
        *qualifiedName* is empty, and xml.dom.NamespaceErr for an illegal
        prefix/namespace combination.
        """
        if doctype and doctype.parentNode is not None:
            raise xml.dom.WrongDocumentErr(
                "doctype object owned by another DOM tree")
        doc = self._create_document()

        add_root_element = (namespaceURI is not None
                            or qualifiedName is not None
                            or doctype is not None)

        if add_root_element and not qualifiedName:
            # The spec is unclear what to raise here; SyntaxErr
            # would be the other obvious candidate. Since Xerces raises
            # InvalidCharacterErr, and since SyntaxErr is not listed
            # for createDocument, that seems to be the better choice.
            # XXX: need to check for illegal characters here and in
            # createElement.

            # DOM Level III clears this up when talking about the return value
            # of this function.  If namespaceURI, qName and DocType are
            # Null the document is returned without a document element
            # Otherwise if doctype or namespaceURI are not None
            # Then we go back to the above problem
            raise xml.dom.InvalidCharacterErr("Element with no name")

        if add_root_element:
            prefix, localname = _nssplit(qualifiedName)
            xml_ns = "http://www.w3.org/XML/1998/namespace"
            if prefix == "xml" and namespaceURI != xml_ns:
                raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
            if prefix and not namespaceURI:
                raise xml.dom.NamespaceErr(
                    "illegal use of prefix without namespaces")
            root = doc.createElementNS(namespaceURI, qualifiedName)
            # The doctype, when present, goes in before the root element.
            if doctype:
                doc.appendChild(doctype)
            doc.appendChild(root)

        if doctype:
            doctype.parentNode = doc
            doctype.ownerDocument = doc

        doc.doctype = doctype
        doc.implementation = self
        return doc

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Return a new DocumentType carrying the given identifiers."""
        new_doctype = DocumentType(qualifiedName)
        new_doctype.publicId = publicId
        new_doctype.systemId = systemId
        return new_doctype

    # DOM Level 3 (WD 9 April 2002)

    def getInterface(self, feature):
        """Return self when *feature* is supported in any version, else None."""
        if not self.hasFeature(feature, None):
            return None
        return self

    # internal
    def _create_document(self):
        return Document()
1425
class ElementInfo(object):
    """Object that represents content-model information for an element.

    This implementation is not expected to be used in practice; DOM
    builders should provide implementations which do the right thing
    using information available to it.

    """

    __slots__ = ('tagName',)

    def __init__(self, name):
        self.tagName = name

    def getAttributeType(self, aname):
        """Return the module's _no_type object for any attribute name."""
        return _no_type

    def getAttributeTypeNS(self, namespaceURI, localName):
        """Return the module's _no_type object for any attribute."""
        return _no_type

    def isElementContent(self):
        """Always False in this implementation."""
        return False

    def isEmpty(self):
        """Returns true iff this element is declared to have an EMPTY
        content model."""
        return False

    def isId(self, aname):
        """Returns true iff the named attribute is a DTD-style ID."""
        return False

    def isIdNS(self, namespaceURI, localName):
        """Returns true iff the identified attribute is a DTD-style ID."""
        return False

    # State is just the tag name; __slots__ leaves no instance __dict__.

    def __getstate__(self):
        return self.tagName

    def __setstate__(self, state):
        self.tagName = state
1467
def _clear_id_cache(node):
    """Reset the ID cache and search stack of the document owning *node*.

    *node* may be the document itself.  Any other node is handled only
    when _in_document() reports it as part of a document; otherwise
    nothing happens.
    """
    if node.nodeType == Node.DOCUMENT_NODE:
        document = node
    elif _in_document(node):
        document = node.ownerDocument
    else:
        return
    document._id_cache.clear()
    document._id_search_stack = None
1475
class Document(Node, DocumentLS):
    """Document node: root of the tree and factory for all other nodes."""

    # Node types that appendChild() accepts as direct children.
    _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
                         Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)

    nodeType = Node.DOCUMENT_NODE
    nodeName = "#document"
    nodeValue = None
    attributes = None
    doctype = None
    parentNode = None
    previousSibling = nextSibling = None

    implementation = DOMImplementation()

    # Document attributes from Level 3 (WD 9 April 2002)

    actualEncoding = None
    encoding = None
    standalone = None
    version = None
    strictErrorChecking = False
    errorHandler = None
    documentURI = None

    _magic_id_count = 0

    def __init__(self):
        self.childNodes = NodeList()
        # mapping of (namespaceURI, localName) -> ElementInfo
        #        and tagName -> ElementInfo
        self._elem_info = {}
        self._id_cache = {}
        self._id_search_stack = None

    def _get_elem_info(self, element):
        """Return the registered ElementInfo for *element*, or None.

        Elements with a namespace are keyed by (namespaceURI, localName),
        all others by tagName.
        """
        if element.namespaceURI:
            key = element.namespaceURI, element.localName
        else:
            key = element.tagName
        return self._elem_info.get(key)

    def _get_actualEncoding(self):
        return self.actualEncoding

    def _get_doctype(self):
        return self.doctype

    def _get_documentURI(self):
        return self.documentURI

    def _get_encoding(self):
        return self.encoding

    def _get_errorHandler(self):
        return self.errorHandler

    def _get_standalone(self):
        return self.standalone

    def _get_strictErrorChecking(self):
        return self.strictErrorChecking

    def _get_version(self):
        return self.version

    def appendChild(self, node):
        """Append *node* as a child of the document.

        Raises xml.dom.HierarchyRequestErr when the node type is not
        allowed here, or when *node* is an element and the document
        already has a document element.
        """
        if node.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        if node.parentNode is not None:
            # This needs to be done before the next test since this
            # may *be* the document element, in which case it should
            # end up re-ordered to the end.
            node.parentNode.removeChild(node)

        if node.nodeType == Node.ELEMENT_NODE \
           and self._get_documentElement():
            raise xml.dom.HierarchyRequestErr(
                "two document elements disallowed")
        return Node.appendChild(self, node)

    def removeChild(self, oldChild):
        """Detach *oldChild* from the document and return it.

        Raises xml.dom.NotFoundErr when it is not among the children.
        """
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        oldChild.nextSibling = oldChild.previousSibling = None
        oldChild.parentNode = None
        if self.documentElement is oldChild:
            self.documentElement = None

        return oldChild

    def _get_documentElement(self):
        """Return the first child that is an element (None if absent)."""
        for node in self.childNodes:
            if node.nodeType == Node.ELEMENT_NODE:
                return node

    def unlink(self):
        """Unlink the doctype, if any, then the rest via Node.unlink()."""
        if self.doctype is not None:
            self.doctype.unlink()
            self.doctype = None
        Node.unlink(self)

    def cloneNode(self, deep):
        """Return a deep copy of the document; None when *deep* is false."""
        if not deep:
            return None
        clone = self.implementation.createDocument(None, None, None)
        clone.encoding = self.encoding
        clone.standalone = self.standalone
        clone.version = self.version
        for n in self.childNodes:
            childclone = _clone_node(n, deep, clone)
            assert childclone.ownerDocument.isSameNode(clone)
            clone.childNodes.append(childclone)
            if childclone.nodeType == Node.DOCUMENT_NODE:
                assert clone.documentElement is None
            elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE:
                assert clone.doctype is None
                clone.doctype = childclone
            childclone.parentNode = clone
        self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED,
                                     self, clone)
        return clone

    def createDocumentFragment(self):
        d = DocumentFragment()
        d.ownerDocument = self
        return d

    def createElement(self, tagName):
        e = Element(tagName)
        e.ownerDocument = self
        return e

    def createTextNode(self, data):
        """Return a new Text node; TypeError if *data* is not a string."""
        if not isinstance(data, StringTypes):
            # Call form rather than the Python-2-only
            # ``raise TypeError, "..."`` statement; same exception.
            raise TypeError("node contents must be a string")
        t = Text()
        t.data = data
        t.ownerDocument = self
        return t

    def createCDATASection(self, data):
        """Return a new CDATASection; TypeError if *data* is not a string."""
        if not isinstance(data, StringTypes):
            raise TypeError("node contents must be a string")
        c = CDATASection()
        c.data = data
        c.ownerDocument = self
        return c

    def createComment(self, data):
        c = Comment(data)
        c.ownerDocument = self
        return c

    def createProcessingInstruction(self, target, data):
        p = ProcessingInstruction(target, data)
        p.ownerDocument = self
        return p

    def createAttribute(self, qName):
        a = Attr(qName)
        a.ownerDocument = self
        a.value = ""
        return a

    def createElementNS(self, namespaceURI, qualifiedName):
        prefix, localName = _nssplit(qualifiedName)
        e = Element(qualifiedName, namespaceURI, prefix)
        e.ownerDocument = self
        return e

    def createAttributeNS(self, namespaceURI, qualifiedName):
        prefix, localName = _nssplit(qualifiedName)
        a = Attr(qualifiedName, namespaceURI, localName, prefix)
        a.ownerDocument = self
        a.value = ""
        return a

    # A couple of implementation-specific helpers to create node types
    # not supported by the W3C DOM specs:

    def _create_entity(self, name, publicId, systemId, notationName):
        e = Entity(name, publicId, systemId, notationName)
        e.ownerDocument = self
        return e

    def _create_notation(self, name, publicId, systemId):
        n = Notation(name, publicId, systemId)
        n.ownerDocument = self
        return n

    def getElementById(self, id):
        """Return the element whose ID attribute equals *id*, or None.

        Cached results are returned first.  Otherwise the tree is searched
        from the document element; the search stack is kept on the
        document so a later call continues where this one stopped.  Every
        ID attribute seen on the way is entered into the cache.
        """
        if id in self._id_cache:
            return self._id_cache[id]
        if not (self._elem_info or self._magic_id_count):
            return None

        stack = self._id_search_stack
        if stack is None:
            # we never searched before, or the cache has been cleared
            root = self.documentElement
            # Without a document element there is nothing to search; an
            # empty stack avoids popping None and reading None.childNodes.
            if root is None:
                stack = []
            else:
                stack = [root]
            self._id_search_stack = stack
        elif not stack:
            # Previous search was completed and cache is still valid;
            # no matching node.
            return None

        result = None
        while stack:
            node = stack.pop()
            # add child elements to stack for continued searching
            stack.extend([child for child in node.childNodes
                          if child.nodeType in _nodeTypes_with_children])
            # check this node
            info = self._get_elem_info(node)
            if info:
                # We have to process all ID attributes before
                # returning in order to get all the attributes set to
                # be IDs using Element.setIdAttribute*().
                for attr in node.attributes.values():
                    if attr.namespaceURI:
                        if info.isIdNS(attr.namespaceURI, attr.localName):
                            self._id_cache[attr.value] = node
                            if attr.value == id:
                                result = node
                            elif not node._magic_id_nodes:
                                break
                    elif info.isId(attr.name):
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
                        elif not node._magic_id_nodes:
                            break
                    elif attr._is_id:
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
                        elif node._magic_id_nodes == 1:
                            break
            elif node._magic_id_nodes:
                for attr in node.attributes.values():
                    if attr._is_id:
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
            if result is not None:
                break
        return result

    def getElementsByTagName(self, name):
        return _get_elements_by_tagName_helper(self, name, NodeList())

    def getElementsByTagNameNS(self, namespaceURI, localName):
        return _get_elements_by_tagName_ns_helper(
            self, namespaceURI, localName, NodeList())

    def isSupported(self, feature, version):
        return self.implementation.hasFeature(feature, version)

    def importNode(self, node, deep):
        """Return a copy of *node* owned by this document.

        Raises xml.dom.NotSupportedErr for document and doctype nodes.
        """
        if node.nodeType == Node.DOCUMENT_NODE:
            raise xml.dom.NotSupportedErr("cannot import document nodes")
        elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
            raise xml.dom.NotSupportedErr("cannot import document type nodes")
        return _clone_node(node, deep, self)

    def writexml(self, writer, indent="", addindent="", newl="",
                 encoding = None):
        """Write the XML declaration, then every child, to *writer*.

        The declaration names *encoding* only when it is not None.
        """
        if encoding is None:
            writer.write('<?xml version="1.0" ?>'+newl)
        else:
            writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl))
        for node in self.childNodes:
            node.writexml(writer, indent, addindent, newl)

    # DOM Level 3 (WD 9 April 2002)

    def renameNode(self, n, namespaceURI, name):
        """Rename element or attribute *n* in place and return it.

        Raises xml.dom.WrongDocumentErr when *n* belongs to another
        document, xml.dom.NotSupportedErr for other node types, and
        xml.dom.NamespaceErr for illegal uses of "xmlns".  An attribute
        attached to an element is removed before the rename and re-added
        afterwards, keeping its ID flag.
        """
        if n.ownerDocument is not self:
            raise xml.dom.WrongDocumentErr(
                "cannot rename nodes from other documents;\n"
                "expected %s,\nfound %s" % (self, n.ownerDocument))
        if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE):
            raise xml.dom.NotSupportedErr(
                "renameNode() only applies to element and attribute nodes")
        if namespaceURI != EMPTY_NAMESPACE:
            if ':' in name:
                prefix, localName = name.split(':', 1)
                if (  prefix == "xmlns"
                      and namespaceURI != xml.dom.XMLNS_NAMESPACE):
                    raise xml.dom.NamespaceErr(
                        "illegal use of 'xmlns' prefix")
            else:
                if (  name == "xmlns"
                      and namespaceURI != xml.dom.XMLNS_NAMESPACE
                      and n.nodeType == Node.ATTRIBUTE_NODE):
                    raise xml.dom.NamespaceErr(
                        "illegal use of the 'xmlns' attribute")
                prefix = None
                localName = name
        else:
            prefix = None
            localName = None
        if n.nodeType == Node.ATTRIBUTE_NODE:
            element = n.ownerElement
            if element is not None:
                # is_id is only read below under the same
                # "element is not None" condition.
                is_id = n._is_id
                element.removeAttributeNode(n)
        else:
            element = None
        # avoid __setattr__
        d = n.__dict__
        d['prefix'] = prefix
        d['localName'] = localName
        d['namespaceURI'] = namespaceURI
        d['nodeName'] = name
        if n.nodeType == Node.ELEMENT_NODE:
            d['tagName'] = name
        else:
            # attribute node
            d['name'] = name
            if element is not None:
                element.setAttributeNode(n)
                if is_id:
                    element.setIdAttributeNode(n)
        # It's not clear from a semantic perspective whether we should
        # call the user data handlers for the NODE_RENAMED event since
        # we're re-using the existing node.  The draft spec has been
        # interpreted as meaning "no, don't call the handler unless a
        # new node is created."
        return n

defproperty(Document, "documentElement",
            doc="Top-level element of this document.")
1812
1813
def _clone_node(node, deep, newOwnerDocument):
    """
    Clone a node and give it the new owner document.
    Called by Node.cloneNode and Document.importNode

    The user-data operation is NODE_CLONED when *node* already belongs to
    *newOwnerDocument* and NODE_IMPORTED otherwise.  With *deep*, the
    children of elements and document fragments are cloned recursively,
    as are the notations and entities of a document type.  Raises
    xml.dom.NotSupportedErr for node types not handled here.
    """
    if node.ownerDocument.isSameNode(newOwnerDocument):
        operation = xml.dom.UserDataHandler.NODE_CLONED
    else:
        operation = xml.dom.UserDataHandler.NODE_IMPORTED
    if node.nodeType == Node.ELEMENT_NODE:
        clone = newOwnerDocument.createElementNS(node.namespaceURI,
                                                 node.nodeName)
        for attr in node.attributes.values():
            clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value)
            a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName)
            a.specified = attr.specified

        if deep:
            for child in node.childNodes:
                c = _clone_node(child, deep, newOwnerDocument)
                clone.appendChild(c)

    elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
        clone = newOwnerDocument.createDocumentFragment()
        if deep:
            for child in node.childNodes:
                c = _clone_node(child, deep, newOwnerDocument)
                clone.appendChild(c)

    elif node.nodeType == Node.TEXT_NODE:
        clone = newOwnerDocument.createTextNode(node.data)
    elif node.nodeType == Node.CDATA_SECTION_NODE:
        clone = newOwnerDocument.createCDATASection(node.data)
    elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
        clone = newOwnerDocument.createProcessingInstruction(node.target,
                                                             node.data)
    elif node.nodeType == Node.COMMENT_NODE:
        clone = newOwnerDocument.createComment(node.data)
    elif node.nodeType == Node.ATTRIBUTE_NODE:
        clone = newOwnerDocument.createAttributeNS(node.namespaceURI,
                                                   node.nodeName)
        clone.specified = True
        clone.value = node.value
    elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
        assert node.ownerDocument is not newOwnerDocument
        operation = xml.dom.UserDataHandler.NODE_IMPORTED
        clone = newOwnerDocument.implementation.createDocumentType(
            node.name, node.publicId, node.systemId)
        clone.ownerDocument = newOwnerDocument
        if deep:
            clone.entities._seq = []
            clone.notations._seq = []
            for n in node.notations._seq:
                notation = Notation(n.nodeName, n.publicId, n.systemId)
                notation.ownerDocument = newOwnerDocument
                clone.notations._seq.append(notation)
                if hasattr(n, '_call_user_data_handler'):
                    n._call_user_data_handler(operation, n, notation)
            for e in node.entities._seq:
                entity = Entity(e.nodeName, e.publicId, e.systemId,
                                e.notationName)
                entity.actualEncoding = e.actualEncoding
                entity.encoding = e.encoding
                entity.version = e.version
                entity.ownerDocument = newOwnerDocument
                clone.entities._seq.append(entity)
                if hasattr(e, '_call_user_data_handler'):
                    # The source node is the entity itself.  This used to
                    # pass ``n``, the leftover notation loop variable
                    # (unbound when there are no notations).
                    e._call_user_data_handler(operation, e, entity)
    else:
        # Note the cloning of Document and DocumentType nodes is
        # implementation specific.  minidom handles those cases
        # directly in the cloneNode() methods.
        raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node))

    # Check for _call_user_data_handler() since this could conceivably
    # used with other DOM implementations (one of the FourThought
    # DOMs, perhaps?).
    if hasattr(node, '_call_user_data_handler'):
        node._call_user_data_handler(operation, node, clone)
    return clone
1894
1895
def _nssplit(qualifiedName):
    """Split *qualifiedName* at its first colon.

    Returns the list [prefix, localName] when a colon is present and the
    tuple (None, qualifiedName) otherwise.
    """
    prefix, colon, local = qualifiedName.partition(':')
    if colon:
        return [prefix, local]
    # No colon: partition() leaves the whole name in the first slot.
    return (None, prefix)
1902
1903
def _get_StringIO():
    """Return a new, empty in-memory text buffer.

    The StringIO module's class is used when that module can be imported;
    otherwise io.StringIO is used.
    """
    # we can't use cStringIO since it doesn't support Unicode strings
    try:
        from StringIO import StringIO
    except ImportError:
        # No StringIO module on this interpreter; io.StringIO offers the
        # same write()/getvalue() interface for text.
        from io import StringIO
    return StringIO()
1908
def _do_pulldom_parse(func, args, kwargs):
    """Call *func* for an event stream and return its fully expanded root.

    The node of the first event is taken as the root, expanded through
    the stream, and returned after the stream has been cleared.
    """
    stream = func(*args, **kwargs)
    first_event = stream.getEvent()
    # Events are (token type, node) pairs; only the node is needed.
    _, root = first_event
    stream.expandNode(root)
    stream.clear()
    return root
1915
def parse(file, parser=None, bufsize=None):
    """Parse a file into a DOM by filename or file object."""
    if parser is not None or bufsize:
        # An explicit parser or buffer size selects the pulldom builder.
        from xml.dom import pulldom
        options = {'parser': parser, 'bufsize': bufsize}
        return _do_pulldom_parse(pulldom.parse, (file,), options)
    from xml.dom import expatbuilder
    return expatbuilder.parse(file)
1925
def parseString(string, parser=None):
    """Parse a file into a DOM from a string."""
    if parser is not None:
        # An explicit parser selects the pulldom builder.
        from xml.dom import pulldom
        options = {'parser': parser}
        return _do_pulldom_parse(pulldom.parseString, (string,), options)
    from xml.dom import expatbuilder
    return expatbuilder.parseString(string)
1935
def getDOMImplementation(features=None):
    """Return Document.implementation if it supports all of *features*.

    *features* may be a feature string, which is parsed with
    domreg._parse_feature_string(), or an iterable of (feature, version)
    pairs.  None is returned as soon as one feature is unsupported; an
    empty or missing *features* always yields the implementation.
    """
    if not features:
        return Document.implementation
    if isinstance(features, StringTypes):
        features = domreg._parse_feature_string(features)
    for feature, version in features:
        if not Document.implementation.hasFeature(feature, version):
            return None
    return Document.implementation
posted on 2012-11-15 15:30  Just a Programer  阅读(982)  评论(0编辑  收藏  举报