python DOM解析XML
# -*- coding: utf-8 -*-
"""Parse an XML book catalog with the DOM API (``xml.dom.minidom``).

A DOM parser hands the whole document to the application as a single
traversable object tree.  The entire document is loaded into memory, so
performance may be poor for large XML files.
"""
__author__ = 'hdfs'

import pprint
import xml.dom.minidom
from xml.dom.minidom import Node


def _text_of(element):
    """Return the joined data of the direct text-node children of *element*."""
    return "".join(
        child.data
        for child in element.childNodes
        # Only plain text nodes contribute; nested elements are skipped.
        if child.nodeType == Node.TEXT_NODE
    )


def load_titles(source='books.xml'):
    """Map each book's ``isbn`` attribute to its title text.

    Args:
        source: File name or binary file object, passed straight to
            ``xml.dom.minidom.parse``.

    Returns:
        A dict of ``isbn -> title``.  When a ``<book>`` contains several
        ``<title>`` elements the last one wins, because each title
        overwrites the entry stored for the same ISBN.  Books without any
        ``<title>`` element are not included.

    Raises:
        xml.parsers.expat.ExpatError: If the document is not well-formed
            XML (for example, a bare ``&`` instead of ``&amp;``).
    """
    doc = xml.dom.minidom.parse(source)
    mapping = {}
    # Walk every <book> element in the document.
    for book in doc.getElementsByTagName("book"):
        isbn = book.getAttribute('isbn')
        for title_node in book.getElementsByTagName('title'):
            mapping[isbn] = _text_of(title_node)
    return mapping


if __name__ == "__main__":
    pprint.pprint(load_titles())
books.xml:
<catalog> <book isbn="0-596-00128-2"> <title>Python &amp; XML</title> <title>Python &amp; HTML</title> <date>December 2001</date> <author>Jones, Drake</author> </book> <book isbn="0-596-15810-6"> <title>Programming Python, 4th Edition</title> <date>October 2010</date> <author>Lutz</author> </book> <book isbn="0-596-15806-8"> <title>Learning Python, 4th Edition</title> <date>September 2009</date> <author>Lutz</author> </book> <book isbn="0-596-15808-4"> <title>Python Pocket Reference, 4th Edition</title> <date>October 2009</date> <author>Lutz</author> </book> <book isbn="0-596-00797-3"> <title>Python Cookbook, 2nd Edition</title> <date>March 2005</date> <author>Martelli, Ravenscroft, Ascher</author> </book> <book isbn="0-596-10046-9"> <title>Python in a Nutshell, 2nd Edition</title> <date>July 2006</date> <author>Martelli</author> </book> <!-- plus many more Python books that should appear here --> </catalog>