Python 解析xml
1、获取xml树
import xml.etree.ElementTree as ET
def getTree(xmlName):
    """Parse an XML file and return its element tree.

    Args:
        xmlName: Path of the XML file. Leading and trailing whitespace
            is stripped before the file is opened.

    Returns:
        The parsed ``ElementTree``, or ``None`` when the file cannot be
        opened or is not well-formed XML. A message naming the file is
        printed in the failure case.
    """
    xmlName = xmlName.strip()
    try:
        return ET.parse(xmlName)
    except (ET.ParseError, IOError, OSError):
        # Only parse and file-access failures are expected here; a bare
        # ``except`` would also hide KeyboardInterrupt and programming errors.
        print('Analysis xml file fail,file name: {}'.format(xmlName))
        return None
|
2、获取根节点
def getRoot(tree):
    """Return the root element of a parsed tree.

    Args:
        tree: An ``ElementTree`` instance, or ``None``.

    Returns:
        ``tree.getroot()``, or ``None`` (after printing a message) when
        *tree* is ``None``.
    """
    if tree is None:
        print('Get root fail')
        return None
    return tree.getroot()
|
3、查看根节点
def seeRoot(root):
    """Print the tag, attributes, text and tail of an element.

    For ``<country name="tan">I am Xiao Ming</country>`` the tag is
    ``country``, the attributes are ``{'name': 'tan'}`` and the text is
    ``I am Xiao Ming``.

    Args:
        root: The element to inspect, or ``None`` (nothing is printed).
    """
    if root is None:
        return
    # Each line is built as one string so the call prints the same text
    # under both the Python 2 print statement and the Python 3 function.
    print('root tag: %s' % (root.tag,))        # tag name
    print('root attrib: %s' % (root.attrib,))  # attribute dictionary
    print('root text: %s' % (root.text,))      # text before the first child
    print('root tail: %s' % (root.tail,))      # text after the closing tag
|
4、从根开始遍历树
def traverseRoot(root, maxDepth=3):
    """Print every descendant of *root* down to *maxDepth* levels.

    Elements are visited parent-first. Each one is reported with a
    ``label<depth>`` prefix, where direct children of *root* are depth 1,
    followed by a separator line.

    Args:
        root: The element whose descendants are printed, or ``None``
            (nothing is printed).
        maxDepth: Deepest level to report. The default of 3 reproduces
            the ``label1``/``label2``/``label3`` output.
    """
    if root is not None:
        _printChildren(root, 1, maxDepth)


def _printChildren(parent, depth, maxDepth):
    """Print the children of *parent* labelled with *depth*, then recurse."""
    if depth > maxDepth:
        return
    prefix = 'label%d' % depth
    for node in parent:
        print('%s tag: %s' % (prefix, node.tag))
        print('%s attrib: %s' % (prefix, node.attrib))
        print('%s text: %s' % (prefix, node.text))
        print('%s tail: %s' % (prefix, node.tail))
        print('==================')
        _printChildren(node, depth + 1, maxDepth)
|
5、找到2012年的gdppc和neighbor下的b标签(找到同层有条件的同层另一个tag的文本)
def findYouNedd(root):
    """Print the gdppc and nested ``b`` text of every country of year 2012.

    A direct child of *root* qualifies when its tag is ``country`` and it
    has a ``year`` child whose text is ``'2012'``. For each qualifying
    country a marker line is printed once, then the text of each ``gdppc``
    child, then the text of every ``b`` element found directly under any
    child of the country (for example under ``neighbor``).

    Args:
        root: The root element to search, or ``None`` (nothing is printed).
    """
    if root is None:
        return
    for country in root:
        if country.tag != 'country':
            continue
        # Require the matching text to belong to a <year> element, so that
        # another field that happens to read '2012' does not select the
        # country.
        isTarget = any(
            child.tag == 'year' and child.text == '2012' for child in country
        )
        if not isTarget:
            continue
        print('Find tag为country and next year=2012')
        for child in country:
            if child.tag == 'gdppc':
                print(child.text)
            for youNeed in child:
                if youNeed.tag == 'b':
                    print('You need: %s' % (youNeed.text,))
|
6、查找父节点下的子节点
def findChildNode(fatherNode, childNode):
    """Print the direct children of *fatherNode* matching *childNode*.

    Two lines are printed: the list returned by ``findall`` and the number
    of elements in it.

    Args:
        fatherNode: The parent element to search, or ``None`` (nothing is
            printed).
        childNode: Tag name or path passed to ``findall``. Leading and
            trailing whitespace is stripped first.
    """
    childNode = childNode.strip()
    if fatherNode is None:
        return
    childs = fatherNode.findall(childNode)
    print(childs)
    print(len(childs))
|
7、另一种办法实现第5点
def findYouNedd2(root):
    """Print the gdppc text of every country whose year is 2012.

    Uses ``findall``/``findtext`` lookups on the direct ``country``
    children of *root* instead of nested loops.

    Args:
        root: The root element to search, or ``None`` (nothing is printed).
    """
    # The None check must come before any method call on root; otherwise
    # the guard can never prevent the AttributeError it is meant to avoid.
    if root is None:
        return
    for countryNode in root.findall('country'):
        # findtext returns None for a missing <year> instead of raising.
        if countryNode.findtext('year') != '2012':
            continue
        gdppc = countryNode.find('gdppc')
        if gdppc is not None:
            print(gdppc.text)
|
8、移除节点
def delNode(tree, nodeName, outFile='removeNode.xml'):
    """Remove the first direct child named *nodeName* and save the result.

    Args:
        tree: The ``ElementTree`` to modify in place, or ``None`` (nothing
            happens).
        nodeName: Tag of the child to remove from the root element.
            Leading and trailing whitespace is stripped first.
        outFile: Path the modified tree is written to. The file is written
            only when a node was actually removed.
    """
    nodeName = nodeName.strip()
    if tree is None:
        return
    root = tree.getroot()
    findNode = root.find(nodeName)
    # find() also accepts paths such as 'country/rank'; the tag comparison
    # rejects those matches, which are not direct children of root and so
    # cannot be passed to root.remove().
    if findNode is not None and findNode.tag == nodeName:
        root.remove(findNode)
        tree.write(outFile)  # the XML as it stands after the removal
|
9、xml样例(xmlDemo.xml)
<?xml version="1.0"?> <data> <country name="Liechtenstein"> <rank>1</rank> <year>2008</year> <gdppc>141100</gdppc> <neighbor name="Austria" direction="E"/> <neighbor name="Switzerland" direction="W"/> </country> <country name="Singapore"> <rank>4</rank> <year>2011</year> <gdppc>59900</gdppc> <neighbor name="Malaysia" direction="N">123 <a name="a"> aaa </a> </neighbor> </country> <country name="Singapore"> <rank>68</rank> <year>2012</year> <gdppc>13600</gdppc> <neighbor name="Costa Rica" direction="W"/> <neighbor name="Colombia" direction="E">456 <b name="b"> bbb </b> </neighbor> </country> <city>789</city> </data>
|