Python攻克之路-xml模块
xml模块
描述:xml是一种用于在不同语言或程序之间进行数据交换的标记格式,作用跟json差不多,但json使用起来更简单。在json还没有诞生时,xml就已经被使用了很久,至今很多传统公司(如金融行业)的系统接口仍然主要使用xml
存储数据的格式:一个个的标签组成
[root@python3 xml]# cat test.xml <?xml version="1.0"?> <data> <country name="Liechtenstein"> #国家,country是标签,name是它的属性 <rank updated="yes">2</rank> #排名 <year>2008</year> #年份 <gdppc>141100</gdppc> #gdp <neighbor name="Austria" direction="E"/> #邻国 <neighbor name="Switzerland" direction="W"/> #邻国,这种写法是自闭合的写法 </country> <country name="Singapore"> <rank updated="yes">5</rank> <year>2011</year> <gdppc>59900</gdppc> <neighbor name="Malaysia" direction="N"/> </country> <country name="Panama"> <rank updated="yes">69</rank> <year>2011</year> <gdppc>13600</gdppc> <neighbor name="Costa Rica" direction="W"/> <neighbor name="Colombia" direction="E"/> </country> </data>
xml在各个语言中都支持,以下是在python中的使用
[root@python3 xml]# cat py_xml.py #!/usr/local/python3/bin/python3 import xml.etree.ElementTree as ET tree = ET.parse("test.xml") #解析指定的xml文件 root = tree.getroot() #得到最外层标签data print(root.tag)
遍历xml文档
for child in root: #root是最外层,其他是它的子标签 print(child.tag, child.attrib) #标签名,属性打印 for i in child: print(i.tag, i.text) #子下还有标签,文本
只遍历year节点
for node in root.iter('year'): print(node.tag, node.text)
root的内容
[root@python3 xml]# cat py_xml.py #!/usr/local/python3/bin/python3 import xml.etree.ElementTree as ET tree = ET.parse("test.xml") root = tree.getroot() print(root.tag) [root@python3 xml]# python3 py_xml.py data
打印国家
[root@python3 xml]# cat py_xml.py #!/usr/local/python3/bin/python3 import xml.etree.ElementTree as ET tree = ET.parse("test.xml") root = tree.getroot() print(root.tag) for child in root: print(child.tag, child.attrib) [root@python3 xml]# python3 py_xml.py data country {'name': 'Liechtenstein'} #country: child.tag, {'name': 'Liechtenstein'}是child.attrib country {'name': 'Singapore'} country {'name': 'Panama'}
打印文本
[root@python3 xml]# cat py_xml.py #!/usr/local/python3/bin/python3 import xml.etree.ElementTree as ET tree = ET.parse("test.xml") root = tree.getroot() print(root.tag) for child in root: print(child.tag, child.attrib) for i in child: print(i.tag, i.text) [root@python3 xml]# python3 py_xml.py data country {'name': 'Liechtenstein'} #国家下的文本 rank 2 #rank: i.tag 2: i.text year 2008 gdppc 141100 neighbor None #neighbor是自闭合的标签,没有自己的文本内容,所以i.text为None neighbor None country {'name': 'Singapore'} rank 5 year 2011 gdppc 59900 neighbor None country {'name': 'Panama'} rank 69 year 2011 gdppc 13600 neighbor None neighbor None
xml的修改
[root@python3 xml]# cat py_mod_xml.py #!/usr/local/python3/bin/python3 import xml.etree.ElementTree as ET tree = ET.parse("test.xml") root = tree.getroot() for node in root.iter('year'): #对root下year的标签做循环,node是year的标签,year标签中有text(是那个数字,如2011) new_year = int(node.text) + 1 #对年份的值做修改,处理下,加1,变成新的值,如2011+1 = 2012 node.text = str(new_year) #再转换成字符串,再赋值回node.text node.set("updated","yes") #还有一个updated的属性,使用set来修改 tree.write("test1.xml") #修改完后都在tree对象里,再使用tree调用write方法重新写 [root@python3 xml]# python3 py_mod_xml.py [root@python3 xml]# cat test1.xml <data> <country name="Liechtenstein"> <rank updated="yes">2</rank> <year updated="yes">2009</year> #修改了,新增了一个属性 <gdppc>141100</gdppc> <neighbor direction="E" name="Austria" /> <neighbor direction="W" name="Switzerland" /> </country> <country name="Singapore"> <rank updated="yes">5</rank> <year updated="yes">2012</year> ### <gdppc>59900</gdppc> <neighbor direction="N" name="Malaysia" /> </country> <country name="Panama"> <rank updated="yes">69</rank> <year updated="yes">2012</year> #### <gdppc>13600</gdppc> <neighbor direction="W" name="Costa Rica" /> <neighbor direction="E" name="Colombia" /> </country> </data>
xml的删除操作
[root@python3 xml]# cat py_del_xml.py #!/usr/local/python3/bin/python3 import xml.etree.ElementTree as ET tree = ET.parse("test.xml") root = tree.getroot() for country in root.findall('country'): #遍历所有国家 rank = int(country.find('rank').text) #找到排名的标签,取到text(排名的数字) if rank > 50: #排名大于50的移除 root.remove(country) tree.write('test2.xml') [root@python3 xml]# python3 py_del_xml.py [root@python3 xml]# cat test2.xml <data> <country name="Liechtenstein"> <rank updated="yes">2</rank> <year>2008</year> <gdppc>141100</gdppc> <neighbor direction="E" name="Austria" /> <neighbor direction="W" name="Switzerland" /> </country> <country name="Singapore"> <rank updated="yes">5</rank> <year>2011</year> <gdppc>59900</gdppc> <neighbor direction="N" name="Malaysia" /> </country> </data>[root@python3 xml]#
自创建xml
[root@python3 xml]# vim py_create_xml.py #!/usr/local/python3/bin/python3 import xml.etree.ElementTree as ET new_xml = ET.Element("namelist") #ET调用Element方法 name1 = ET.SubElement(new_xml,"name", attrib={"enrolled": "yes"}) #ET调用SubElement方法,和属性 age = ET.SubElement(name1, "age", attrib={"checked": "no"}) sex = ET.SubElement(name1, "sex") age.text = '33' name2 = ET.SubElement(new_xml, "name", attrib={"enrolled": "no"}) age = ET.SubElement(name2, "age") age.text = '19' et = ET.ElementTree(new_xml) #生成文档对象 et.write("test3.xml", encoding="utf-8", xml_declaration=True) ET.dump(new_xml) #打印生成格式 [root@python3 xml]# python3 py_create_xml.py <namelist><name enrolled="yes"><age checked="no">33</age><sex /></name><name enrolled="no"><age>19</age></name></namelist>