day5-xml处理模块

概述

XML(可扩展标记语言)是一套定义语义标记的规则,这些标记将文档分成许多部件并对这些部件加以标识。XML常用于在不同语言或程序之间交换数据,作用与JSON类似,但JSON使用起来更简单。

XML格式:通过<>节点来区别数据结构

<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>

XML的基本操作

1.读取

import xml.etree.ElementTree as ET

# Load the sample document and grab its root element.
tree = ET.parse("xmltest.xml")
root = tree.getroot()
print(root)
print(root.tag)

# Walk the whole document: each direct child of the root, then every
# element nested directly inside that child.
for entry in root:
    print(entry.tag, entry.attrib)
    for field in entry:
        print(field.tag, field.text, field.attrib)

# Visit only the <year> elements, wherever they sit below the root.
for year_node in root.iter("year"):
    print(year_node.tag, year_node.text)

#输出
<Element 'data' at 0x1029212c8>
data
country {'name': 'Liechtenstein'}
rank 2 {'updated': 'yes'}
year 2008 {}
gdppc 141100 {}
neighbor None {'name': 'Austria', 'direction': 'E'}
neighbor None {'name': 'Switzerland', 'direction': 'W'}
country {'name': 'Singapore'}
rank 5 {'updated': 'yes'}
year 2011 {}
gdppc 59900 {}
neighbor None {'name': 'Malaysia', 'direction': 'N'}
country {'name': 'Panama'}
rank 69 {'updated': 'yes'}
year 2011 {}
gdppc 13600 {}
neighbor None {'name': 'Costa Rica', 'direction': 'W'}
neighbor None {'name': 'Colombia', 'direction': 'E'}
year 2008
year 2011
year 2011

2.修改

import xml.etree.ElementTree as ET

# Parse the sample document so it can be edited in memory.
tree = ET.parse("xmltest.xml")
root = tree.getroot()

# Bump every <year> value by one and record who made the change.
for node in root.iter("year"):
    new_year = int(node.text) + 1  # node.text is read as a string
    node.text = str(new_year)      # convert back to a string before storing
    # XML attribute names may not contain spaces: "updated by" would be
    # written out as malformed XML that can no longer be parsed.
    node.set("updatedby", "dick")

# Write the modified tree back over the original file.
tree.write("xmltest.xml")

#输出
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year updatedby="dick">2009</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria"/>
        <neighbor direction="W" name="Switzerland"/>
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year updatedby="dick">2012</year>
        <gdppc>59900</gdppc>
        <neighbor direction="N" name="Malaysia"/>
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year updatedby="dick">2012</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica"/>
        <neighbor direction="E" name="Colombia"/>
    </country>
</data>

3.删除node节点

import xml.etree.ElementTree as ET

# Parse the sample document so elements can be removed in memory.
tree = ET.parse("xmltest.xml")
root = tree.getroot()

# findall() returns a list of the matching direct children, so removing
# entries from root inside this loop does not disturb the iteration.
for entry in root.findall("country"):
    position = int(entry.find("rank").text)  # text of the child <rank>
    if position <= 50:
        continue
    root.remove(entry)  # drop every country ranked above 50

# Save the pruned tree to a new file.
tree.write("output.xml")

#输出
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria"/>
        <neighbor direction="W" name="Switzerland"/>
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor direction="N" name="Malaysia"/>
    </country>
</data>

创建XML文件

import xml.etree.ElementTree as ET 

# Build a document in memory: a <personalinfolist> root holding one
# <personalinfo> element per person.
new_xml = ET.Element("personalinfolist")  # root element

# SubElement arguments are, in order: parent element, child tag, and the
# child's attributes (passed through the attrib keyword).
personalinfo = ET.SubElement(new_xml, "personalinfo", attrib={"enrolled": "yes"})
name = ET.SubElement(personalinfo, "name")  # second-level child
name.text = "DickHu"                        # text content of <name>
age = ET.SubElement(personalinfo, "age")
age.text = "23"

# Second record. Its children must hang off personalinfo2, not the first
# record, otherwise the first person ends up with two <name> elements.
personalinfo2 = ET.SubElement(new_xml, "personalinfo", attrib={"enrolled": "no"})
name = ET.SubElement(personalinfo2, "name")
name.text = "Jackychen"
age = ET.SubElement(personalinfo2, "age")
age.text = "19"

# Wrap the root in a document object and write it out. xml_declaration=True
# puts <?xml version='1.0' encoding='utf-8'?> at the top of the new file.
et = ET.ElementTree(new_xml)
et.write("test.xml", encoding="utf-8", xml_declaration=True)

ET.dump(new_xml)  # print the generated XML

#输出
<personalinfolist>
    <personalinfo enrolled="yes">
        <name>DickHu</name>
        <age>23</age>
    </personalinfo>
    <personalinfo enrolled="no">
        <name>Jackychen</name>
        <age>19</age>
    </personalinfo>
</personalinfolist>
posted @ 2017-08-02 22:22  Mr.hu  阅读(114)  评论(0)  编辑  收藏  举报