Python攻克之路-xml模块

xml模块
描述:xml是一种用于在不同语言或程序之间进行数据交换的标记语言(数据格式),作用与json类似,但json使用起来更简单。在json诞生之前,xml已经被使用了很久,至今很多传统行业(如金融行业)的系统接口仍然主要使用xml

 

存储数据的格式:一个个的标签组成

[root@python3 xml]# cat test.xml 
<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">                   #国家,country是标签,name是它的属性
        <rank updated="yes">2</rank>                 #排名
        <year>2008</year>                            #年份
        <gdppc>141100</gdppc>                        #gdp
        <neighbor name="Austria" direction="E"/>     #邻国
        <neighbor name="Switzerland" direction="W"/> #邻国,这种写法是自闭合的写法
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>

  

xml在各个语言中都支持,以下是在python中的使用

[root@python3 xml]# cat py_xml.py 
#!/usr/local/python3/bin/python3
import xml.etree.ElementTree as ET

tree = ET.parse("test.xml")   # parse the XML file into an ElementTree
root = tree.getroot()         # get the outermost element, <data>
print(root.tag)

遍历xml文档

for child in root:            # root is the outermost element; iterating it yields its child elements
    print(child.tag, child.attrib)  # print the tag name and the attribute dict
    for i in child:
        print(i.tag, i.text)        # each child has nested elements too; print their tag and text

只遍历year节点

# Iterate over the <year> elements only, printing each one's tag and text.
for node in root.iter('year'):
    print(node.tag, node.text)

root的内容

[root@python3 xml]# cat py_xml.py 
#!/usr/local/python3/bin/python3
import xml.etree.ElementTree as ET

tree = ET.parse("test.xml")   # parse the sample document test.xml
root = tree.getroot()         # outermost element of the document
print(root.tag)               # prints: data
[root@python3 xml]# python3 py_xml.py
data

打印国家

[root@python3 xml]# cat py_xml.py
#!/usr/local/python3/bin/python3
import xml.etree.ElementTree as ET

tree = ET.parse("test.xml")
root = tree.getroot()
print(root.tag)

# Each direct child of the root is a <country> element.
for child in root:
    print(child.tag, child.attrib)  # tag name, then the attribute dict
[root@python3 xml]# python3 py_xml.py
data
country {'name': 'Liechtenstein'}  #country: child.tag, {'name': 'Liechtenstein'}是child.attrib
country {'name': 'Singapore'}
country {'name': 'Panama'}

打印文本

[root@python3 xml]# cat py_xml.py
#!/usr/local/python3/bin/python3
import xml.etree.ElementTree as ET

tree = ET.parse("test.xml")
root = tree.getroot()
print(root.tag)

for child in root:
    print(child.tag, child.attrib)
    # Descend one more level: the elements nested inside each <country>.
    for i in child:
        print(i.tag, i.text)  # text is None for elements that have no text content
[root@python3 xml]# python3 py_xml.py
data
country {'name': 'Liechtenstein'} #国家下的文本
rank 2                            #rank: i.tag   2: i.text
year 2008
gdppc 141100
neighbor None                     #neighbor是自闭合的标签,没有自己的文本内容
neighbor None

country {'name': 'Singapore'}
rank 5
year 2011
gdppc 59900
neighbor None

country {'name': 'Panama'}
rank 69
year 2011
gdppc 13600
neighbor None
neighbor None

  

xml的修改

[root@python3 xml]# cat py_mod_xml.py
#!/usr/local/python3/bin/python3
import xml.etree.ElementTree as ET

tree = ET.parse("test.xml")
root = tree.getroot()

# Bump every <year> by one and flag the element as updated.
for year_elem in root.iter('year'):
    # The element text is a string such as "2011": convert it to int,
    # add 1, and store it back as a string (2011 + 1 -> "2012").
    year_elem.text = str(int(year_elem.text) + 1)
    # set() writes the "updated" attribute on the element.
    year_elem.set("updated", "yes")

# All changes live in the in-memory tree object; write() saves them to a new file.
tree.write("test1.xml")

[root@python3 xml]# python3 py_mod_xml.py
[root@python3 xml]# cat test1.xml 
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year updated="yes">2009</year>    #修改了,重新加一个属性
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year updated="yes">2012</year>    ###
        <gdppc>59900</gdppc>
        <neighbor direction="N" name="Malaysia" />
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year updated="yes">2012</year>        ####
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>

  

xml的删除操作

[root@python3 xml]# cat py_del_xml.py
#!/usr/local/python3/bin/python3
import xml.etree.ElementTree as ET

tree = ET.parse("test.xml")
root = tree.getroot()

# Collect every <country> whose <rank> text, read as an integer, is above 50.
low_ranked = [
    country
    for country in root.findall('country')
    if int(country.find('rank').text) > 50
]

# Detach the collected countries from the root element.
for country in low_ranked:
    root.remove(country)

# Save the pruned tree to a new file.
tree.write('test2.xml')

[root@python3 xml]# python3 py_del_xml.py
[root@python3 xml]# cat test2.xml 
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor direction="N" name="Malaysia" />
    </country>
    </data>[root@python3 xml]# 

  

自创建xml

[root@python3 xml]# vim py_create_xml.py
#!/usr/local/python3/bin/python3
import xml.etree.ElementTree as ET

# Root element of the new document.
new_xml = ET.Element("namelist")

# First <name> entry: an <age> carrying its own attribute, then an empty <sex>.
first_entry = ET.SubElement(new_xml, "name", enrolled="yes")
first_age = ET.SubElement(first_entry, "age", checked="no")
first_age.text = '33'
ET.SubElement(first_entry, "sex")

# Second <name> entry: only an <age>.
second_entry = ET.SubElement(new_xml, "name", enrolled="no")
ET.SubElement(second_entry, "age").text = '19'

# Wrap the root in a document object and save it with an XML declaration.
et = ET.ElementTree(new_xml)
et.write("test3.xml", encoding="utf-8", xml_declaration=True)

# Print the generated markup to stdout.
ET.dump(new_xml)
[root@python3 xml]# python3 py_create_xml.py 
<namelist><name enrolled="yes"><age checked="no">33</age><sex /></name><name enrolled="no"><age>19</age></name></namelist>

 

 

posted @ 2018-04-16 15:13  Reid21  阅读(142)  评论(0)  编辑  收藏  举报