day5-xml处理模块
概述
XML是一套定义语义标记的规则,这些标记将文档分成许多部件并对这些部件加以标识。XML常用于在不同语言或程序之间交换数据,它是一种数据格式(而不是通信协议),作用与JSON类似,但JSON使用起来更简单。
XML格式:通过<>节点来区别数据结构
<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>
XML的基本操作
1.读取
import xml.etree.ElementTree as ET

# Load the sample document and grab its root element.
tree = ET.parse("xmltest.xml")
root = tree.getroot()
print(root)
print(root.tag)

# Walk the whole document: each direct child of the root, then every
# element nested one level below that child.
for entry in root:
    print(entry.tag, entry.attrib)
    for field in entry:
        print(field.tag, field.text, field.attrib)

# Visit only the <year> elements, wherever they appear under the root.
for year_node in root.iter('year'):
    print(year_node.tag, year_node.text)

# Output:
# <Element 'data' at 0x1029212c8>
# data
# country {'name': 'Liechtenstein'}
# rank 2 {'updated': 'yes'}
# year 2008 {}
# gdppc 141100 {}
# neighbor None {'name': 'Austria', 'direction': 'E'}
# neighbor None {'name': 'Switzerland', 'direction': 'W'}
# country {'name': 'Singapore'}
# rank 5 {'updated': 'yes'}
# year 2011 {}
# gdppc 59900 {}
# neighbor None {'name': 'Malaysia', 'direction': 'N'}
# country {'name': 'Panama'}
# rank 69 {'updated': 'yes'}
# year 2011 {}
# gdppc 13600 {}
# neighbor None {'name': 'Costa Rica', 'direction': 'W'}
# neighbor None {'name': 'Colombia', 'direction': 'E'}
# year 2008
# year 2011
# year 2011
2.修改
import xml.etree.ElementTree as ET

tree = ET.parse("xmltest.xml")
root = tree.getroot()

# Increase the value of every <year> element by one and mark who changed it.
for node in root.iter('year'):
    new_year = int(node.text) + 1
    # Convert the number back to a string before storing it as the node text.
    node.text = str(new_year)
    # An XML attribute name must not contain spaces: "updated by" would be
    # written out verbatim and the resulting file could no longer be parsed,
    # so an underscore is used instead.
    node.set("updated_by", "dick")

# Write the modified tree back over the file it was read from.
tree.write("xmltest.xml")

# Output (contents of xmltest.xml after the script runs):
# <data>
#     <country name="Liechtenstein">
#         <rank updated="yes">2</rank>
#         <year updated_by="dick">2009</year>
#         <gdppc>141100</gdppc>
#         <neighbor direction="E" name="Austria"/>
#         <neighbor direction="W" name="Switzerland"/>
#     </country>
#     <country name="Singapore">
#         <rank updated="yes">5</rank>
#         <year updated_by="dick">2012</year>
#         <gdppc>59900</gdppc>
#         <neighbor direction="N" name="Malaysia"/>
#     </country>
#     <country name="Panama">
#         <rank updated="yes">69</rank>
#         <year updated_by="dick">2012</year>
#         <gdppc>13600</gdppc>
#         <neighbor direction="W" name="Costa Rica"/>
#         <neighbor direction="E" name="Colombia"/>
#     </country>
# </data>
3.删除node节点
import xml.etree.ElementTree as ET

tree = ET.parse("xmltest.xml")
root = tree.getroot()

# Look at every <country> element directly under the root.
for country in root.findall('country'):
    # Read the text of this country's <rank> child as a number.
    rank_value = int(country.find('rank').text)
    if rank_value <= 50:
        continue
    # Rank is above 50: drop the whole <country> element from the root.
    root.remove(country)

# Save the pruned tree to a new file.
tree.write('output.xml')

# Output (contents of output.xml):
# <data>
#     <country name="Liechtenstein">
#         <rank updated="yes">2</rank>
#         <year>2008</year>
#         <gdppc>141100</gdppc>
#         <neighbor direction="E" name="Austria"/>
#         <neighbor direction="W" name="Switzerland"/>
#     </country>
#     <country name="Singapore">
#         <rank updated="yes">5</rank>
#         <year>2011</year>
#         <gdppc>59900</gdppc>
#         <neighbor direction="N" name="Malaysia"/>
#     </country>
# </data>
创建XML文件
import xml.etree.ElementTree as ET

# Create the root element of the new document.
new_xml = ET.Element("personalinfolist")

# First record. SubElement arguments are, in order: the parent element,
# the tag of the new child, and the child's attributes.
personalinfo = ET.SubElement(new_xml, "personalinfo", attrib={"enrolled": "yes"})
name = ET.SubElement(personalinfo, "name")  # second-level child
name.text = "DickHu"                        # text of the second-level child
age = ET.SubElement(personalinfo, "age")
age.text = '23'

# Second record, another first-level child of the root.
personalinfo2 = ET.SubElement(new_xml, "personalinfo", attrib={"enrolled": "no"})
# The name must hang off personalinfo2; attaching it to personalinfo would
# put both names into the first record and leave this one without a name.
name = ET.SubElement(personalinfo2, "name")
name.text = "Jackychen"
age = ET.SubElement(personalinfo2, "age")
age.text = '19'

# Wrap the root in a document object and save it. xml_declaration=True puts
# <?xml version='1.0' encoding='utf-8'?> at the top of the new file.
et = ET.ElementTree(new_xml)
et.write("test.xml", encoding="utf-8", xml_declaration=True)

# Print the generated XML to standard output.
ET.dump(new_xml)

# Output (shown indented for readability):
# <personalinfolist>
#     <personalinfo enrolled="yes">
#         <name>DickHu</name>
#         <age>23</age>
#     </personalinfo>
#     <personalinfo enrolled="no">
#         <name>Jackychen</name>
#         <age>19</age>
#     </personalinfo>
# </personalinfolist>