XML模块(二十四)
xml是实现不同语言或程序之间进行数据交换的协议,跟json差不多,但json使用起来更简单,不过,古时候,在json还没诞生的黑暗年代,
大家只能选择用xml呀,至今很多传统公司如金融行业的很多系统的接口还主要是xml。
xml的格式如下,就是通过<>节点来区别数据结构的:
<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>
xml协议在各个语言里都是支持的,在python中可以用以下模块操作xml:
# print(root.iter('year'))  # 全文搜索
# print(root.find('country'))  # 在root的子节点找,只找一个
# print(root.findall('country'))  # 在root的子节点找,找所有
查:
import xml.etree.ElementTree as ET

# Load the sample document and grab its root element.
tree = ET.parse('xml_l')
root = tree.getroot()

# Visit only the <year> elements; iter() searches the whole tree below root.
for year_node in root.iter('year'):
    tag, text = year_node.tag, year_node.text
    print(tag, text)

# Output with the sample document:
#   year 2008
#   year 2011
#   year 2011
import xml.etree.ElementTree as ET

# Load the sample document and grab its root element.
tree = ET.parse('xml_l')
root = tree.getroot()

# Walk two levels of the tree: each child of the root, then each of its children.
for country in root:
    # The element object itself, its tag name, and its attribute dict
    # (e.g. {'name': 'Liechtenstein'}), one per line.
    print(country, country.tag, country.attrib, sep='\n')
    for child in country:
        # Tag name, attribute dict (e.g. {'updated': 'yes'}) and text content,
        # one per line; text is None for elements without content.
        print(child.tag, child.attrib, child.text, sep='\n')

# Output with the sample document (the object addresses vary between runs):
#   <Element 'country' at 0x022D96F0>
#   country
#   {'name': 'Liechtenstein'}
#   rank
#   {'updated': 'yes'}
#   2
#   year
#   {}
#   2008
#   gdppc
#   {}
#   141100
#   neighbor
#   {'name': 'Austria', 'direction': 'E'}
#   None
#   neighbor
#   {'name': 'Switzerland', 'direction': 'W'}
#   None
#   <Element 'country' at 0x022D9840>
#   country
#   {'name': 'Singapore'}
#   rank
#   {'updated': 'yes'}
#   5
#   year
#   {}
#   2011
#   gdppc
#   {}
#   59900
#   neighbor
#   {'name': 'Malaysia', 'direction': 'N'}
#   None
#   <Element 'country' at 0x022D9960>
#   country
#   {'name': 'Panama'}
#   rank
#   {'updated': 'yes'}
#   69
#   year
#   {}
#   2011
#   gdppc
#   {}
#   13600
#   neighbor
#   {'name': 'Costa Rica', 'direction': 'W'}
#   None
#   neighbor
#   {'name': 'Colombia', 'direction': 'E'}
#   None
修改:
import xml.etree.ElementTree as ET

# Load the sample document and grab its root element.
tree = ET.parse("xml_l")
root = tree.getroot()

# Modify: bump every <year> value by one and tag the element as updated.
for year_node in root.iter('year'):
    # Element text is a string, so convert to int for the arithmetic and back.
    year_node.text = str(int(year_node.text) + 1)
    # set() adds (or overwrites) an attribute on the element.
    year_node.set('update', 'yes')

# Save the modified tree to a new file; the source file is left untouched.
tree.write("new_xml.xml")
new_xml.xml
<data> <country name="Liechtenstein"> <rank updated="yes">2</rank> <year update="yes">2009</year> <gdppc>141100</gdppc> <neighbor direction="E" name="Austria" /> <neighbor direction="W" name="Switzerland" /> </country> <country name="Singapore"> <rank updated="yes">5</rank> <year update="yes">2012</year> <gdppc>59900</gdppc> <neighbor direction="N" name="Malaysia" /> </country> <country name="Panama"> <rank updated="yes">69</rank> <year update="yes">2012</year> <gdppc>13600</gdppc> <neighbor direction="W" name="Costa Rica" /> <neighbor direction="E" name="Colombia" /> </country> </data>
import xml.etree.ElementTree as ET

# Load the sample document and grab its root element.
tree = ET.parse("xml_l")
root = tree.getroot()

# For each <year> later than 2000, add a <year2> child to its country.
for country in root.findall('country'):
    for year_node in country.findall('year'):
        if int(year_node.text) <= 2000:
            continue
        # Build <year2 update="yes">NewYear</year2> ...
        extra = ET.Element('year2', {'update': 'yes'})
        extra.text = 'NewYear'
        # ... and append it as the last child of this country.
        country.append(extra)

# Save the extended tree to a new file.
tree.write('xml_l_swap.xml')
<data> <country name="Liechtenstein"> <rank updated="yes">2</rank> <year>2008</year> <gdppc>141100</gdppc> <neighbor direction="E" name="Austria" /> <neighbor direction="W" name="Switzerland" /> <year2 update="yes">NewYear</year2></country> <country name="Singapore"> <rank updated="yes">5</rank> <year>2011</year> <gdppc>59900</gdppc> <neighbor direction="N" name="Malaysia" /> <year2 update="yes">NewYear</year2></country> <country name="Panama"> <rank updated="yes">69</rank> <year>2011</year> <gdppc>13600</gdppc> <neighbor direction="W" name="Costa Rica" /> <neighbor direction="E" name="Colombia" /> <year2 update="yes">NewYear</year2></country> </data>
删除:
import xml.etree.ElementTree as ET

# Load the sample document and grab its root element.
tree = ET.parse("xml_l")
root = tree.getroot()

# Delete: pick every country whose <rank> is above 50 ...
low_ranked = [
    country
    for country in root.findall('country')
    if int(country.find('rank').text) > 50
]

# ... and detach each of them from the root element.
for country in low_ranked:
    root.remove(country)

# Save the pruned tree to a new file.
tree.write('new_xml2.xml')
new_xml2.xml
<data> <country name="Liechtenstein"> <rank updated="yes">2</rank> <year>2008</year> <gdppc>141100</gdppc> <neighbor direction="E" name="Austria" /> <neighbor direction="W" name="Switzerland" /> </country> <country name="Singapore"> <rank updated="yes">5</rank> <year>2011</year> <gdppc>59900</gdppc> <neighbor direction="N" name="Malaysia" /> </country> </data>
创建XML:
import xml.etree.ElementTree as ET

# Root element of the new document.
my_xml = ET.Element("namelist")

# First entry: <name enrolled="yes"> holding an empty <age checked="no"/>
# and <sex>man</sex>.
name = ET.SubElement(my_xml, "name", attrib={"enrolled": "yes"})
ET.SubElement(name, "age", attrib={"checked": "no"})
ET.SubElement(name, "sex").text = "man"

# Second entry: <name1 enrolled="no"> holding <age>18</age>.
name2 = ET.SubElement(my_xml, "name1", attrib={"enrolled": "no"})
ET.SubElement(name2, "age").text = "18"

# Wrap the root in a document object and write it out with an XML declaration.
et = ET.ElementTree(my_xml)
et.write("text.xml", encoding="utf-8", xml_declaration=True)
text.xml
<?xml version='1.0' encoding='utf-8'?> <namelist> <name enrolled="yes"> <age checked="no" /> <sex>man</sex> </name> <name1 enrolled="no"> <age>18</age> </name1> </namelist>