python模块之xml
xml是一种用于在不同语言或程序之间交换数据的标记格式,作用跟json差不多,但json使用起来更简单。不过,在json还没诞生的黑暗年代,
大家只能选择用xml,至今很多传统公司(如金融行业)的很多系统接口仍然以xml为主。
# test.xml
<?xml version="1.0"?> <data> <country name="Liechtenstein"> <rank updated="yes">2</rank> <year>2008</year> <gdppc>141100</gdppc> <neighbor name="Austria" direction="E"/> <neighbor name="Switzerland" direction="W"/> </country> <country name="Singapore"> <rank updated="yes">5</rank> <year>2011</year> <gdppc>59900</gdppc> <neighbor name="Malaysia" direction="N"/> </country> <country name="Panama"> <rank updated="yes">69</rank> <year>2011</year> <gdppc>13600</gdppc> <neighbor name="Costa Rica" direction="W"/> <neighbor name="Colombia" direction="E"/> </country> </data> xml数据 xml文件
# import xml.etree.ElementTree as ET # 在进行操作之前,都应该进行下面两步: # tree = ET.parse("test.xml") # 形成树形结构 # root = tree.getroot() # 得到树的根系 # print(root) # <Element 'data' at 0x000002043F6B5868> # 循环打印 # for i in root: # print(i) # <Element 'country' at 0x00000261CF835958> # <Element 'country' at 0x00000261CFB33D68> # <Element 'country' at 0x00000261CFB33EF8> # 所有的增删改查都是基于这个root根系去操作 # 查 # 1. 全文搜索year,将所有的year标签全部找到 # print(root.iter("year")) # <_elementtree._element_iterator object at 0x00000133E8B6EBA0> # print([i for i in root.iter("year")]) # [<Element 'year' at 0x000001E227DB0548>, <Element 'year' at 0x000001E227DC4E08>, <Element 'year' at 0x000001E227DC4F98>] # 2. 只找第一个,找到就返回 # print(root.find("country")) # <Element 'country' at 0x00000228C9BF5958> # 3. 在root的子节点里找,找所有的 # print(root.findall("country")) # [<Element 'country' at 0x00000179E6BD5958>, <Element 'country' at 0x00000179E6ED3DB8>, <Element 'country' at 0x00000179E6ED3F48>] # 练习 # 1. 找所有的rank标签,以及attrib和text # print([i.attrib for i in root.iter("rank")]) # [{'updated': 'yes'}, {'updated': 'yes'}, {'updated': 'yes'}] # print([i.text for i in root.iter("rank")]) # ['2', '5', '69'] # 2. 
找到第二个country的neighbor标签以及它的属性 # print([tag for tag in root.findall("country")][1].find("neighbor").attrib) # {'name': 'Malaysia', 'direction': 'N'} # 增 append # import xml.etree.ElementTree as ET # tree = ET.parse("test.xml") # root = tree.getroot() # # # 给所有year大于2010的country标签下面添加一个month标签,属性为name:month 内容为30days # for country in root.findall("country"): # for year in country.findall("year"): # if int(year.text) > 2010: # month = ET.Element("month") # month.text = "30days" # month.attrib = {"name": "month"} # country.append(month) # tree.write("test.xml") # 改 # import xml.etree.ElementTree as ET # tree = ET.parse("test.xml") # 形成树形结构 # root = tree.getroot() # 得到树的根系 # # # 对所有的year属性以及值进行修改 # for node in root.iter("year"): # new_year = int(node.text) + 1 # year+1 # node.text = str(new_year) # # node.set("updated", "yes") # # node.set("version", "1.0") # tree.write("test.xml") # 删 # import xml.etree.ElementTree as ET # tree = ET.parse("test.xml") # 形成树形结构 # root = tree.getroot() # 得到树的根系 # # # 将rank值大于50的country标签删除 # for country in root.findall("country"): # rank = int(country.find("rank").text) # if rank > 50: # root.remove(country) # tree.write("test1.xml") # 将更改后的内容写到test1.xml中
# Create an XML document from scratch.
#
# Builds the following tree in memory, writes it to "test.xml" (UTF-8, with an
# XML declaration) and then dumps it to stdout:
#
#   <namelist>
#     <name enrolled="yes"><age checked="no" /><sex>33</sex></name>
#     <name enrolled="no"><age>19</age></name>
#   </namelist>
#
# NOTE: the output path is "test.xml", the same file the parsing examples
# read, so running this replaces that sample data.
import xml.etree.ElementTree as ET

# Root element of the new document.
new_xml = ET.Element("namelist")

# First <name> entry: an <age> carrying only an attribute, and a <sex> with text.
name = ET.SubElement(new_xml, "name", attrib={"enrolled": "yes"})
age = ET.SubElement(name, "age", attrib={"checked": "no"})
sex = ET.SubElement(name, "sex")
# NOTE(review): "33" looks like an age value stored in <sex>; kept unchanged
# so the generated document stays the same -- confirm the intent.
sex.text = "33"

# Second <name> entry: only an <age> child with text.
name2 = ET.SubElement(new_xml, "name", attrib={"enrolled": "no"})
age2 = ET.SubElement(name2, "age")
age2.text = "19"

# Wrap the root element in a document object so it can be serialized to disk.
et = ET.ElementTree(new_xml)
et.write("test.xml", encoding="utf-8", xml_declaration=True)

# Print the generated markup to stdout for inspection.
ET.dump(new_xml)