XML模块
什么是XML
XML(eXtensible Markup Language,可扩展标记语言)是标准通用标记语言(SGML)的子集,是一种用于标记电子文件、使其具有结构性的标记语言。它被设计用来传输和存储数据,是一种在不同语言或程序之间进行数据交换的格式,作用与JSON类似,但JSON使用起来更简单。
XML的格式如下,它通过<>节点来区分数据结构:
<data>
<country name="Liechtenstein">
<rank updated="yes">2</rank>
<year update="yes" updated="yes">2012</year>
<gdppc>141100</gdppc>
<neighbor direction="E" name="Austria" />
<neighbor direction="W" name="Switzerland" />
</country>
<country name="Singapore">
<rank updated="yes">5</rank>
<year update="yes" updated="yes">2015</year>
<gdppc>59900</gdppc>
<neighbor direction="N" name="Malaysia" />
</country>
<country name="Panama">
<rank updated="yes">69</rank>
<year update="yes" updated="yes">2015</year>
<gdppc>13600</gdppc>
<neighbor direction="W" name="Costa Rica" />
<neighbor direction="E" name="Colombia" />
</country>
</data>
python操作xml
import xml.etree.ElementTree as ET
# Parse the file "xml_test", walk it, then modify and prune the tree.
tree = ET.parse("xml_test")  # parse the XML file into an ElementTree object
root = tree.getroot()  # root element of the document
print(root.tag)  # tag: the element's tag name

# Walk the document: every child of the root, then every grandchild.
for i in root:
    print(i.tag, i.attrib)  # attrib: dict of the element's attributes
    for j in i:
        print(j.tag, j.text)  # text: the content enclosed by the tag

# Visit only the <year> elements found below the root.
for node in root.iter('year'):
    print(node.tag, node.text)

# Modify: increment every <year> and mark it as updated.
for node in root.iter("year"):
    new_year = int(node.text) + 1  # text is a string, so convert before adding
    node.text = str(new_year)  # store the new value back as text
    node.set("updated", "yes")  # add/overwrite the attribute updated="yes"
# In-memory changes only take effect on disk once the tree is written out.
tree.write('xml_test')

# Delete: drop every <country> whose <rank> is greater than 50.
# findall() returns a list, so removing from root inside the loop is safe.
for country in root.findall("country"):
    rank = int(country.find("rank").text)
    if rank > 50:
        root.remove(country)
tree.write("xml_test1")
创建xml数据
# 创建xml数据
import xml.etree.ElementTree as ET
# Build a small XML document in memory and write it to the file "new_xml".
new_xml = ET.Element("namelist")  # root element <namelist>

# First entry: <name1 enrolled="yes"> holding an empty <age check="no" />
# and a <sex> element whose text is set below.
name = ET.SubElement(new_xml, "name1", enrolled="yes")
age = ET.SubElement(name, "age", check="no")
sex = ET.SubElement(name, "sex")
sex.text = '男'

# Second entry: <name2 enrolled="no"> holding <age>19</age> and an empty <sex />.
name = ET.SubElement(new_xml, "name2", enrolled="no")
age = ET.SubElement(name, "age")
sex = ET.SubElement(name, "sex")
age.text = '19'

# Wrap the root in an ElementTree so it can be serialized with an XML declaration.
et = ET.ElementTree(element=new_xml)
et.write("new_xml", xml_declaration=True, encoding="UTF-8")
XML格式转字典dict
# Sample XML document: a root <xml> element with four direct children
# (rank, year, gdppc, neighbor); the last one carries only attributes, no text.
data_xml = """
<xml>
<rank updated="yes">69</rank>
<year update="yes" updated="yes">2015</year>
<gdppc>13600</gdppc>
<neighbor direction="W" name="Costa Rica" />
</xml>
"""
import xml.etree.ElementTree as ET
def xml_to_dict(data_xml):
    """Convert a flat XML document into a dictionary.

    Each direct child of the root element becomes one entry: the child's tag
    is the key and its text content is the value.  Attributes and nested
    elements are ignored, a child without text maps to ``None``, and when
    several children share a tag the last one wins.

    Args:
        data_xml: The XML document as ``str`` or ``bytes``.

    Returns:
        A dict mapping each child tag name to its text (or ``None``).

    Raises:
        xml.etree.ElementTree.ParseError: If the input is not well-formed XML.

    Note:
        The standard-library parser is not hardened against maliciously
        constructed XML; be careful when feeding it untrusted input.
    """
    # Strip surrounding whitespace: an XML declaration (``<?xml ...?>``) is
    # only accepted at the very start of the input, so a leading newline
    # (as in a triple-quoted literal) would otherwise raise ParseError.
    root = ET.fromstring(data_xml.strip())
    return {child.tag: child.text for child in root}
字典dict转xml格式
# Sample dictionary used as input for the dict-to-XML conversion.
data_dict = dict(name='Bob', age=18, gender='male')
def dict_to_xml(data_dict):
    """Serialize a flat dictionary as an XML string wrapped in ``<xml>``.

    Every key becomes a child element named after the key, and the string
    form of its value becomes the element's text.  The characters ``&``,
    ``<`` and ``>`` in values are escaped so the result stays well-formed.
    Keys are used verbatim as tag names and must therefore be valid XML names.

    Args:
        data_dict: Mapping of tag names to values; values are converted
            with ``str()``.

    Returns:
        A string of the form ``<xml><key>value</key>...</xml>``, with the
        elements in the mapping's iteration order.
    """
    # Imported locally so the function does not depend on file-level imports.
    from xml.sax.saxutils import escape

    body = ''.join(
        f'<{key}>{escape(str(value))}</{key}>'
        for key, value in data_dict.items()
    )
    return f'<xml>{body}</xml>'