Python基础之读写xml总结

参考文章:https://blog.csdn.net/weixin_42749767/article/details/82770563

https://www.cnblogs.com/Erick-L/p/6416379.html

 

两种读写xml的方式各有优势,看自己想要哪种功能。简单说下:

1. minidom生成的XML带有缩进和换行,可读性好。ET默认生成的XML没有缩进和换行,所有内容挤在一行,用文本编辑器打开不易阅读(Python 3.9及以上可以在写文件前调用ET.indent()补上缩进)。

2. minidom根据标签名获取所有节点比较容易,但是获取某个节点的直接子节点就不太方便:标签之间的换行和空格会被算作文本节点,所以同样的内容、不同排版的xml得到的子节点列表可能不同。ET获取子节点比较容易,也可以使用相对路径或者XPath进行定位,操作更加灵活。

选用哪种xml库就看自己需求偏向于哪一种了。

一、用xml.dom.minidom写xml

a. 写XML使用范例:

writexml.py

#!/usr/bin/env python
# _*_ coding: UTF-8 _*_
"""=================================================
@Project -> File    : write_xml.py
@IDE     : PyCharm
@Author  : zihan
@Date    : 2021/10/15 13:28
@Desc    :
================================================="""
import xml.dom.minidom


def writexml():
    """Build the sample book-store document with minidom and save it as book.xml.

    The tree is a ``BOOK`` root holding three category ``book`` elements
    (C++, Python, Linux); each category holds one or more ``book`` elements
    with a ``language`` attribute and the book title as text.
    """
    # (category name, titles filed under it), in document order.
    catalogue = (
        ('C++', ("C++从入门到精通", "C++程序设计语言")),
        ('Python', ("Python从入门到精通",)),
        ('Linux', ("Linux从入门到精通",)),
    )

    document = xml.dom.minidom.Document()

    store = document.createElement('BOOK')
    store.setAttribute('Description', 'This is a book store')
    store.setAttribute('Name', 'Happy Home')
    document.appendChild(store)  # the root element hangs directly off the document

    for category_name, titles in catalogue:
        category = document.createElement('book')
        category.setAttribute('Name', category_name)
        for title in titles:
            entry = document.createElement('book')
            entry.setAttribute('language', 'Chinese')
            # The element's value is stored as a child text node.
            entry.appendChild(document.createTextNode(title))
            category.appendChild(entry)
        store.appendChild(category)

    # The file is opened as UTF-8 so the Chinese titles are written correctly.
    with open("book.xml", "w", encoding='utf-8') as out:
        document.writexml(out, indent='\t', addindent='\t', newl='\n', encoding="utf-8")


def main():
    """Script entry point: generate book.xml."""
    writexml()


# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
View Code

 

创建的book.xml如下

<?xml version="1.0" encoding="utf-8"?>
    <BOOK Description="This is a book store" Name="Happy Home">
        <book Name="C++">
            <book language="Chinese">C++从入门到精通</book>
            <book language="Chinese">C++程序设计语言</book>
        </book>
        <book Name="Python">
            <book language="Chinese">Python从入门到精通</book>
        </book>
        <book Name="Linux">
            <book language="Chinese">Linux从入门到精通</book>
        </book>
    </BOOK>
View Code

 

b. 用法介绍:

1. 创建文档对象

2. 创建根节点

3. 创建节点属性

4. 创建文本节点

5. 添加子节点

 

 6. 写xml

二、 用xml.dom.minidom读xml

a. 读XML使用范例:

book.xml

<?xml version="1.0" encoding="utf-8"?>
    <BOOK Description="This is a book store" Name="Happy Home">
        first node
        <book Name="C++">
            <book language="Chinese">C++从入门到精通</book>
            <book language="Chinese">C++程序设计语言</book>
        </book>
        <book Name="Python">
            <book language="Chinese">Python从入门到精通</book>
        </book>
        <book Name="Linux">
            <book language="Chinese">Linux从入门到精通</book>
        </book>
        last node
</BOOK>
View Code

 

book1.xml

<?xml version="1.0" encoding="utf-8"?>
    <BOOK Description="This is a book store" Name="Happy Home"><book Name="C++">
            <book language="Chinese">C++从入门到精通</book>
            <book language="Chinese">C++程序设计语言</book>
        </book><book Name="Python">
            <book language="Chinese">Python从入门到精通</book>
        </book><book Name="Linux">
            <book language="Chinese">Linux从入门到精通</book>
        </book></BOOK>
View Code

 

readxml.py

#!/usr/bin/env python
# _*_ coding: UTF-8 _*_
"""=================================================
@Project -> File    : autocreatexml -> readxml.py
@IDE     : PyCharm
@Author  : zihan
@Date    : 2021/10/15 13:28
@Desc    :
================================================="""
import xml.dom.minidom
import os


def readxml():
    """Parse book.xml with minidom and print a tour of the DOM API.

    Prints, in order: the root tag name, the root's ``Description``
    attribute (if present), the number and names of the root's direct child
    nodes, the root's first and last child, the number of ``book``
    descendants, and, for every ``book`` element carrying a ``language``
    attribute, its text twice (once via ``data``, once via ``nodeValue``).

    Returns silently when book.xml does not exist. Prints
    "book.xml format wrong" and returns when the file is not well-formed XML.
    """
    # Imported locally so this function does not rely on an extra file-level import.
    from xml.parsers.expat import ExpatError

    if not os.path.exists("book.xml"):
        return
    try:
        book_obj = xml.dom.minidom.parse("book.xml")
    except ExpatError:
        # Only malformed XML is reported as a format problem. A bare
        # ``except`` would also swallow KeyboardInterrupt/SystemExit and
        # mislabel unrelated failures as a format error.
        print("book.xml format wrong")
        return

    root_node = book_obj.documentElement  # root element of the document
    print(root_node.nodeName)
    if root_node.hasAttribute("Description"):
        print(root_node.getAttribute("Description"))

    # childNodes also contains the whitespace between tags as text nodes, so
    # the count depends on formatting: 7 for the indented sample book.xml,
    # 3 once the line breaks between the root's children are removed.
    second_nodes = root_node.childNodes
    print(len(second_nodes))
    for second_node in second_nodes:
        print(second_node.nodeName, end="\t")
    print("")

    # With whitespace between tags, the first and last child are text nodes.
    print(root_node.firstChild)
    print(root_node.lastChild)

    # getElementsByTagName searches all descendants, not just direct children.
    book_nodes = root_node.getElementsByTagName("book")
    print(len(book_nodes))
    for book_node in book_nodes:
        print(book_node.nodeName)
        if not book_node.hasAttribute("language"):
            continue
        first_child = book_node.firstChild
        # An empty element has no child at all, and an element child has no
        # ``data``; skip those instead of raising.
        if not isinstance(first_child, xml.dom.minidom.CharacterData):
            continue
        print(first_child.data, end="\t")  # value via the text node's data
        print(first_child.nodeValue)  # same value via nodeValue


def main():
    """Script entry point: read book.xml and print what was found."""
    readxml()


# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
View Code

 

如果读的是book.xml,那么输出结果如下:

BOOK
This is a book store
7
#text	book	#text	book	#text	book	#text	
<DOM Text node "'\n\t\t'">
<DOM Text node "'\n'">
7
book
book
C++从入门到精通	C++从入门到精通
book
C++程序设计语言	C++程序设计语言
book
book
Python从入门到精通	Python从入门到精通
book
book
Linux从入门到精通	Linux从入门到精通
View Code

 

如果读的是book1.xml,那么输出结果如下:

BOOK
This is a book store
3
book	book	book	
<DOM Element: book at 0x4231670>
<DOM Element: book at 0x4231800>
7
book
book
C++从入门到精通	C++从入门到精通
book
C++程序设计语言	C++程序设计语言
book
book
Python从入门到精通	Python从入门到精通
book
book
Linux从入门到精通	Linux从入门到精通
View Code

 

b. 用法介绍:

1. 解析XML文件,获取文档对象

2. 获取根节点

3. 获取根节点名称

4. 判断节点属性是否存在

5. 获取节点属性的值

6. 根据节点名获取其下所有节点对象。(这里是指的是所有book节点,并不单是子节点的book)

7. 获取当前节点下的子节点。需要注意xml的格式问题,如果节点之间有空行,是会被算作一个文本节点的。(book.xml中根节点下的子节点间就是有空行的,book1.xml中根节点下的子节点间就是没有空行的)

8. 获取当前节点的第一个子节点

9. 获取当前节点的最后一个子节点

10. 获取节点的value值:node.childNodes[0].data

 或者:node.childNodes[0].nodeValue

 

 

三、用xml.etree.ElementTree写XML

a. 写xml使用范例:

writexml.py

#!/usr/bin/env python
# _*_ coding: UTF-8 _*_
"""=================================================
@Project -> File    : writexml.py
@IDE     : PyCharm
@Author  : zihan
@Date    : 2021/10/16 9:36
@Desc    :
================================================="""
from xml.etree import ElementTree as ET


def writexml_way1():
    """Build the book-store tree with ``ET.SubElement`` and write it to book3.xml.

    ``SubElement`` creates each node and attaches it to its parent in one
    call, so no separate append step is needed.
    """
    # (category name, titles filed under it), in document order.
    shelves = (
        ('C++', ("C++从入门到精通", "C++程序设计语言")),
        ('Python', ("Python从入门到精通",)),
        ('Linux', ("Linux从入门到精通",)),
    )

    store = ET.Element('BOOK', {"Description": "This is a book store", "Name": "Happy Home"})
    for shelf_name, titles in shelves:
        shelf = ET.SubElement(store, 'book', {'Name': shelf_name})
        for title in titles:
            entry = ET.SubElement(shelf, 'book', {'language': 'Chinese'})
            entry.text = title  # the element's value

    # xml_declaration=True emits the <?xml ...?> header; with False the
    # output would start directly at the root element.
    document = ET.ElementTree(store)
    document.write('book3.xml', encoding="utf-8", xml_declaration=True, short_empty_elements=False)


def writexml_way2():
    """Build the book-store tree with ``makeelement`` + ``append`` and write it to book4.xml.

    Unlike ``SubElement``, ``makeelement`` only creates a node; it has to be
    attached to its parent explicitly with ``append``.
    """
    # (category name, titles filed under it), in document order.
    shelves = (
        ('C++', ("C++从入门到精通", "C++程序设计语言")),
        ('Python', ("Python从入门到精通",)),
        ('Linux', ("Linux从入门到精通",)),
    )

    store = ET.Element("BOOK", {"Description": "This is a book store", "Name": "Happy Home"})
    for shelf_name, titles in shelves:
        shelf = store.makeelement('book', {'Name': shelf_name})
        for title in titles:
            entry = shelf.makeelement('book', {'language': 'Chinese'})
            entry.text = title  # the element's value
            shelf.append(entry)
        store.append(shelf)

    document = ET.ElementTree(store)
    document.write('book4.xml', encoding="utf-8", xml_declaration=True, short_empty_elements=False)


def main():
    """Script entry point: write book3.xml using the SubElement-based variant."""
    writexml_way1()
    # writexml_way2()


# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
View Code

 

book3.xml

<?xml version='1.0' encoding='utf-8'?>
<BOOK Description="This is a book store" Name="Happy Home"><book Name="C++"><book language="Chinese">C++从入门到精通</book><book language="Chinese">C++程序设计语言</book></book><book Name="Python"><book language="Chinese">Python从入门到精通</book></book><book Name="Linux"><book language="Chinese">Linux从入门到精通</book></book></BOOK>
View Code

 

book4.xml

<?xml version='1.0' encoding='utf-8'?>
<BOOK Description="This is a book store" Name="Happy Home"><book Name="C++"><book language="Chinese">C++从入门到精通</book><book language="Chinese">C++程序设计语言</book></book><book Name="Python"><book language="Chinese">Python从入门到精通</book></book><book Name="Linux"><book language="Chinese">Linux从入门到精通</book></book></BOOK>
View Code

 

从结果可以看出,ET和minidom生成的XML格式是不同的:用ET生成的xml节点之间不包含换行和缩进,而用minidom生成的xml包含换行和缩进,格式更加美观。

b. 用法介绍:

1. 创建根节点及其属性

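2. 创建子节点及其属性:ET.SubElement(父节点, 标签名, 属性字典),创建的同时就挂到父节点下

或者:先用 父节点.makeelement(标签名, 属性字典) 创建节点,再用 父节点.append(子节点) 挂到父节点下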

 

3. 设置节点文本信息

 

 

 4. 生成xml

 

 

四、用xml.etree.ElementTree读XML

a. 读xml使用范例:

book.xml

<?xml version="1.0" encoding="utf-8"?>
    <BOOK Description="This is a book store" Name="Happy Home">
        <book Name="C++">
            <book aaa="1">
                <book language="Chinese">C++从入门到精通</book>
                <book language="Chinese">C++程序设计语言</book>
            </book>
        </book>
        <book Name="Python">
            <book  aaa="2">
                <book language="Chinese">Python从入门到精通</book>
            </book>
        </book>
        <book Name="Linux">
            <book aaa="3">
                <book language="Chinese">Linux从入门到精通</book>
            </book>
        </book>
</BOOK>
View Code

 

readxml.py

#!/usr/bin/env python
# _*_ coding: UTF-8 _*_
"""=================================================
@Project -> File    : readxml.py
@IDE     : PyCharm
@Author  : zihan
@Date    : 2021/10/16 9:36
@Desc    :
================================================="""
from xml.etree import ElementTree as ET


def readxml():
    """Parse book.xml with ElementTree and print several ways of walking it.

    Prints the root's tag, attribute dict and text, then a three-level walk
    of the children, then the matches of three ``findall`` path queries.
    """
    root = ET.parse('book.xml').getroot()

    print(root.tag)  # tag of the root element (BOOK for the sample file)
    print(root.attrib)  # attributes of the root as a dict
    # Text between the root's start tag and its first child: whitespace for
    # an indented file, None for a compact one.
    print(root.text)

    print("遍历子节点方法:")
    for level1 in root:  # iterating an element yields its direct children
        print("\t", level1.tag, level1.attrib)
        for level2 in level1:
            print("\t\t", level2.tag, level2.attrib)
            for level3 in level2:
                print("\t\t\t", level3.tag, level3.attrib, level3.text)

    # (heading, path) pairs for the three findall demonstrations:
    #   'book'          - direct children of root tagged book
    #   './book/book'   - book grandchildren reached through a book child
    #   './/book/book'  - every book whose parent is a book descendant of
    #                     root; root's own direct children are not included
    #                     because their parent is BOOK, not book
    queries = (
        ("遍历特定标签子节点方法:", 'book'),
        ("用相对路径遍历特定标签子节点方法:", './book/book'),
        ("用XPath定位元素遍历所有特定标签节点方法:", './/book/book'),
    )
    for heading, path in queries:
        print(heading)
        for match in root.findall(path):
            print("\t", match.tag, match.attrib)


def writexml_way1():
    """Build the book-store tree with ``ET.SubElement`` and write it to book3.xml.

    ``SubElement`` creates each node and attaches it to its parent in one
    call, so no separate append step is needed.
    """
    # (category name, titles filed under it), in document order.
    shelves = (
        ('C++', ("C++从入门到精通", "C++程序设计语言")),
        ('Python', ("Python从入门到精通",)),
        ('Linux', ("Linux从入门到精通",)),
    )

    store = ET.Element('BOOK', {"Description": "This is a book store", "Name": "Happy Home"})
    for shelf_name, titles in shelves:
        shelf = ET.SubElement(store, 'book', {'Name': shelf_name})
        for title in titles:
            entry = ET.SubElement(shelf, 'book', {'language': 'Chinese'})
            entry.text = title  # the element's value

    # xml_declaration=True emits the <?xml ...?> header; with False the
    # output would start directly at the root element.
    document = ET.ElementTree(store)
    document.write('book3.xml', encoding="utf-8", xml_declaration=True, short_empty_elements=False)


def writexml_way2():
    """Build the book-store tree with ``makeelement`` + ``append`` and write it to book4.xml.

    Unlike ``SubElement``, ``makeelement`` only creates a node; it has to be
    attached to its parent explicitly with ``append``.
    """
    # (category name, titles filed under it), in document order.
    shelves = (
        ('C++', ("C++从入门到精通", "C++程序设计语言")),
        ('Python', ("Python从入门到精通",)),
        ('Linux', ("Linux从入门到精通",)),
    )

    store = ET.Element("BOOK", {"Description": "This is a book store", "Name": "Happy Home"})
    for shelf_name, titles in shelves:
        shelf = store.makeelement('book', {'Name': shelf_name})
        for title in titles:
            entry = shelf.makeelement('book', {'language': 'Chinese'})
            entry.text = title  # the element's value
            shelf.append(entry)
        store.append(shelf)

    document = ET.ElementTree(store)
    document.write('book4.xml', encoding="utf-8", xml_declaration=True, short_empty_elements=False)


def main():
    """Script entry point: read book.xml; the two writers are left disabled."""
    # writexml_way1()
    # writexml_way2()
    readxml()


# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
View Code

 

输出结果:

BOOK
{'Description': 'This is a book store', 'Name': 'Happy Home'}

		
遍历子节点方法:
	 book {'Name': 'C++'}
		 book {'aaa': '1'}
			 book {'language': 'Chinese'} C++从入门到精通
			 book {'language': 'Chinese'} C++程序设计语言
	 book {'Name': 'Python'}
		 book {'aaa': '2'}
			 book {'language': 'Chinese'} Python从入门到精通
	 book {'Name': 'Linux'}
		 book {'aaa': '3'}
			 book {'language': 'Chinese'} Linux从入门到精通
遍历特定标签子节点方法:
	 book {'Name': 'C++'}
	 book {'Name': 'Python'}
	 book {'Name': 'Linux'}
用相对路径遍历特定标签子节点方法:
	 book {'aaa': '1'}
	 book {'aaa': '2'}
	 book {'aaa': '3'}
用XPath定位元素遍历所有特定标签节点方法:
	 book {'aaa': '1'}
	 book {'language': 'Chinese'}
	 book {'language': 'Chinese'}
	 book {'aaa': '2'}
	 book {'language': 'Chinese'}
	 book {'aaa': '3'}
	 book {'language': 'Chinese'}
View Code

 

 

b. 用法介绍

1. 解析XML文件,得到ElementTree对象

 

 2. 获取根节点

 

 3. 获取节点标签

 

 4. 获取节点属性字典

 

 5. 获取节点的值,这个地方要注意不同XML格式获取到的值不同

 

 6. 获取子节点

 

 7. 获取特定标签子节点

 

 8. 获取所有特定标签的节点

 

 

未完待续。。。

 

posted @ 2019-12-10 10:38  o云淡风轻o  阅读(2618)  评论(0)  编辑  收藏  举报