一、通过路径获取数据
from lxml import etree
# Sample document: <author> contains <nick> elements at three depths --
# direct children, grandchildren (inside <div>/<span>) and one
# great-grandchild (inside <span>/<div>) -- so each path below selects a
# different subset.
xml = """
<book>
<id>1</id>
<name>zhao</name>
<price>1.23</price>
<author>
<nick>赵</nick>
<nick>钱</nick>
<nick>孙</nick>
<nick>李</nick>
<div>
<nick>周</nick>
</div>
<span>
<nick>吴</nick>
<div>
<nick>郑</nick>
</div>
</span>
</author>
</book>
"""
tree = etree.XML(xml)

# Each query is printed right away; previously every result except the
# last one was overwritten before it could be inspected.

# Absolute path: text of <name> directly under the root <book>.
result = tree.xpath("/book/name/text()")
print(result)  # ['zhao']

# "/" descends exactly one level: only <nick> children of <author>.
result = tree.xpath("/book/author/nick/text()")
print(result)  # ['赵', '钱', '孙', '李']

# <nick> elements that sit directly inside <author>/<div>.
result = tree.xpath("/book/author/div/nick/text()")
print(result)  # ['周']

# "//" matches descendants at any depth below <author>.
result = tree.xpath("/book/author//nick/text()")
print(result)  # ['赵', '钱', '孙', '李', '周', '吴', '郑']

# "*" is a wildcard for exactly one intermediate element, so only the
# grandchildren match; the <nick> nested in <span>/<div> is one level too deep.
result = tree.xpath("/book/author/*/nick/text()")
print(result)  # ['周', '吴']
二、通过属性获取想要的数据
1、获取 ul 下所有 a 标签的文本
from lxml import etree
# Sample document for the attribute examples: <author> holds an unordered
# list and an ordered list of links, plus one <div> carrying a class attribute.
xml = """
<book>
<id>1</id>
<name>zhao</name>
<price>1.23</price>
<author>
<ul>
<li><a href="www.baidu.com">赵</a></li>
<li><a href="www.sougou.com">钱</a></li>
<li><a href="www.guge.com">孙</a></li>
</ul>
<ol>
<li><a href="li">李</a></li>
<li><a href="zhou">周</a></li>
<li><a href="wu">吴</a></li>
</ol>
<div class='zheng'>郑</div>
</author>
</book>
"""
# Parse the literal; the returned element is the root <book>.
tree = etree.fromstring(xml)

# Text of every <a> inside the <ul> list items.
ul_link_text_path = "/book/author/ul/li/a/text()"
result = tree.xpath(ul_link_text_path)
print(result)
2、通过索引获取第一个 li 中 a 标签的文本（XPath 的索引从 1 开始）
# XPath positions are 1-based, so li[1] selects the FIRST <li> of the <ul>.
result = tree.xpath("/book/author/ul/li[1]/a/text()")
# Show the result like the other snippets do; it used to be silently discarded.
print(result)  # ['赵']
3、通过属性值筛选 a 标签
# [@href='...'] keeps only the <a> elements whose href attribute equals the
# given value. The value must exist in the document: the former 'zhao' matched
# no <a> in the <ul> (its hrefs are the www.* addresses), so the query always
# returned an empty list.
result = tree.xpath("/book/author/ul/li/a[@href='www.baidu.com']/text()")
print(result)  # ['赵']
4、相对查找：先取出 ol 下的每个 li，再在 li 内部继续查找
# Select the <li> elements of the ordered list, then query inside each one.
ol_li_list = tree.xpath("/book/author/ol/li")
for li in ol_li_list:
    # Relative lookup: "./" starts at the current <li>, not at the document root.
    result = li.xpath("./a/text()")
    # To read the attribute value instead of the text: li.xpath("./a/@href")
    print(result)  # ['李'], then ['周'], then ['吴']
5、获取属性值（@属性名）
# A trailing "@href" step returns the attribute values themselves rather than
# the <a> elements or their text.
ul_hrefs = tree.xpath("/book/author/ul/li/a/@href")
print(ul_hrefs)