XPath 解析

from lxml import etree
# Load the local file. etree.parse() uses lxml's XML parser unless another
# parser is passed in, so bendi.html has to be well-formed markup.
tree = etree.parse('bendi.html')
print(tree)

# In XPath, "/" steps to direct children while "//" matches descendants at
# any depth. Show the matched <li> elements, then how many there are.
li = tree.xpath('//body/ul/li')
print(li, len(li), sep='\n')

# Text of every <li> that carries an id attribute, one entry per line.
liid = tree.xpath('//body/ul/li[@id]/text()')
for i in liid:
    print(i)

# Text of the <li> whose id is exactly "bj" (printed as a list).
libj = tree.xpath('//body/ul/li[@id="bj"]/text()')
print(libj)

获取属性:

属性查询:在路径中使用 @属性名 可以取出属性值,例如下面的表达式会返回文档中所有元素的 class 属性值。
//@class
 
获取百度首页“百度一下”按钮的 value 属性值:
# Fetch the Baidu home page and print the value attribute of its search
# button (the <input> whose id is "su"). `etree` comes from the lxml import
# at the top of the file.
import urllib.request

url = 'http://www.baidu.com'
# The context manager closes the HTTP response once the body has been read.
with urllib.request.urlopen(url) as response:
    content = response.read().decode('utf-8')

# etree.HTML() parses an HTML string (the downloaded page) instead of a file.
tree1 = etree.HTML(content)
# "/@value" reads the attribute of the matched <input> itself; the
# descendant axis ("//@value") is unnecessary for an element with no children.
val = tree1.xpath('//input[@id="su"]/@value')
if val:
    print(val[0])
else:
    # xpath() returns an empty list when nothing matches, so indexing it
    # blindly would raise IndexError.
    print('no <input id="su"> element found in the response')

  

posted @ 2023-10-01 00:49  sgj191024  阅读(5)  评论(0)  编辑  收藏  举报