Python XPath 基础 02

from lxml import etree

# Parse ./test.html using lxml's HTML parser (etree.HTMLParser()). Every XPath
# example below calls html.xpath(...) on the resulting tree. The outputs shown
# are the ones recorded by the author; they depend on the contents of
# test.html, which is not part of this file.
html = etree.parse('./test.html', etree.HTMLParser())
# result = html.xpath('//li')  # select every li node; the result is a list
# print(result)
# print(result[0])
#
#
# # Recorded output
# '''
# [<Element li at 0x119b71b88>, <Element li at 0x119b71bc8>, <Element li at 0x119b71c08>, <Element li at 0x119b71c48>, <Element li at 0x119b71c88>]
# <Element li at 0x119b71b88>
#
# '''

# result = html.xpath('//li[@class="item-0"]')
#
# '''
# Select, anywhere in the document,
#
# every li element whose class attribute equals "item-0"; the result is a list of elements.
#
# '''
# print(result)
# '''
# Recorded output:
# [<Element li at 0x1162f0d08>, <Element li at 0x1162f0d48>]
# '''
# result = html.xpath('//li[@class="item-0"]/text()')
# '''
# Select, anywhere in the document,
#
# the text nodes that are direct children of li elements whose class equals "item-0".
#
# '''
# print(result)
#
# '''
# Recorded output:
# ['\n     ']
# '''
# result = html.xpath('//li[@class="item-0"]/a/text()')
# '''
# Select, anywhere in the document,
#
# the text inside the a child of each li element whose class equals "item-0".
#
# '''
# print(result)
#
# '''
# Recorded output:
# ['first item', 'fifth item']
# '''
# result = html.xpath('//li[@class="item-0"]//text()')  # '//' also matches text nodes of descendants
# print(result)
# '''
# Recorded output: ['first item', 'fifth item', '\n     ']
#
# '''
# result = html.xpath('//li/a/@href')  # href attribute of every a that is a child of an li
# print(result)
# '''
# Recorded output: ['link1.html', 'link2.html', 'link3.html', 'link4.html', 'link5.html']
#
# '''
# result = html.xpath('//a[@href="link4.html"]/../@class')  # '..' steps to the parent node, then reads its class

# print(result)
# '''
# Recorded output: ['item-1']
#
# '''
# result = html.xpath('//a[@href="link4.html"]/parent::*/@class')  # same query written with the parent:: axis
# print(result)
# '''
# Recorded output:
# ['item-1']
#
# '''
#
# result=html.xpath('//li/@class')  # class attribute of every li
# print(result)
# '''
# Recorded output: ['item-0', 'item-1', 'item-inactive', 'item-1', 'item-0']
#
# '''
# result = html.xpath('//li/a')  # a elements that are direct children of an li
# print(result)
# '''
# Recorded output:
# [<Element a at 0x113e35c88>, <Element a at 0x113e35cc8>,
#  <Element a at 0x113e35d08>, <Element a at 0x113e35d48>, <Element a at 0x113e35d88>]
# '''
# result = html.xpath('//ul//a')  # a elements at any depth below a ul
# print(result)
# '''
# Recorded output: [<Element a at 0x117874c88>, <Element a at 0x117874cc8>, <Element a at 0x117874d08>,
# <Element a at 0x117874d48>, <Element a at 0x117874d88>]
#
# '''
# result = html.xpath('//ul/a')  # '/' matches direct children only
# print(result)
# '''
# Recorded output: []
# (presumably because no a element is a direct child of a ul in test.html)
# '''

 

posted @ 2018-12-25 20:28  青春叛逆者  阅读(176)  评论(0)  编辑  收藏  举报