etree-xpath

import requests
from lxml import etree
url = 'https://item.taobao.com/item.htm?spm=a219r.lm893.14.118.238e8d532nCpy9&id=537796877521&ns=1&abbucket=1'

# Download the page and parse it into an lxml element tree.
# The timeout keeps the script from blocking forever on a stalled connection.
response = requests.get(url, timeout=10)
html = etree.HTML(response.text)

# 1. Locate tags whose attribute contains a given value.
#    The predicate brackets must balance; a stray trailing "]" makes the
#    XPath expression invalid.
sel_1 = html.xpath('//div[contains(@class,"tb-main-title")]')

# 2. Locate tags whose attribute value starts with a given value.
sel_2 = html.xpath('//h3[starts-with(@class,"tb-main-title")]')

# 3. Locate tags whose text contains a given substring.
sel_3 = html.xpath('//h3[contains(text(),"小白")]')

# 4. Locate tags whose text equals a specific value and read that text.
#    xpath() returns a list, which is empty when nothing matches, so guard
#    the [0] access instead of letting it raise IndexError.
sel_4 = html.xpath('//span[text()="价格"]')
sel_4_text = sel_4[0].text if sel_4 else None
posted @ 2017-10-31 01:21  人微言轻1  阅读(1622)  评论(0)  编辑  收藏  举报