# Python crawler example 05: scraping service listings from the ZBJ (猪八戒) website
import requests
from lxml import etree
# Fetch one ZBJ search-results page and print, for every listing found,
# the company text nodes, the reassembled service title and the price.

# Search URL queried by this script (keyword "saas" on the Shenzhen subdomain).
url = 'https://shenzhen.zbj.com/search/service/?l=0&kw=saas&r=2'

# requests has no default timeout, so an unresponsive server would block the
# script forever; fail fast instead. raise_for_status() turns an HTTP error
# response into an exception rather than letting an error page be parsed.
reply = requests.get(url, timeout=10)
reply.raise_for_status()
response = reply.text  # response body decoded as text

html = etree.HTML(response)  # parse into an element tree so XPath can be used

# Separator handed to str.join() when gluing the title's text fragments back
# together. NOTE(review): presumably the page splits the title around the
# highlighted search keyword, which is why the keyword is re-inserted - confirm.
a = "SaaS"

# One element per listing. The absolute path is tied to the page layout and
# simply yields an empty list (no output) if that layout differs.
divs = html.xpath('//*[@id="__layout"]/div/div[3]/div/div[4]/div/div[2]/div[1]/div')

for div in divs:
    # Service title: all text fragments of the link, joined with the separator.
    function = a.join(div.xpath('./div/div[2]/div[2]/a/text()'))
    # Price: union of two candidate locations inside the listing.
    price = div.xpath('./div/div[3]/div[1]/span/text() | ./div/div[2]/div[1]/span/text()')
    # Company: list of text nodes (printed as a list, as before).
    company = div.xpath('./div/a/div[2]/div[1]/div/text()')

    if len(price) == 2:
        # Two matches: keep only the first one.
        del price[1]
    elif price:
        # Same extra output as before for non-empty results; the emptiness
        # guard avoids the IndexError the unguarded price[0] raised when a
        # listing had no price node at all.
        print(price[0])

    print(company, function, price)