Python爬虫案例05:爬取猪八戒网站商品

import requests
from lxml import etree

# Scrape the zbj.com search-results page for the keyword "saas" and print,
# for every listing found, its company, service title and price.
url = 'https://shenzhen.zbj.com/search/service/?l=0&kw=saas&r=2'
# Fetch the page body as text. The timeout keeps the script from blocking
# forever if the server never responds (requests has no default timeout).
response = requests.get(url, timeout=10).text
html = etree.HTML(response)  # Parse into an element tree so XPath can be used
a = "SaaS"  # Separator handed to join() when rebuilding the service title

# Locate the <div> elements that hold the listings.
divs = html.xpath('//*[@id="__layout"]/div/div[3]/div/div[4]/div/div[2]/div[1]/div')
for div in divs:  # Pull the individual fields out of each listing
    # The title comes back as a list of text fragments; they are re-joined
    # with the separator above. NOTE(review): presumably the site splits the
    # title around the highlighted search keyword - confirm against the page.
    function = a.join(div.xpath('./div/div[2]/div[2]/a/text()'))
    # Price: union of two alternative locations for the price <span>.
    price = div.xpath('./div/div[3]/div[1]/span/text() | ./div/div[2]/div[1]/span/text()')
    company = div.xpath('./div/a/div[2]/div[1]/div/text()')  # Company
    if len(price) == 2:
        # Two price nodes matched: keep only the first one.
        del price[1]
    elif price:
        # Guarded so that a listing without any price no longer raises
        # IndexError on price[0]; it is simply reported with an empty list.
        print(price[0])
    print(company, function, price)

 

posted @ 2023-03-14 11:57  shuxi_520  阅读(71)  评论(0)  编辑  收藏  举报