大众点评商家爬取

对显示搜索结果的网址发送请求

import requests

# Search-results page to request. The final path segment ends with the
# percent-encoded search keyword ("意面"). The "150" segment is presumably the
# city id (the cookie below also carries cy=150) — TODO confirm.
url = 'https://www.dianping.com/search/keyword/150/0_%E6%84%8F%E9%9D%A2'

# Request headers sent with every request in this script.
# NOTE(review): the Cookie value looks like it was copied from a logged-in
# browser session (it includes a `dper` entry); treat it as a secret and
# replace it with your own value — it will presumably expire. TODO confirm.
headers = {
    'Cookie':'baiduappugc_ab=ugcdetail%3AA%3A1; _lxsdk_cuid=18fee40b7a3c8-005f5aa16f3f6f-26001c51-144000-18fee40b7a37e; _lxsdk=18fee40b7a3c8-005f5aa16f3f6f-26001c51-144000-18fee40b7a37e; _hc.v=dad10692-6e0c-4402-5850-9256da170739.1717689171; fspop=test; _lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; Hm_lvt_602b80cf8079ae6591966cc70a3940e7=1719247152; s_ViewType=10; WEBDFPID=69y55w298vw15vvy1zw2734zu370z9288095y8v097u97958v3u94896-2034607213206-1719247213206OOSGICKfd79fef3d01d5e9aadc18ccd4d0c95072558; qruuid=e15e73da-9b7e-47ba-860c-22c7789f6580; dper=02023625f81edb23ecbb0420185188f1dde6080fbba05896da553ead0fc74a16e8fc188e79d114ca9c696820d09910f5ea932f09015d590fc94500000000fb200000f08fa513af0aa991e42172c624e809f0f396f2a8ab764fe6daa2d1c7baecdb3df371a116ac704b96c72a511bba45c3ca; ll=7fd06e815b796be3df069dec7836c3df; cy=150; cye=jining; Hm_lpvt_602b80cf8079ae6591966cc70a3940e7=1719247398; _lxsdk_s=1904b1da218-9f6-f23-939%7C%7C135',
    'Host':'www.dianping.com',
    # Anti-hotlinking: tells the server which page the request came from
    'Referer':'https://www.dianping.com/search/keyword/150/0_%E6%84%8F%E9%9D%A2',
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
}

# Fetch the search-results page with the headers defined above.
# A timeout is passed explicitly: requests waits indefinitely by default, so a
# stalled connection would otherwise hang the script forever.
response = requests.get(url, headers=headers, timeout=10)
# print(response.text)

解析出店家详情页信息:

import parsel

# 解析数据
# Build a selector over the HTML text of the search-results response.
selector1 = parsel.Selector(text=response.text)
# Collect the href attribute of every matching shop picture link; these are
# used later as the detail-page URLs.
href = (
    selector1
    .css('.shop-list ul li .pic a::attr(href)')
    .getall()
)

遍历详情页列表,提取店家详情信息:

# Visit every detail-page URL collected in `href` and print the shop name.
# Note: despite its name, `index` holds a URL string, not a position.
for index in href:
    # Request the detail page. The explicit timeout keeps one stalled
    # connection from blocking the whole crawl (requests has no default).
    html_data = requests.get(url=index, headers=headers, timeout=10)
    # Parse the detail-page HTML
    selector2 = parsel.Selector(html_data.text)
    # First text node of the <h1> inside #basic-info; None when nothing matches.
    name = selector2.xpath('//*[@id="basic-info"]/h1/text()').get()
    # score = selector2.xpath('//*[@id="basic-info"]/div[1]/div/div[2]').getall()

    print(name)

 

posted @ 2024-06-25 02:22  JJJhr  阅读(74)  评论(0)  编辑  收藏  举报