Python实现考拉海购数据采集

前言

环境介绍

python 3.8
pycharm 2021专业版
requests >>> pip install requests
parsel >>> pip install parsel

代码实现步骤:

发送请求 >>> 获取数据 >>> 解析数据 >>> 保存数据

对于本篇文章有疑问的同学可以加【资料白嫖、解答交流群:910981974】

代码

1. 发送请求

import requests

# Step 1: send the search request.
#
# Request headers sent along with the search request: a cookie string, a
# referer pointing at the site's home page, and a desktop Chrome user-agent.
# NOTE(review): the cookie looks like a captured browser session (it carries
# user/account identifiers) and will presumably expire -- replace it with your
# own before running, and avoid publishing real session cookies.
headers = {
    'cookie': 'kaola_user_key=b640efcb-cc0c-4892-9e58-c506543c3b83; JSESSIONID-WKL-8IO=pOH3hziWLSemIOWCMgbpwlepWeb7nVy7uhD%2BoduXLE9%5C%2FnMD7uu%2F%2B9as9RQVWdcPbOe%2FDQfxKTvAV5j0IdlOU7HNtia8TVLdgNfm2PJUlkztL5xj3h0bvWHSx%2FDiq%5COpmVmWLNbrW3MsNkZU85%5CJ%2BaAB5bpCK92UJyxapJIg1jTmW1O4%3A1646288721219; _klhtxd_=31; cna=diVbGqmG/CYCAa8APXP4VXR9; __da_ntes_utma=2525167.670118404.1646202322.1646202322.1646202322.1; davisit=1; __da_ntes_utmz=2525167.1646202322.1.1.utmcsr%3D(direct)%7Cutmccn%3D(direct)%7Cutmcmd%3D(none); __da_ntes_utmfc=utmcsr%3D(direct)%7Cutmccn%3D(direct)%7Cutmcmd%3D(none); xlly_s=1; _samesite_flag_=true; cookie2=1ad0e474c299ec6653e7c5bdbdb00778; t=5da53c986013272dd516019bc902aeec; _tb_token_=eb68509f646be; csg=8c843e87; NTES_OSESS=140147a12e6547538e725cc81b4a63d8; KAOLA_USER_ID=109999078967764758; KAOLA_MAIN_ACCOUNT=16458731261947577@pvkaola.163.com; unb=2213306950380; kaola_csg=287e094b; kaola-user-beta-traffic=15818434647; firstLogin=0; ucn=center; KAOLA_USER_ID.sig=JApGPboS22_xHs24DTHRstXn6LVy3Y0c5tc7qcINN_o; KAOLA_NEW_USER_COOKIE=yes; hb_MA-AE38-1FCC6CD7201B_source=search.kaola.com; __da_ntes_utmb=2525167.1.10.1646205828; NTES_KAOLA_RV=10719774; x5sec=7b227761676272696467652d616c69626162612d67726f75703b32223a226639386134393365663538623963316130636139626433633365363465613165434f4f342f4a4147454c5879774f4c436b5a544b43786f504d6a49784d7a4d774e6a6b314d444d344d4473784d49503437616e392f2f2f2f2f77453d227d; isg=BJSUQ8GMili3Wh6U07ej7bIjZdIG7bjX3X5K_C50d569GTVjVv_YZ6ILH1Ek-vAv; l=eBgnRVORLE91D_9SBOfanurza77OSIRYYuPzaNbMiOCPOBfB51UNW6DngL86C3GVh6zXR3yZPcG9BeYBq7VonxvtDGO5MsHmn; tfstk=cvm1BAZXM1f6pfYVbA9ebVYDFeEAwKX7hFNiCq-_msOzf71D8gPW_L0cMoyYd',
    'referer': 'https://www.kaola.com/',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36',
}

# Search results page. The `key` query parameter holds the search keyword,
# percent-encoded twice (it decodes to the Chinese word for "mobile phone").
url = 'https://search.kaola.com/search.html?key=%25E6%2589%258B%25E6%259C%25BA&searchRefer=searchbutton&zn=top'

# requests has no default timeout; without one an unresponsive server would
# block this call indefinitely.
response = requests.get(url, headers=headers, timeout=10)
# Raise on 4xx/5xx instead of silently parsing an error page as if it were
# search results.
response.raise_for_status()

 

2. 获取数据

# Step 2: take the response body decoded as text; it is fed to the HTML
# selector in the parsing step.
html_data = response.text

 

3. 解析数据

import parsel

# Step 3: parse the page text into per-product fields.
selector = parsel.Selector(html_data)
# Every element carrying both the `goodswrap` and `promotion` classes is
# handled as one product entry.
goods = selector.css('.goodswrap.promotion')
for good in goods:
    # `::text` selects a tag's text content, e.g. <div>abcdefg.....</div>
    # `::attr(name)` selects the value of the named attribute.
    #
    # `.get()` returns None when the selector matches nothing. Fields that are
    # stripped or concatenated below therefore use an empty-string fallback so
    # that one incomplete product entry does not abort the whole loop.
    blackCardPrice = good.css('.blackCardPrice::text').get()    # black-card price
    bigPrice = good.css('.bigPrice::text').get()    # regular price
    grayPrice = good.css('.grayPrice.deprecated::text').get()   # original price
    title = good.css('h2::text').get(default='').strip()  # product name
    comment_num = good.css('.comments::text').get(default='').strip()  # comment count
    address = good.css('.proPlace.ellipsis::text').get(default='').strip()    # location
    selfflag = good.css('.selfflag::text').get(default='').strip()    # shop
    # The scheme is prepended to the href, which presumably is
    # protocol-relative ("//...") -- TODO confirm against the live page.
    href = good.css('a::attr(href)').get()
    link = 'https:' + href if href else ''    # product link
    print(title, blackCardPrice, bigPrice, grayPrice, comment_num, address, selfflag, link)

 

4. 保存数据

import csv

# Step 4: append one row of product fields to the CSV file.
#
# NOTE(review): as laid out here this block runs once, after the parsing loop
# has finished, so only the values of the last parsed product are written (and
# the names are undefined if no product matched). Presumably it is meant to be
# placed inside the `for good in goods:` loop so every product gets a row --
# confirm and move it there.
#
# mode='a' appends to an existing file instead of overwriting it; newline=''
# is what the csv module expects so it can control line endings itself.
with open('考拉海购.csv', mode='a', encoding='utf-8', newline='') as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow([title, blackCardPrice, bigPrice, grayPrice, comment_num, address, selfflag, link])

 

 

 

 

 

 

posted @ 2022-03-12 18:44  松鼠爱吃饼干  阅读(278)  评论(0)  编辑  收藏  举报
Title