Python实现考拉海购数据采集
前言
环境介绍
python 3.8
pycharm 2021专业版
requests >>> pip install requests
parsel >>> pip install parsel
代码实现步骤:
发送请求 >>> 获取数据 >>> 解析数据 >>> 保存数据
对于本篇文章有疑问的同学可以加【资料白嫖、解答交流群:910981974】
代码
1. 发送请求
import csv

import parsel
import requests

# Request headers copied from a logged-in browser session so the site serves
# the normal search page to the script.
# NOTE(review): the cookie is a captured session embedded in the source. It is
# presumably expired by now; substitute your own and never commit a live
# session cookie to a shared repository.
headers = {
    'cookie': 'kaola_user_key=b640efcb-cc0c-4892-9e58-c506543c3b83; JSESSIONID-WKL-8IO=pOH3hziWLSemIOWCMgbpwlepWeb7nVy7uhD%2BoduXLE9%5C%2FnMD7uu%2F%2B9as9RQVWdcPbOe%2FDQfxKTvAV5j0IdlOU7HNtia8TVLdgNfm2PJUlkztL5xj3h0bvWHSx%2FDiq%5COpmVmWLNbrW3MsNkZU85%5CJ%2BaAB5bpCK92UJyxapJIg1jTmW1O4%3A1646288721219; _klhtxd_=31; cna=diVbGqmG/CYCAa8APXP4VXR9; __da_ntes_utma=2525167.670118404.1646202322.1646202322.1646202322.1; davisit=1; __da_ntes_utmz=2525167.1646202322.1.1.utmcsr%3D(direct)%7Cutmccn%3D(direct)%7Cutmcmd%3D(none); __da_ntes_utmfc=utmcsr%3D(direct)%7Cutmccn%3D(direct)%7Cutmcmd%3D(none); xlly_s=1; _samesite_flag_=true; cookie2=1ad0e474c299ec6653e7c5bdbdb00778; t=5da53c986013272dd516019bc902aeec; _tb_token_=eb68509f646be; csg=8c843e87; NTES_OSESS=140147a12e6547538e725cc81b4a63d8; KAOLA_USER_ID=109999078967764758; KAOLA_MAIN_ACCOUNT=16458731261947577@pvkaola.163.com; unb=2213306950380; kaola_csg=287e094b; kaola-user-beta-traffic=15818434647; firstLogin=0; ucn=center; KAOLA_USER_ID.sig=JApGPboS22_xHs24DTHRstXn6LVy3Y0c5tc7qcINN_o; KAOLA_NEW_USER_COOKIE=yes; hb_MA-AE38-1FCC6CD7201B_source=search.kaola.com; __da_ntes_utmb=2525167.1.10.1646205828; NTES_KAOLA_RV=10719774; x5sec=7b227761676272696467652d616c69626162612d67726f75703b32223a226639386134393365663538623963316130636139626433633365363465613165434f4f342f4a4147454c5879774f4c436b5a544b43786f504d6a49784d7a4d774e6a6b314d444d344d4473784d49503437616e392f2f2f2f2f77453d227d; isg=BJSUQ8GMili3Wh6U07ej7bIjZdIG7bjX3X5K_C50d569GTVjVv_YZ6ILH1Ek-vAv; l=eBgnRVORLE91D_9SBOfanurza77OSIRYYuPzaNbMiOCPOBfB51UNW6DngL86C3GVh6zXR3yZPcG9BeYBq7VonxvtDGO5MsHmn; tfstk=cvm1BAZXM1f6pfYVbA9ebVYDFeEAwKX7hFNiCq-_msOzf71D8gPW_L0cMoyYd',
    'referer': 'https://www.kaola.com/',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36',
}

# Search results page. The `key` query parameter is the search keyword,
# percent-encoded twice (it decodes to "手机", i.e. "mobile phone").
url = 'https://search.kaola.com/search.html?key=%25E6%2589%258B%25E6%259C%25BA&searchRefer=searchbutton&zn=top'

# Without a timeout, requests waits indefinitely on a stalled connection.
response = requests.get(url, headers=headers, timeout=10)
# Stop here on a 4xx/5xx reply instead of silently parsing an error page.
response.raise_for_status()
2. 获取数据
# Body of the HTTP response decoded to text; this string is what the
# parsing step feeds to the selector.
html_data = response.text
3. 解析数据
selector = parsel.Selector(html_data)
# Every element carrying both classes is treated as one product entry.
goods = selector.css('.goodswrap.promotion')

# Step 4 (save data) is folded into the loop below so that every product is
# written, not just the last one. The file is opened once, in append mode,
# rather than once per row; newline='' is what the csv module requires so it
# can control line endings itself.
with open('考拉海购.csv', mode='a', encoding='utf-8', newline='') as f:
    csv_writer = csv.writer(f)
    for good in goods:
        # ::text selects a tag's text content, e.g. <div>abcdefg.....</div>
        # ::attr(name) selects the value of the named attribute.
        # .get() returns None when nothing matches; fields that are
        # post-processed use default='' so a missing node yields an empty
        # cell instead of an AttributeError/TypeError.
        blackCardPrice = good.css('.blackCardPrice::text').get()  # black-card price
        bigPrice = good.css('.bigPrice::text').get()  # regular price
        grayPrice = good.css('.grayPrice.deprecated::text').get()  # original price
        title = good.css('h2::text').get(default='').strip()  # product title
        comment_num = good.css('.comments::text').get(default='').strip()  # number of comments
        address = good.css('.proPlace.ellipsis::text').get(default='').strip()  # location
        selfflag = good.css('.selfflag::text').get(default='').strip()  # shop
        # The href is prefixed with the scheme exactly as before; an entry
        # without any link is recorded as an empty string.
        href = good.css('a::attr(href)').get()
        link = 'https:' + href if href else ''  # product link
        print(title, blackCardPrice, bigPrice, grayPrice, comment_num, address, selfflag, link)
        csv_writer.writerow([title, blackCardPrice, bigPrice, grayPrice, comment_num, address, selfflag, link])