爬虫实践05 | 爬取参展公司信息
完整代码:
# 2023-08-09: this version of the scraper filters the search by "Apparel".
#
# Pages through the Balluun search API (pages 1-10, 48 results per page),
# collects name / address / custom-field-7901 value for every hit, and writes
# the collected rows to data8.xlsx.
import requests
import json
import pandas as pd
import time

FIRST_PAGE = 1
LAST_PAGE = 10  # inclusive
CATEGORY_FIELD_ID = 7901  # custom field whose value is exported per company
NOT_AVAILABLE = "N/A"  # placeholder for any missing piece of information
PAGE_DELAY_SECONDS = 2  # pause between pages so the API is not hammered
REQUEST_TIMEOUT_SECONDS = 30  # without a timeout a stalled connection hangs forever

# Request headers and search payload are identical for every page, so they
# are built once instead of once per loop iteration.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
    'Balluun-Client-Id': '516332e69e7a9d219ac62c42046feae522a1',
    'Balluun-Domain-Id': '60332E414BC8F8D1D068FB422E0329BA',
}
PAYLOAD = {
    "advanced": True,
    "type": "brand",
    "filters": {
        "customfield": {"7901": ["Apparel"], "15138": ["China"]},
        "only_brand": True,
        "locale": ["en-us", "en-US"],
        "subscription": [2601, 2602],
    },
    "sort": [],
    "locale": "en-US",
}


def category_value(item):
    """Return the value of custom field 7901 for *item*, or "N/A" if absent.

    The lookup is done per item so that a company without the field never
    inherits the value found for the previous company (and the very first
    company without it does not raise NameError).
    """
    for field in item.get('customfields') or []:
        if field.get('customfield_id') == CATEGORY_FIELD_ID:
            return field.get('customfield_value', NOT_AVAILABLE)
    return NOT_AVAILABLE


def build_row(item):
    """Turn one search hit into a row: [name, state, address1, field value, city].

    Only the first entry of ``company_address`` is used; when there is no
    address at all, every address column is set to "N/A".
    """
    addresses = item.get('company_address') or []
    first_address = addresses[0] if addresses else {}
    return [
        item.get('name', NOT_AVAILABLE),
        first_address.get('state', NOT_AVAILABLE),
        first_address.get('address1', NOT_AVAILABLE),
        category_value(item),
        first_address.get('city', NOT_AVAILABLE),
    ]


def fetch_page(page):
    """POST the search payload for one result page and return the decoded JSON."""
    url = f'https://api.balluun.com/search?page={page}&per_page=48'
    response = requests.post(
        url=url,
        json=PAYLOAD,
        headers=HEADERS,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    return response.json()


data = []
for i in range(FIRST_PAGE, LAST_PAGE + 1):  # pages 1 through 10
    res = fetch_page(i)
    if 'search_results' not in res:
        # Best effort: a page without results is skipped, not treated as fatal.
        continue
    for item in res['search_results']['hits']:
        data.append(build_row(item))
    # Rest after each page (48 records) before requesting the next one.
    time.sleep(PAGE_DELAY_SECONDS)

df = pd.DataFrame(data, columns=['Name', 'State', 'address1', 'Custom Field Value', 'city'])
df.to_excel('data8.xlsx', index=False)
print("数据已保存到 data8.xlsx 文件中。")
分类:
Python爬虫实践
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY"
· Docker 太简单,K8s 太复杂?w7panel 让容器管理更轻松!