DAY-04
Scraping multi-page app data from Wandoujia (豌豆荚)
import requests
import re
from bs4 import BeautifulSoup


# 1. Send the request
def get_page(url):
    response = requests.get(url)
    return response


# 2. Parse the data: data.content is a raw HTML fragment, so build a soup first
def parse_index(code):
    soup = BeautifulSoup(code, 'html.parser')
    app_info = soup.find_all(name='li', class_='card')
    return app_info


# 3. Save the data
def save_data(app):
    app_name = app.h2.a.text
    detail_url = app.h2.a.attrs['href']
    download_num = app.find(class_='install-count').text
    app_size = app.find(name='span', attrs={'title': re.compile(r'\d+MB')}).text
    data = f"""
app name: {app_name}
detail page url: {detail_url}
downloads: {download_num}
app size: {app_size}
"""
    print(data)
    with open('豌豆荚.txt', 'a', encoding='utf-8') as f:
        f.write(data)
        f.flush()  # flush after every write so records survive an early crash


if __name__ == '__main__':
    for page in range(1, 31):  # pages 1 through 30
        # the ctoken is tied to a browser session and may expire
        url = f'https://www.wandoujia.com/wdjweb/api/category/more?catId=6001&subCatId=0&page={page}&ctoken=pWcDYXidvX0QXPcolC5uu3T2'
        index_response = get_page(url)
        data = index_response.json()
        text = data.get('data').get('content')  # the HTML fragment holding the app cards
        app_list = parse_index(text)
        for app in app_list:
            save_data(app)
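The endpoint above is Wandoujia's category pagination API: it returns JSON, and the data.content field carries an HTML fragment of li.card nodes, which is why the response is parsed with BeautifulSoup instead of being read as structured data. Below is a minimal sketch for inspecting a single page before running the full 30-page loop; the ctoken is copied from the script above and may be session-bound, so if the request fails, grab a fresh one from the browser's network panel.

# Sketch: inspect one page of the pagination API
# (assumes the ctoken copied from the script above is still valid)
import requests

url = ('https://www.wandoujia.com/wdjweb/api/category/more'
       '?catId=6001&subCatId=0&page=1&ctoken=pWcDYXidvX0QXPcolC5uu3T2')
response = requests.get(url)
data = response.json()
print(list(data.keys()))                      # top-level keys of the JSON payload
print(data.get('data').get('content')[:300])  # start of the embedded HTML fragment

Looking at the raw fragment confirms that each app card is a li element with class "card", which is exactly what parse_index selects.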
