# 豆瓣爬取 (Douban scraper)
# Download the Douban movie "type rank" chart page by page from its JSON
# endpoint and dump everything that was collected into a single text file.
import requests

# JSON endpoint that backs the type-rank page named in the Referer header.
CHART_URL = 'https://movie.douban.com/j/chart/top_list'
# Number of entries requested per call (sent as the `limit` query parameter).
PAGE_SIZE = 20
# Exclusive upper bound for the `start` offset: offsets 0, 20, ..., 340.
TOTAL_ENTRIES = 360
# Seconds to wait for the server; requests waits indefinitely without this.
REQUEST_TIMEOUT = 10
# Destination of the dump. NOTE: despite the extension the content is not HTML.
OUTPUT_FILE = "douban.html"


def ua(start=0):
    """Build the request pieces for one page of the chart.

    Args:
        start: Offset of the first entry to request; passed through
            unchanged as the ``start`` query parameter.

    Returns:
        A ``(headers, params, cookies)`` tuple of dicts, in that order,
        suitable for the matching keyword arguments of ``requests.get``.
    """
    # Hard-coded cookie values -- presumably captured from a browser session;
    # they may need refreshing if the site starts rejecting requests.
    cookies = {
        'll': '"118151"',
        'bid': 'JGmehAcUHh0',
        '_pk_ref.100001.4cf6': '%5B%22%22%2C%22%22%2C1649677087%2C%22https%3A%2F%2Fwww.so.com%2Flink%3Fm%3DbKlGHc6Gj2HNHpFhiczqj0pxzXDgSxFIGqlsiiVQfznXlhKy6bV9w1ve1dZSgHukqTjat9m2%252F0KJ%252BA%252FSJ5E62LWhZBIOHNiFPr%252F%252FzL1ajZBrgf%252BiElJ7to9MV2Pi0QSN0PG37FfMVY6sXTMH4tFMR8g%253D%253D%22%5D',
        '_pk_ses.100001.4cf6': '*',
        'ap_v': '0,6.0',
    }

    # Headers imitate the XHR call a desktop Chrome browser would make from
    # the type-rank page (type_name in the Referer is URL-encoded "动作").
    headers = {
        'Connection': 'keep-alive',
        'Accept': '*/*',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'Referer': 'https://movie.douban.com/typerank?type_name=%E5%8A%A8%E4%BD%9C&type=5&interval_id=100:90',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }

    params = {
        'type': '5',
        'interval_id': '100:90',
        'action': 'None',
        'start': start,
        'limit': str(PAGE_SIZE),
    }
    return headers, params, cookies


def _fetch_page(start):
    """Fetch the chart page beginning at offset *start* and return its JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no answer arrives within ``REQUEST_TIMEOUT``.
    """
    headers, params, cookies = ua(start)
    response = requests.get(
        CHART_URL,
        headers=headers,
        params=params,
        cookies=cookies,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on an error status instead of trying to decode an error
    # page as JSON.
    response.raise_for_status()
    return response.json()


def main():
    """Fetch every page of the chart and write the collected data to disk."""
    pages = [
        _fetch_page(start) for start in range(0, TOTAL_ENTRIES, PAGE_SIZE)
    ]

    # The dump is the Python repr of the list of decoded pages (one element
    # per request). The format is kept as-is so existing consumers of the
    # file keep working.
    with open(OUTPUT_FILE, "w", encoding='utf-8') as fp:
        fp.write(str(pages))
    print("完成")


if __name__ == "__main__":
    main()