# 豆瓣电影爬虫
import requests


class DoubanSpider(object):
    """Query Douban's movie chart endpoint and print each film's title and score.

    The spider sends one GET request to ``self.url`` with browser-like
    headers and the query parameters built in :meth:`main`, then iterates
    over the decoded JSON response.
    """

    def __init__(self, timeout=10):
        """Set up the endpoint URL, request headers and request timeout.

        Args:
            timeout: Seconds to wait for the server before ``requests``
                raises a timeout error. Passed straight to
                ``requests.get``; ``None`` disables the timeout.
        """
        self.url = 'https://movie.douban.com/j/chart/top_list?'
        self.timeout = timeout
        # NOTE(review): these headers look like they were copied from a
        # browser session. The Cookie is hard-coded and has probably
        # expired -- confirm whether the endpoint actually needs it.
        # NOTE(review): "br" is advertised in Accept-Encoding; requests can
        # only decode Brotli when a brotli package is installed -- confirm
        # the server does not answer with br, or drop it from the header.
        self.headers = {
            "Accept": '*/*',
            "Accept-Encoding": 'gzip, deflate, br',
            "Accept-Language": 'zh-CN,zh;q=0.9',
            "Connection": 'keep-alive',
            "Cookie": 'bid=8-nCfhrghnU; __yadk_uid=YRJL4YjOgUBe7yEo3opPsDvJqnarI2oM; trc_cookie_storage=taboola%2520global%253Auser-id%3D8fc3589f-2abb-45b3-b21f-dabdd8ad9733-tuct3e78214; ll="108309"; ap_v=0,6.0; __utma=30149280.2076664567.1559562682.1559562682.1563870087.2; __utmb=30149280.0.10.1563870087; __utmc=30149280; __utmz=30149280.1563870087.2.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utma=223695111.1070403850.1559562682.1559562682.1563870087.2; __utmb=223695111.0.10.1563870087; __utmc=223695111; __utmz=223695111.1563870087.2.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1563870089%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3D12rRCXsSrd1oLSL7dm_tBhgHaysfIxjCzkJqy6_F4kEL-HNdTUAh3Z6A-WLXShEt%26wd%3D%26eqid%3Dae785c100002a33c000000025d36c37d%22%5D; _pk_ses.100001.4cf6=*; _pk_id.100001.4cf6=1ee68b08630bb7f8.1559562681.2.1563872988.1559562681.',
            "Host": 'movie.douban.com',
            "Referer": 'https://movie.douban.com/typerank?type_name=%E7%88%B1%E6%83%85&type=13&interval_id=100:90&action=',
            "User-Agent": 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
            "X-Requested-With": 'XMLHttpRequest',
        }

    # Request + parse
    def get_film_info(self, params):
        """Fetch the chart for ``params`` and print ``title score`` per film.

        Args:
            params: Mapping of query-string parameters appended to
                ``self.url`` (see :meth:`main` for the keys that are sent).

        Raises:
            requests.exceptions.RequestException: On connection failure,
                timeout, or a 4xx/5xx response.
            ValueError: If the response body is not valid JSON.
            KeyError: If an entry lacks the ``title`` or ``score`` key.
        """
        response = requests.get(
            url=self.url,
            headers=self.headers,
            params=params,
            # Without a timeout a stalled connection would block forever.
            timeout=self.timeout,
        )
        # Fail loudly on an HTTP error instead of trying to parse an error
        # page as if it were the film list.
        response.raise_for_status()

        for film in response.json():
            # Film name
            name = film['title']
            # Film rating
            score = film['score']
            print(name, score)

    def main(self):
        """Prompt for a film type and a count, then print the matching films."""
        # Surrounding whitespace is stripped so an accidental space does not
        # end up in the query string.
        # NOTE(review): the value is sent as the `type` query parameter; the
        # Referer header uses type=13, so this is presumably a numeric genre
        # id -- confirm.
        tp = input('请输入类型').strip()
        num = input('请输入电影数量').strip()
        params = {
            'type': tp,
            'interval_id': '100:90',
            'action': '',
            'start': '0',
            'limit': num,
        }
        self.get_film_info(params)


if __name__ == '__main__':
    spider = DoubanSpider()
    spider.main()