爬取豆瓣Top250_Ajax动态页面
爬取网址:
完整代码:
![](https://images.cnblogs.com/OutliningIndicators/ContractedBlock.gif)
"""Fetch one page of a Douban movie chart from its Ajax endpoint and save it.

The chart page loads its rows dynamically by POSTing ``start``/``limit`` to a
JSON endpoint. This script sends the same POST and writes the decoded response
text to ``doubantop250.json``.
"""

import ssl
import sys
from typing import Optional
from urllib import parse, request

# Ajax endpoint queried by the chart page (kept exactly as in the original).
url = "https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action"

# Browser-like User-Agent sent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
}

OUTPUT_PATH = "doubantop250.json"


def build_request(start: int = 20, limit: int = 20) -> request.Request:
    """Return the POST request for one page of chart results.

    Args:
        start: Value of the ``start`` form field (offset of the first item).
        limit: Value of the ``limit`` form field (number of items).

    Returns:
        A ``urllib.request.Request`` carrying the url-encoded form body.
    """
    form = {"start": str(start), "limit": str(limit)}
    # POST data must be bytes, so url-encode the form and then encode it.
    payload = parse.urlencode(form).encode("utf-8")
    return request.Request(url, data=payload, headers=headers)


def decode_body(raw: bytes, charset: Optional[str] = None) -> str:
    """Decode a response body using its declared charset.

    The original script decoded with ``sys.getfilesystemencoding()``, which is
    the encoding of the local filesystem and unrelated to what the server
    sent. Use the charset from the response instead, defaulting to UTF-8.

    Args:
        raw: The undecoded response body.
        charset: Charset declared by the response, or ``None`` if absent.

    Returns:
        The decoded text.
    """
    return raw.decode(charset or "utf-8")


def fetch(start: int = 20, limit: int = 20, timeout: float = 10.0) -> str:
    """POST to the chart endpoint and return the decoded response text.

    Args:
        start: Offset of the first item to request.
        limit: Number of items to request.
        timeout: Socket timeout in seconds, so a stalled server cannot hang
            the script forever.

    Raises:
        urllib.error.URLError: If the request fails.
    """
    # NOTE(security): certificate verification is disabled, as in the original
    # script, but only for this request rather than by monkey-patching
    # ssl._create_default_https_context for the whole process.
    context = ssl.create_default_context()
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE

    # The context manager closes the connection even if read() raises.
    with request.urlopen(
        build_request(start, limit), timeout=timeout, context=context
    ) as response:
        return decode_body(response.read(), response.headers.get_content_charset())


def main() -> int:
    """Download the default page, write it to OUTPUT_PATH, and print ``ok``."""
    body = fetch()
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        f.write(body)
    print("ok")
    return 0


if __name__ == "__main__":
    sys.exit(main())
参考文章: