爬虫第三篇:requests模块
requests模块其实就是对urllib.request模块的进一步优化,提供了很多可选的参数,同时简化了操作。下面我还是贴上具体操作的代码。
requests GET请求
GET请求html文件
# Example: fetch an HTML page with a GET request and print its text.
import requests

# Search endpoint; requests appends the encoded `params` as the query string.
baseurl = "http://www.baidu.com/s?"
headers = {"User-Agent": "Mozilla/5.0"}

key = "关键字"
page = 1
# Turn the 1-based page number into the offset sent as "pn".
# NOTE(review): the step of 10 presumably matches the site's results-per-page
# setting - confirm before relying on it.
pn = (int(page) - 1) * 10

params = {
    "wd": key,
    "pn": pn,
}

# Without a timeout, requests can wait indefinitely on an unresponsive server.
res = requests.get(baseurl, params=params, headers=headers, timeout=10)
# Set the encoding used to decode res.text before reading it.
res.encoding = "utf-8"
html = res.text
print(html)

# Other useful attributes of the response object:
# res.status_code  # HTTP status code
# res.url          # URL of the response
GET请求二进制文件
# Example: download a binary file (an image) with a GET request and save it.
import requests

url = "https://ss0.bdstatic.com/70cFvHSh_Q1YnxGkpoWK1HF6hhy/it/u=2914738751,1449131471&fm=26&gp=0.jpg"
headers = {"User-Agent": "Mozilla/5.0"}

# Send the request; the timeout avoids blocking forever on a stalled connection.
res = requests.get(url, headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of saving an error page as a .jpg.
res.raise_for_status()
# res.content is the raw response body as bytes. res.encoding only affects
# res.text, so it is not set for a binary download.
image_bytes = res.content

# Write the bytes to a local file in binary ("wb") mode.
with open("./mmm.jpg", "wb") as f:
    f.write(image_bytes)
requests POST 请求
# Example: send form data with a POST request and parse the JSON reply.
import json

import requests

key = 'this is a demo'
# Form fields sent in the POST body; "i" carries the text held in `key`.
# NOTE(review): salt/sign/ts/bv look like values captured from a browser
# session - confirm whether the endpoint validates them.
data = {
    "i": key,
    "from": "AUTO",
    "to": "AUTO",
    "smartresult": "dict",
    "client": "fanyideskweb",
    "salt": "15458120942800",
    "sign": "108feafc7c01c7461a41034463a8df9b",
    "ts": "1545812094280",
    "bv": "363eb5a1de8cfbadd0cd78bd6bd43bee",
    "doctype": "json",
    "version": "2.1",
    "keyfrom": "fanyi.web",
    "action": "FY_BY_REALTIME",
    "typoResult": "false"
}

url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule"
headers = {"User-Agent": "Mozilla/5.0"}

# Without a timeout, requests can wait indefinitely on an unresponsive server.
res = requests.post(url, data=data, headers=headers, timeout=10)
# Surface HTTP errors here rather than as a confusing JSON decode failure below.
res.raise_for_status()
res.encoding = "utf-8"
html = res.text

# Convert the JSON-formatted string into a Python dict.
rDict = json.loads(html)
# Pull the "tgt" field out of the first entry of the nested "translateResult" list.
result = rDict["translateResult"][0][0]["tgt"]