09 Requests
pip3 install requests
验证安装:在 Python 交互式命令行下执行 import requests,无报错信息即安装成功
一般常用的测试网页为 http://httpbin.org/get
import requests

# Fetch a page and inspect the basic attributes of the response object.
response = requests.get('http://www.baidu.com')
body = response.text
print(type(response))
print(response.status_code)
print(type(body))
print(body)  # page content
print(response.cookies)
get请求
import requests

# Plain GET request against the httpbin test page; print the response body.
test_url = 'http://httpbin.org/get'
response = requests.get(test_url)
print(response.text)
带参数的get请求
# Build the query string of the URL through the `params` argument.
# `params` follows the URL, separated from it by a comma.
import requests

query = {
    'name': 'liu',
    'age': 22,
}
response = requests.get('http://httpbin.org/get', params=query)
print(response.text)

# Output:
# {
#   "args": {
#     "age": "22",
#     "name": "liu"
#   },
#   "headers": {
#     "Accept": "*/*",
#     "Accept-Encoding": "gzip, deflate",
#     "Connection": "close",
#     "Host": "httpbin.org",
#     "User-Agent": "python-requests/2.20.0"
#   },
#   "origin": "210.77.180.38",
#   "url": "http://httpbin.org/get?name=liu&age=22"
# }
解析json
import requests
import json

# Parse the JSON body two ways: response.json() and json.loads(response.text).
response = requests.get('http://httpbin.org/get')
raw_text = response.text
print(raw_text)
print(response.json())
print(json.loads(raw_text))  # same result as response.json() above
print(type(response.json()))
打印结果
{
"args": {},
"headers": {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate",
"Connection": "close",
"Host": "httpbin.org",
"User-Agent": "python-requests/2.20.0"
},
"origin": "210.77.180.38",
"url": "http://httpbin.org/get"
}
{'headers': {'User-Agent': 'python-requests/2.20.0', 'Connection': 'close', 'Accept': '*/*', 'Host': 'httpbin.org', 'Accept-Encoding': 'gzip, deflate'}, 'origin': '210.77.180.38', 'args': {}, 'url': 'http://httpbin.org/get'}
{'headers': {'User-Agent': 'python-requests/2.20.0', 'Connection': 'close', 'Accept': '*/*', 'Host': 'httpbin.org', 'Accept-Encoding': 'gzip, deflate'}, 'origin': '210.77.180.38', 'args': {}, 'url': 'http://httpbin.org/get'}
<class 'dict'>
获取二进制数据
import requests

# Compare the types and values of `text` and `content` for a binary resource.
response = requests.get('http://github.com/favicon.ico')
text, raw = response.text, response.content
print(type(text), type(raw))
print(text)
print(raw)
打印结果
下载图片
import requests

# Download the favicon and save it to disk.
response = requests.get('http://github.com/favicon.ico')
with open('favicon.ico', 'wb') as f:
    # `content` holds the binary data. The `with` block closes the file on
    # exit, so no explicit f.close() is needed.
    f.write(response.content)

# After running, the downloaded image can be found in the working directory.
# Example: requesting Zhihu without any custom headers.
import requests

explore_url = 'https://www.zhihu.com/explore'
response = requests.get(explore_url)
print(response.text)
打印结果
<html>
<head><title>400 Bad Request</title></head>
<body bgcolor="white">
<center><h1>400 Bad Request</h1></center>
<hr><center>openresty</center>
</body>
</html>
通过加headers来访问
import requests

# Send a browser-like User-Agent header with the request.
headers = {
    # Example value only - replace it with your own browser's User-Agent.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
}
response = requests.get('https://www.zhihu.com/explore', headers=headers)
print(response.text)
基本post请求
import requests

# Submit form fields with a POST request.
form = {
    'name': 'liu',
    'age': 22,
}
response = requests.post('http://httpbin.org/post', data=form)
print(response.text)

# Output:
# {
#   "args": {},
#   "data": "",
#   "files": {},
#   "form": {
#     "age": "22",
#     "name": "liu"
#   },
#   "headers": {
#     "Accept": "*/*",
#     "Accept-Encoding": "gzip, deflate",
#     "Connection": "close",
#     "Content-Length": "15",
#     "Content-Type": "application/x-www-form-urlencoded",
#     "Host": "httpbin.org",
#     "User-Agent": "python-requests/2.20.0"
#   },
#   "json": null,
#   "origin": "210.77.180.38",
#   "url": "http://httpbin.org/post"
# }
添加headers 与get方法一样
import requests

# POST form data together with custom headers (same `headers` argument as GET).
data = {
    'name': 'liu',
    'age': 22,
}
headers = {
    # Example value only - replace it with your own browser's User-Agent.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
}
response = requests.post('http://httpbin.org/post', data=data, headers=headers)
print(response.json())
响应
response 属性
import requests

# Print the type and value of the main attributes of a response.
response = requests.get('http://www.baidu.com')
# Fixed: the first print referenced the misspelled name `resopnse`.
print(type(response.status_code), response.status_code)
print(type(response.headers), response.headers)
print(type(response.cookies), response.cookies)
print(type(response.url), response.url)
print(type(response.history), response.history)

# Output:
# <class 'int'> 200
# <class 'requests.structures.CaseInsensitiveDict'> {'Content-Type': 'text/html', 'Cache-Control': 'private, no-cache, no-store, proxy-revalidate, no-transform', 'Transfer-Encoding': 'chunked', 'Server': 'bfe/1.0.8.18', 'Content-Encoding': 'gzip', 'Set-Cookie': 'BDORZ=27315; max-age=86400; domain=.baidu.com; path=/', 'Last-Modified': 'Mon, 23 Jan 2017 13:27:36 GMT', 'Date': 'Thu, 08 Nov 2018 07:18:47 GMT', 'Pragma': 'no-cache', 'Connection': 'Keep-Alive'}
# <class 'requests.cookies.RequestsCookieJar'> <RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]>
# <class 'str'> http://www.baidu.com/
# <class 'list'> []
状态码判断
import requests

response = requests.get('http://www.baidu.com')

# Exit unless the status code equals requests.codes.ok.
if response.status_code != requests.codes.ok:
    exit()
else:
    print('访问成功')

# The literal status code 200 can be used in place of requests.codes.ok.
if response.status_code != 200:
    exit()
else:
    print('访问成功')

# Output:
# 访问成功
# 访问成功
高级操作
文件上传
import requests

# Upload a local file through the `files` argument. The `with` block closes
# the file handle once the request has been sent.
with open('favicon.ico', 'rb') as fh:
    files = {'file': fh}
    response = requests.post('http://httpbin.org/post', files=files)
print(response.text)
获取cookie
import requests

# Print the cookie jar, then each cookie as key=value.
response = requests.get('http://www.baidu.com')
jar = response.cookies
print(jar)
for name, val in jar.items():
    print('='.join((name, val)))

# Output:
# <RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]>
# BDORZ=27315
会话维持
模拟登陆
import requests

# Set a cookie with one standalone request ...
requests.get('http://httpbin.org/cookies/set/number/123456')
# ... then read the cookies back with a second standalone request.
response = requests.get('http://httpbin.org/cookies')
print(response.text)

# Output:
# {
#   "cookies": {}
# }
# The returned cookies are empty.
因为设置(set)cookies 和获取(get)cookies 是两次相互独立的请求,相当于两个不同的浏览器在访问,所以获取到的 cookies 为空。要解决这个问题,需要使用 requests 库中的 Session 对象
通过session对象在同一个浏览器中发起两次get请求来实现
import requests

# Issue both requests through one Session so the cookie set by the first
# request is sent with the second. Using the Session as a context manager
# closes it when the block ends.
with requests.Session() as s:
    s.get('http://httpbin.org/cookies/set/number/123456')
    response = s.get('http://httpbin.org/cookies')
print(response.text)

# Output:
# {
#   "cookies": {
#     "number": "123456"
#   }
# }
证书验证
import requests

# Request an HTTPS site with the default settings and print the status code.
target = 'https://www.12306.cn'
response = requests.get(target)
print(response.status_code)
import requests

# verify=False turns certificate verification off (it defaults to True).
target = 'https://www.12306.cn'
response = requests.get(target, verify=False)
print(response.status_code)

# Output:
# 200
# c:\users\elric\appdata\local\programs\python\python35-32\lib\site-packages\urllib3\connectionpool.py:847: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
#   InsecureRequestWarning)
# c:\users\elric\appdata\local\programs\python\python35-32\lib\site-packages\urllib3\connectionpool.py:847: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
#   InsecureRequestWarning)
因为会有警告信息,所以需要调用 urllib3 的 disable_warnings() 来关闭这些警告
import requests
from requests.packages import urllib3

# Silence the warnings emitted by urllib3.
urllib3.disable_warnings()

# verify=False turns certificate verification off (it defaults to True).
target = 'https://www.12306.cn'
response = requests.get(target, verify=False)
print(response.status_code)

# Output:
# 200
添加本地证书信息
import requests

# Supply the paths of the local certificate and key through `cert`.
# With the local certificate paths added, the warning no longer appears.
response = requests.get(
    'https://www.12306.cn',
    cert=('/path/server.crt', '/path/key'),
)
代理设置
import requests

# Map each URL scheme to the proxy that should handle it.
# '代理地址' is a placeholder for the real proxy address.
proxies = {
    'http': 'http://代理地址',  # fixed: the scheme separator was 'http;//'
    'https': 'https://..代理地址',
}
response = requests.get('http://xxxxx.com', proxies=proxies)
print(response.status_code)
# When the proxy requires a username and password.
import requests

# Credentials are embedded in the proxy URL as user:password@host.
# '代理地址' is a placeholder for the real proxy address.
proxies = {
    'http': 'http://user:password@代理地址',
    'https': 'https://..代理地址',
}
response = requests.get('http://xxxxx.com', proxies=proxies)
print(response.status_code)
# For a proxy that is neither HTTP nor HTTPS, configure a SOCKS proxy.
# SOCKS support is an optional extra: pip3 install 'requests[socks]'
import requests

# '代理地址' is a placeholder for the real proxy address.
proxies = {
    'http': 'socks5://代理地址',
    'https': 'socks5://..代理地址',
}
response = requests.get('http://xxxxx.com', proxies=proxies)
print(response.status_code)
超时设置 #设置time out
import requests

# Require the server to answer within one second.
one_second = 1
response = requests.get('https://www.taobao.com', timeout=one_second)
print(response.status_code)
如果网站1秒内未响应,则会抛出超时异常(如 ReadTimeout)。该异常若未被捕获,程序会终止;用 try/except 捕获后程序才能继续运行
异常处理
import requests
from requests.exceptions import ReadTimeout

# Use try/except to catch the exception raised when the read times out.
try:
    response = requests.get('https://httpbin.org/get', timeout=0.5)
except ReadTimeout:
    print('Timeout')
else:
    print(response.status_code)

# Output:
# Timeout
认证设置 遇到需要输入用户名密码的情况
import requests
from requests.auth import HTTPBasicAuth

# Pass a username and password through the `auth` argument.
credentials = HTTPBasicAuth('user', '123')
r = requests.get('http://.123..23', auth=credentials)
print(r.status_code)
import requests
from requests.exceptions import ReadTimeout, HTTPError, RequestException

# Handlers are tried top to bottom, so RequestException is listed last as the
# catch-all.
try:
    response = requests.get('http://httpbin.org/get', timeout=0.6)
except ReadTimeout:
    print('Timeout')
except HTTPError:
    print('HTTPError')
except RequestException:
    print('Error')
else:
    print(response.status_code)