09 Requests

pip3 install requests

验证安装是否完成:在 Python 交互式命令行下执行 import requests,无报错信息即表示安装成功

一般常用的测试网页为 http://httpbin.org/get

import requests
response = requests.get('http://www.baidu.com')
print(type(response))
print(response.status_code)
print(type(response.text))
print(response.text)   #网页信息  
print(response.cookies)

  

get请求

import requests
response = requests.get('http://httpbin.org/get') #测试网页
print(response.text)

  

 

带参数的get请求

#通过params参数构建url地址
#params前面是逗号
import requests
data = {
    'name':'liu',
    'age':22
}
response = requests.get('http://httpbin.org/get',params=data)
print(response.text)
打印结果
{
  "args": {
    "age": "22",
    "name": "liu"
  },
  "headers": {
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate",
    "Connection": "close",
    "Host": "httpbin.org",
    "User-Agent": "python-requests/2.20.0"
  },
  "origin": "210.77.180.38",
  "url": "http://httpbin.org/get?name=liu&age=22"
}

  

解析json

import requests
import json
response = requests.get('http://httpbin.org/get')
print(response.text)
print(response.json())
print(json.loads(response.text)) #两次返回结果是一样的
print(type(response.json()))  
打印结果
{
  "args": {}, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Connection": "close", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.20.0"
  }, 
  "origin": "210.77.180.38", 
  "url": "http://httpbin.org/get"
}

{'headers': {'User-Agent': 'python-requests/2.20.0', 'Connection': 'close', 'Accept': '*/*', 'Host': 'httpbin.org', 'Accept-Encoding': 'gzip, deflate'}, 'origin': '210.77.180.38', 'args': {}, 'url': 'http://httpbin.org/get'}
{'headers': {'User-Agent': 'python-requests/2.20.0', 'Connection': 'close', 'Accept': '*/*', 'Host': 'httpbin.org', 'Accept-Encoding': 'gzip, deflate'}, 'origin': '210.77.180.38', 'args': {}, 'url': 'http://httpbin.org/get'}
<class 'dict'>

  

获取二进制数据

import requests
response = requests.get('http://github.com/favicon.ico')
print(type(response.text),type(response.content)) 
print(response.text)
print(response.content)
打印结果

<class 'str'> <class 'bytes'>
........
网页源码和图片的二进制字节

  

下载图片

import requests
response = requests.get('http://github.com/favicon.ico')
with open('favicon.ico','wb') as f:
    f.write(response.content) #content 获取二进制数据
    #with 语句结束时会自动关闭文件,无需再手动调用 f.close()

运行后可在路径下找到下载的图片

   

#以访问知乎为例
import requests
response = requests.get('https://www.zhihu.com/explore')
print(response.text)
打印结果

<html>
<head><title>400 Bad Request</title></head>
<body bgcolor="white">
<center><h1>400 Bad Request</h1></center>
<hr><center>openresty</center>
</body>
</html>

 通过加headers来访问

import requests
headers = {
    'User-Agent':'自行添加浏览器的User-Agent字符串'
}
response = requests.get('https://www.zhihu.com/explore',headers=headers)
print(response.text)

  

基本post请求

import requests
data = {
    'name':'liu',
    'age':22
}
response = requests.post('http://httpbin.org/post',data = data)
print(response.text)
打印结果

{
  "args": {}, 
  "data": "", 
  "files": {}, 
  "form": {
    "age": "22", 
    "name": "liu"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Connection": "close", 
    "Content-Length": "15", 
    "Content-Type": "application/x-www-form-urlencoded", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.20.0"
  }, 
  "json": null, 
  "origin": "210.77.180.38", 
  "url": "http://httpbin.org/post"
}

  添加headers  与get方法一样

import requests
data = {
    'name':'liu',
    'age':22
}
headers = {'User-Agent':'自行添加浏览器的User-Agent字符串'}
response = requests.post('http://httpbin.org/post',data = data,headers=headers)
print(response.json())

  

响应

response 属性

import requests
response = requests.get('http://www.baidu.com')
print(type(response.status_code),response.status_code)
print(type(response.headers),response.headers)
print(type(response.cookies),response.cookies)
print(type(response.url),response.url)
print(type(response.history),response.history)
打印结果

<class 'int'> 200
<class 'requests.structures.CaseInsensitiveDict'> {'Content-Type': 'text/html', 'Cache-Control': 'private, no-cache, no-store, proxy-revalidate, no-transform', 'Transfer-Encoding': 'chunked', 'Server': 'bfe/1.0.8.18', 'Content-Encoding': 'gzip', 'Set-Cookie': 'BDORZ=27315; max-age=86400; domain=.baidu.com; path=/', 'Last-Modified': 'Mon, 23 Jan 2017 13:27:36 GMT', 'Date': 'Thu, 08 Nov 2018 07:18:47 GMT', 'Pragma': 'no-cache', 'Connection': 'Keep-Alive'}
<class 'requests.cookies.RequestsCookieJar'> <RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]>
<class 'str'> http://www.baidu.com/
<class 'list'> []

  

状态码判断

import requests
response = requests.get('http://www.baidu.com')
exit() if not response.status_code == requests.codes.ok else print('访问成功')
exit() if not response.status_code ==200 else print('访问成功') #可以直接用状态码200替换
打印结果
访问成功
访问成功

  

高级操作

文件上传

import requests
files = {'file':open('favicon.ico','rb')}
response = requests.post('http://httpbin.org/post',files = files)
print(response.text)

 

获取cookie

import requests
response = requests.get('http://www.baidu.com')
print(response.cookies)
for key,value in response.cookies.items():
    print(key + '='+ value)

返回结果

<RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]>
BDORZ=27315

  

会话维持

模拟登陆

import requests
requests.get('http://httpbin.org/cookies/set/number/123456') #设置cookies
response = requests.get('http://httpbin.org/cookies')
print(response.text)
打印结果

{
  "cookies": {}
}

#运行结果cookies是个空 

 因为设置cookies的请求和获取cookies的请求是两次相互独立的访问(相当于用两个不同的浏览器分别访问),所以获取到的cookies为空。要保持会话,需要使用requests库中的Session对象

通过session对象在同一个浏览器中发起两次get请求来实现

 

import requests
s = requests.Session()
s.get('http://httpbin.org/cookies/set/number/123456')
response = s.get('http://httpbin.org/cookies')
print(response.text)
打印结果

{
  "cookies": {
    "number": "123456"
  }
}

  

证书验证

import requests
response = requests.get('https://www.12306.cn')
print(response.status_code)

  

 

import requests
response = requests.get('https://www.12306.cn',verify=False) #verify设置为False即取消证书验证,默认为True
print(response.status_code)
打印结果
200
c:\users\elric\appdata\local\programs\python\python35-32\lib\site-packages\urllib3\connectionpool.py:847: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
  InsecureRequestWarning)
c:\users\elric\appdata\local\programs\python\python35-32\lib\site-packages\urllib3\connectionpool.py:847: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
  InsecureRequestWarning)

 因为会有警告信息,所以需要调用urllib3的disable_warnings()来关闭告警

import requests
from requests.packages import urllib3
urllib3.disable_warnings() #包中的告警不可用
response = requests.get('https://www.12306.cn',verify=False) #verify设置为False即取消证书验证,默认为True
print(response.status_code)

打印结果
200

  

添加本地证书信息

import requests
response = requests.get('https://www.12306.cn',cert =( '/path/server.crt','/path/key'))
#cert 参数用于指定本地(客户端)证书和私钥文件的路径;此处没有关闭verify,因此不会再出现上面的告警

  

代理设置

import requests
proxies = {
    'http':'http://代理地址',
    'https':'https://..代理地址'
}
response = requests.get('http://xxxxx.com',proxies=proxies)
print(response.status_code)

 

#有用户名和密码的情况
import requests
proxies = {
    'http':'http://user:password@代理地址',
    'https':'https://..代理地址'
}
response = requests.get('http://xxxxx.com',proxies=proxies)
print(response.status_code)

  

#如果不是http或https代理 需要设置socks代理
import requests #需要先安装socks支持: pip3 install 'requests[socks]'
proxies = {
    'http':'socks5://代理地址',
    'https':'socks5://代理地址'
}
response = requests.get('http://xxxxx.com',proxies=proxies)
print(response.status_code)

  

  超时设置 #设置time out

import requests
response = requests.get('https://www.taobao.com',timeout = 1) #一秒内应答
print(response.status_code)

  

如果网站1秒内未响应,requests会抛出超时异常(如 ReadTimeout 或 ConnectTimeout)。如果不捕获该异常,程序会报错并终止运行;捕获方法见下面的异常处理

异常处理

import requests
from requests.exceptions import ReadTimeout
try:
    response = requests.get('https://httpbin.org/get',timeout = 0.5)
    print(response.status_code)
except ReadTimeout:
    print('Timeout')
打印结果
Timeout

#通过try 来捕获异常信息

  

认证设置 遇到需要输入用户名密码的情况

import requests
from requests.auth import HTTPBasicAuth
r = requests.get('http://.123..23',auth = HTTPBasicAuth('user','123'))
print(r.status_code)

 

import requests
from requests.exceptions import ReadTimeout,HTTPError,RequestException
try:
    response = requests.get('http://httpbin.org/get',timeout = 0.6)
    print(response.status_code)
except ReadTimeout:
    print('Timeout')
except HTTPError:
    print('HTTPError')
except RequestException:
    print('Error')

  

 

posted @ 2018-11-08 16:10  犀利的攻城狮  阅读(104)  评论(0编辑  收藏  举报