requests的基本使用
1. 简单获取一个页面信息
import requests

# requests mirrors the HTTP verbs with module-level helpers:
# get(), post(), put(), delete(), ...
response = requests.get('https://www.baidu.com/')

# Inspect the Response object: the object itself, its type, the HTTP
# status code, the decoded body text (a str), and the response cookies.
for item in (response, type(response), response.status_code,
             type(response.text), response.text, response.cookies):
    print(item)
2. 通过params参数,get请求进行url字符串拼接
import requests

# Query-string parameters; passing this dict as `params` makes requests
# append "?key1=value1&key2=value2" to the URL for us.
query = {
    'name': 'dmr',
    'age': 25,
}
response = requests.get('http://httpbin.org/get', params=query)
print(response.text, response.url)

# response.text is a JSON document as a plain str; response.json()
# parses it into a Python dict.
print(type(response.text))
print(response.json())
print(type(response.json()))
3. 抓取二进制数据(如图片等)
import requests

# Fetch binary content (an image).
resp = requests.get('https://dss0.bdstatic.com/5aV1bjqh_Q23odCf/static/superman/img/logo_top-e3b63a0b1b.png')

# resp.text decodes the body as a str; resp.content is the raw bytes.
print(resp.text, resp.content)
print(type(resp.text), type(resp.content))

# Persist the raw bytes to disk.
with open('baidu.png', 'wb') as img_file:
    img_file.write(resp.content)
4. 添加headers请求头的User-agent进行页面访问
import requests

# First request carries no User-Agent header; some sites (like zhihu)
# reject such requests.
plain_resp = requests.get('https://www.zhihu.com/')
print(plain_resp.text)

# Retry with a browser-like User-Agent.
browser_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
}
ua_resp = requests.get('https://www.zhihu.com/', headers=browser_headers)
print(ua_resp.text)
5. post请求,data字典内容,提交数据到表单
import requests

# Form fields submitted in the POST body via the `data` argument.
form_fields = {
    'name': 'dmr',
    'age': 25,
}
response = requests.post('http://httpbin.org/post', data=form_fields)
print(response.text)

# Explore the Response API and the status-code lookup helper.
print(dir(response))
print(dir(requests.codes))
print(requests.codes.ok)
6. 扩展用法
# 6. Extended usage: file upload, cookies, sessions, SSL verification,
#    proxies, timeouts and HTTP basic auth.
import requests
from requests.auth import HTTPBasicAuth

# --- File upload -----------------------------------------------------------
# Use a context manager so the file handle is always closed — the original
# passed a bare open() into `files` and leaked the handle.
with open('baidu.png', 'rb') as upload_fh:
    files = {'file': upload_fh}
    r = requests.post('http://httpbin.org/post', files=files)
print(r.text)

# --- Reading cookies from a response ---------------------------------------
r = requests.get('https://www.baidu.com/')
print(r.cookies, r.cookies.items())
for key, value in r.cookies.items():
    print(key + '=' + value)

# --- Keep a login state via a raw Cookie header ----------------------------
headers = {
    'Cookie': 'JSNID=2D7B55dfsdfsdfECE6E47C6AD; signature=MjEyMzJmMjk3YdsfsdfNzQzODk0YTBlNGE4MDFmYzMxNTgzOTE4MTk4NjMx3cf711642ab38b713e3e9a54f19fad0e; timeStamp=da23b847-43c0-46ab-ba78-9e8ca41a11ea; redisKey=5f948ef7-ae9a-4d67-a7f8-4211baafe92f',
    'User-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
}
r = requests.get('http://ceshi/public/html/index.html', headers=headers)
print(r.text)

# --- Keep a login state via a CookieJar ------------------------------------
cookies = 'JSEID=2D7B5505568DCB699685BECE6E47C6AD; signare=MjEyMzJmMjk3YTDk0YTBlNGE4MDFmYzMxNTgzOTE4MTk4NjMx3cf711642ab38b713e3e9a54f19fad0e; timeStamp=da23b847-43c0-46ab-ba78-9e8ca41a11ea; redisKey=5f948ef7-ae9a-4d67-a7f8-4211baafe92f'
headers = {
    'User-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
}
jar = requests.cookies.RequestsCookieJar()
for cookie in cookies.split(';'):
    # Split on the FIRST '=' only: cookie values (e.g. base64 signatures)
    # may themselves contain '=' — a plain split('=') would raise ValueError.
    key, value = cookie.strip().split('=', 1)
    print(key, value)
    jar.set(key, value)
r = requests.get('http://ceshi/public/html/index.html', headers=headers, cookies=jar)
print(r.text)

# --- Sessions: keeping cookies across requests -----------------------------
# Set a cookie on the test server...
r = requests.get('http://httpbin.org/cookies/set/dmr/25')
# ...but an independent second request carries no cookies, so this is empty.
r2 = requests.get('http://httpbin.org/cookies')
print(r.text, r2.text)

# With a Session, the cookie set by the first request survives into the second.
s = requests.Session()
r = s.get('http://httpbin.org/cookies/set/dmr/25')
r2 = s.get('http://httpbin.org/cookies')
print(r.text, r2.text)

# --- SSL certificate verification (verify defaults to True) ----------------
r = requests.get('https://www.12306.cn')
r2 = requests.get('https://www.12306.cn', verify=False)
# NOTE(review): placeholder client-cert paths — requests raises an IOError
# if these files do not exist.
r3 = requests.get('https://www.12306.cn', cert=('path/cert.crt', '/path/key'))

print(r.text)

# --- Proxy configuration ---------------------------------------------------
# Deliberately invalid proxy addresses; this call raises a ProxyError.
proxies = {
    'http': 'http://10.0.0.100:3128',
    'https': 'http://10.0.0.100:3168',
}
requests.get('https://www.taobao.com', proxies=proxies)

# --- Timeouts --------------------------------------------------------------
# Raises requests.exceptions.Timeout if no response arrives within 0.5 s.
r = requests.get('http://www.python.org', timeout=0.5)
print(r.status_code)

# --- HTTP basic auth (browser "popup" authentication) ----------------------
auth = HTTPBasicAuth('username', 'password')
r = requests.get('http://localhost:5000', auth=auth)
# Shorthand: a (user, password) tuple defaults to HTTPBasicAuth.
r2 = requests.get('http://localhost:5000', auth=('username', 'password'))
print(r.status_code, r2.status_code)
7. requests将参数内容封装成数据对象来请求访问页面
from requests import Request, Session

# Target and payload for the request we are about to build by hand.
url = 'https://www.baidu.com'
data = {
    'name': 'dmr',
    'age': 25
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
}

# Wrap all request parameters in an explicit Request object, turn it into
# a PreparedRequest via a Session, then send it through that Session.
session = Session()
raw_request = Request(method='POST', url=url, data=data, headers=headers)
prepared = session.prepare_request(raw_request)
response = session.send(prepared)
print(response.text, response.status_code)
静静地学习一阵子...