day5-requests的post方法
1.初步接触post
1.requests的POST请求
"""
Log in to GitHub with requests: GET the login page, then POST the form.

Step 1 - request the login page to obtain the CSRF token and the cookies.
    Request URL:      https://github.com/login
    Request Method:   GET
    Response headers: Set-Cookie (the server tells the client which cookies to set)
    Request headers:  Cookie, User-Agent
"""
import re

import requests

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36'
}

url1 = 'https://github.com/login'
response = requests.get(url1, headers=headers)

# Convert the cookies returned by the login page into a plain dict so they
# can be sent back with the POST request below.
login_cookies = response.cookies.get_dict()

# The login form carries the token in a hidden input, e.g.
#   <input type="hidden" name="authenticity_token" value="...">
# The tag may also be written self-closed (`" />`), so both endings match.
token = re.findall(
    r'<input type="hidden" name="authenticity_token" value="(.*?)"\s*/?>',
    response.text,
    re.S,
)
if not token:
    # Fail with a clear message instead of an IndexError on token[0].
    raise RuntimeError('authenticity_token not found in the GitHub login page')
print(token[0])

"""
Step 2 - send the POST request to the session page (see the browser's
network panel for the "session" request).
    Request URL:    https://github.com/session
    Request Method: POST
    Request headers:
        Cookie:     <cookies returned by the login page>
        Referer:    https://github.com/login
        User-Agent: same browser User-Agent as in step 1
    Form Data:
        commit:             Sign in
        utf8:               ✓
        authenticity_token: <token scraped from the login page in step 1>
        login:              <your user name>
        password:           <your password>
        webauthn-support:   supported

NOTE: never paste real credentials or session cookies into notes or source
files; the placeholders below must be filled in locally.
"""
url2 = 'https://github.com/session'

# Build the request body.  The token must be the one scraped above: it is
# tied to the cookies of this very session, a hard-coded value is stale.
form_data = {
    'commit': 'Sign in',
    'utf8': '✓',
    'authenticity_token': token[0],
    'login': '********',
    'password': '***************',
    'webauthn-support': 'supported',
}

# Referer names the page the request comes from (the login page).
headers1 = {
    'Referer': 'https://github.com/login',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36'
}

# Send the request headers, the request body and the login-page cookies.
response2 = requests.post(url2, data=form_data, headers=headers1, cookies=login_cookies)

# Save the returned page so the login result can be inspected in a browser.
with open('github.html', 'w', encoding='utf-8') as f:
    f.write(response2.text)
说明:
'''
post请求登录github
    Request URL: https://github.com/session
    Request Method: POST

    #Referer表示上一个请求的页面
    Referer: https://github.com/login
    User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36

请求体: 只有POST请求才有
    commit: Sign in
    utf8: ✓
    authenticity_token: COh/MDoDDUVauDtPbZ2A6pjf4pEA4pV8jwRO8PjHPzbXiLJiwtCXRQ7Ik3kXWxJOOF+i5/1r9twxUqaUnXe5TA==
    login: HS1
    password: ***********
    webauthn-support: unsupported
'''
2.response响应
import requests

# --- Attributes of a Response object -------------------------------------
response = requests.get('https://www.baidu.com')
print(response.status_code)         # HTTP status code of the response
print(response.url)                 # URL of the response
print(response.text)                # body decoded to text
print(response.content)             # body as raw bytes
print(response.headers)             # response headers (not the request headers)
print(response.history)             # list of redirect responses that led here
print(response.cookies)             # cookie jar object
print(response.cookies.get_dict())  # cookies converted to a dict
print(response.cookies.items())     # cookies as (name, value) pairs; must be called
print(response.encoding)            # character encoding used for .text
print(response.elapsed)             # time taken by the request

# --- Writing a binary stream to disk piece by piece ----------------------
# Send a GET request to the video address.
url = 'https://vd3.bdstatic.com/mda-ic4pfhh3ex32svqi/hd/mda-ic4pfhh3ex32svqi.mp4?auth_key=1557973824-0-0-bfb2e69bb5198ff65e18065d91b2b8c8&bcevod_channel=searchbox_feed&pd=wisenatural&abtest=all.mp4'

# stream=True defers downloading the body until it is iterated.  Do NOT
# touch response.content here: that would read the whole file into memory
# and defeat the purpose of streaming.  The `with` block releases the
# connection once the download is finished.
with requests.get(url, stream=True) as response:
    with open('love_for_GD.mp4', 'wb') as f:
        # iter_content() defaults to 1-byte chunks; use a sensible size.
        for content in response.iter_content(chunk_size=8192):
            f.write(content)
3.requests高级应用(了解)
'''
Certificate verification (most sites are served over https)
'''
import requests
import urllib3
from requests.auth import HTTPBasicAuth

# https = http + ssl
# NOTE: according to the original notes this plain request raises a
# certificate error for this site; the variants below work around it.
response = requests.get('https://www.xiaohuar.com')
print(response.status_code)

# Improvement 1: no error any more, but a warning is emitted.
response = requests.get('https://www.xiaohuar.com', verify=False)  # skip certificate verification
print(response.status_code)

# Improvement 2: no error and no warning.
urllib3.disable_warnings()  # silence the warning
response = requests.get('https://www.xiaohuar.com', verify=False)
print(response.status_code)

# Improvement 3: send a client certificate (pseudo code, paths are placeholders).
# Many https sites can be visited without a certificate; for most of them
# sending one is optional.  It is mandatory only where access is restricted
# to users who were handed a certificate.
response = requests.get(
    'https://www.xiaohuar.com',
    cert=('/path/server.crt', '/path/key'))
print(response.status_code)

'''
Timeouts
    Two forms: float or tuple
    timeout=0.1          one value, applied to both the connect and the read timeout
    timeout=(0.1, 0.2)   0.1 is the connect timeout, 0.2 is the read timeout
'''
# With a value this small the request is expected to fail with a timeout error.
response = requests.get('https://www.baidu.com', timeout=0.0001)
print(response.status_code)

'''
Proxies (very important)
    The request is sent to the proxy, which forwards it on our behalf
    (getting an IP banned is a common problem when crawling).
    The original notes mention the Xici proxy list as a source of proxies.
'''
proxies = {
    # A dict can hold only one value per scheme.  For a proxy that needs
    # credentials, put "user:password" before the '@' and use this instead:
    # 'http': 'http://tank:123@localhost:9527',
    'http': 'http://localhost:9527',
    'https': 'https://localhost:9527',
}
response = requests.get('https://www.12306.cn', proxies=proxies)
print(response.status_code)

# SOCKS proxies are supported too; install with: pip install requests[socks]
# user, pass, host and port are placeholders to be replaced.
proxies = {
    'http': 'socks5://user:pass@host:port',
    'https': 'socks5://user:pass@host:port'
}
response = requests.get('https://www.12306.cn', proxies=proxies)
print(response.status_code)

'''
Authentication
    Some sites pop up a dialog (similar to an alert) asking for a user name
    and a password; the html page is reachable only after authorization.
    Requests offers several authentication schemes, basic auth among them.
    The idea: the user name and password are used to obtain credentials
    that identify the user, who is then authorized through a token.

    Basic authentication:
    HTTP Basic Auth was introduced with HTTP 1.0.  The client authenticates
    for each realm by providing a user name and a password; when the
    authentication fails the server answers the request with 401.
'''
# Tested against the github api.
url = 'https://api.github.com/user'
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
}

# Test 1: no credentials, fails with 401.
response = requests.get(url, headers=HEADERS)
print(response.status_code)  # 401
print(response.text)

# Test 2: authenticate with requests.auth.HTTPBasicAuth; on success the
# user information is returned.  The credentials are placeholders.
response = requests.get(url, headers=HEADERS, auth=HTTPBasicAuth('uesrname', 'pwd'))
print(response.text)

# Test 3: a plain tuple passed as `auth` is treated as HTTPBasicAuth.
response = requests.get(url, headers=HEADERS, auth=('*******', '**********'))
print(response.text)

'''
Uploading files
    Each file is opened in a `with` block so the handle is closed once the
    upload has finished.
'''
# Upload a text file.
with open('user.txt', 'rb') as fh:
    files1 = {'file': fh}
    response = requests.post('http://httpbin.org/post', files=files1)
print(response.status_code)  # 200
print(response.text)         # the request as echoed back by httpbin

# Upload an image file.
with open('小狗.jpg', 'rb') as fh:
    files2 = {'jpg': fh}
    response = requests.post('http://httpbin.org/post', files=files2)
print(response.status_code)  # 200
print(response.text)

# Upload a video file.
with open('love_for_GD.mp4', 'rb') as fh:
    files3 = {'movie': fh}
    response = requests.post('http://httpbin.org/post', files=files3)
print(response.status_code)  # 200
print(response.text)