requests模块
requests
get请求(参数带有中文)
# Approach 1: percent-encode the Chinese query value with urlencode
import requests
from urllib.parse import urlencode

wd = 'egon老师'
# Chinese characters are percent-encoded (as UTF-8); ASCII letters are left
# unchanged.
encode_res = urlencode({'k': wd}, encoding='utf-8')
print(encode_res)  # k=egon%E8%80%81%E5%B8%88
# Drop the throwaway "k=" prefix to keep only the encoded value.
keyword = encode_res.split('=')[1]
print(keyword)  # egon%E8%80%81%E5%B8%88
url = 'https://www.baidu.com/s?wd=%s&pn=1' % keyword
response = requests.get(
    url,
    headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36',
    },
    # Without a timeout requests waits forever on an unresponsive server.
    timeout=10,
)
# Approach 2: pass the query through params and let requests encode it
import requests

wd = 'egon老师'
# The closing quote was missing here, which made the snippet a SyntaxError.
url = 'https://www.baidu.com/s'
response = requests.get(
    url,
    headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36',
    },
    # requests encodes these values and appends them to url as the query string.
    params={"wd": wd},
    # Without a timeout requests waits forever on an unresponsive server.
    timeout=10,
)
get请求>>>headers
#通常我们在发送请求时都需要带上请求头,请求头是将自身伪装成浏览器的关键,常见的有用的请求头如下
Host
Referer #大型网站通常都会根据该参数判断请求的来源
User-Agent #客户端
Cookie #Cookie信息虽然包含在请求头里,但requests模块有单独的参数来处理它,headers={}内就不要放它了
补充
# stream=True fetches the body piece by piece. For a large download (e.g. a
# 100 GB video), reading response.content and writing it to the file in one go
# is not reasonable.
import requests

# Using the response as a context manager releases the connection when the
# download finishes or fails part-way.
with requests.get(
    'https://gss3.baidu.com/6LZ0ej3k1Qd3ote6lo7D0j9wehsv/tieba-smallvideo-transcode/1767502_56ec685f9c7ec542eeaf6eac93a65dc7_6fe25cd1347c_3.mp4',
    stream=True,
    # Without a timeout requests waits forever on an unresponsive server.
    timeout=10,
) as response:
    with open('b.mp4', 'wb') as f:
        # iter_content() defaults to chunk_size=1, i.e. one byte per
        # iteration; ask for 64 KiB chunks so the loop is not dominated by
        # per-call overhead.
        for chunk in response.iter_content(chunk_size=64 * 1024):
            f.write(chunk)
post请求
post请求,登录
登录行为需要带cookie,token
登录github
#网站分析(用户名正确,密码输错)
#也就是分析出登录所需的路由,请求头,请求体
浏览器输入https://github.com/login
然后输入错误的账号密码,抓包
发现登录行为是post提交到:https://github.com/session
import re

import requests

url = "https://github.com/login"
# A Session stores the cookies it receives and sends them back automatically,
# so there is no need to read response.cookies.get_dict() and pass cookies=...
# by hand on the next request.
session = requests.session()
# Without a timeout requests waits forever on an unresponsive server.
response = session.get(url, timeout=10)

# Pull the authenticity_token value out of the login page HTML. Fail with a
# clear message if the page no longer contains it, instead of an
# AttributeError on None.
token_match = re.search('authenticity_token" value="(.*?)"', response.text)
if token_match is None:
    raise RuntimeError("authenticity_token not found in the login page")
token = token_match.group(1)

User_Agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"
login_url = "https://github.com/session"
# NOTE(review): the login and password below are hard-coded sample values;
# real credentials should come from the environment or a prompt, not from
# source control.
resp = session.post(
    login_url,
    headers={
        "User-Agent": User_Agent,
        "Referer": "https://github.com/login",
    },
    data={
        "commit": "Sign in",
        "utf8": "✓",
        "authenticity_token": token,
        "login": "oldboyedujerry",
        "password": "123654asdAsd",
        "webauthn-support": "supported",
    },
    timeout=10,
)

# Save the returned page so the login result can be inspected in a browser.
with open('login.html', "wb") as f:
    f.write(resp.content)
print(resp.status_code)
requests使用流程
1.确认url,url请求类型
2.get请求
url,
params={"wd":wd},
headers={
"User-Agent":User_Agent,
"Referer":"https://github.com/login"
},
post请求:
url:"",
headers={"User-Agent":User_Agent,
"Referer":"https://github.com/login"}
data={
"commit": "Sign in",
"utf8":"✓",
"authenticity_token": token,
"login": "oldboyedujerry",
"password": "123654asdAsd",
"webauthn-support": "supported"
}
3.import requests
使用session可以自动处理cookie
session = requests.session()
4.解析响应