requests模块

requests

get请求(参数带有中文)

# Approach 1: percent-encode the keyword yourself with urlencode
import requests
from urllib.parse import urlencode

wd = 'egon老师'
# The Chinese characters get percent-encoded (as UTF-8); the ASCII part is left as-is
encode_res = urlencode({'k': wd}, encoding='utf-8')
print(encode_res)  # k=egon%E8%80%81%E5%B8%88
# Keep only what follows the "k=" prefix, i.e. the encoded keyword itself
keyword = encode_res.partition('=')[2]
print(keyword)  # egon%E8%80%81%E5%B8%88
url = 'https://www.baidu.com/s?wd=%s&pn=1' % keyword

# Send a browser User-Agent string along with the request
browser_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36',
}
response = requests.get(url, headers=browser_headers)


# Approach 2: hand the raw keyword to `params` and let requests encode it
import requests

wd = 'egon老师'
url = 'https://www.baidu.com/s'
response = requests.get(
    url,
    headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36',
    },
    # requests encodes these key/value pairs and appends them to the URL as the query string
    params={"wd": wd},
)

get请求>>>headers

#通常我们在发送请求时都需要带上请求头,请求头是将自身伪装成浏览器的关键,常见的有用的请求头如下
Host
Referer #大型网站通常都会根据该参数判断请求的来源
User-Agent #客户端
Cookie #Cookie信息虽然包含在请求头里,但requests模块有单独的参数来处理它,headers={}内就不要放它了

补充

# stream=True: fetch the body piece by piece. When downloading e.g. a 100 GB video,
# reading response.content and writing it to a file in one go is not reasonable.
import requests

# Using the response as a context manager releases the connection once the
# download loop finishes (or fails).
with requests.get('https://gss3.baidu.com/6LZ0ej3k1Qd3ote6lo7D0j9wehsv/tieba-smallvideo-transcode/1767502_56ec685f9c7ec542eeaf6eac93a65dc7_6fe25cd1347c_3.mp4',
                  stream=True) as response:
    with open('b.mp4', 'wb') as f:
        # iter_content() defaults to chunk_size=1 (one byte per iteration);
        # ask for larger chunks so the file is not written byte by byte.
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)

post请求

post请求,登录
登录行为需要带cookie,token

登录github
#网站分析(用户名正确,密码输错)
#也就是分析出登录所需的路由,请求头,请求体
 浏览器输入https://github.com/login
 然后输入错误的账号密码,抓包
发现登录行为是post提交到:https://github.com/session
import re

import requests

url = "https://github.com/login"
# A session automatically stores and re-sends cookies, so there is no need to
# read response.cookies.get_dict() and pass cookies=... to the POST by hand.
session = requests.session()
response = session.get(url)

# The login page carries an authenticity_token in its HTML; it has to be sent
# back together with the form fields.
token_match = re.search(r'authenticity_token" value="(.*?)"', response.text)
if token_match is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise RuntimeError("authenticity_token not found in the GitHub login page")
token = token_match.group(1)

User_Agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"
login_url = "https://github.com/session"
# NOTE(review): the credentials below are hard-coded in the source; real code
# should read them from the environment or prompt for them.
resp = session.post(login_url,
                    headers={
                        "User-Agent": User_Agent,
                        "Referer": "https://github.com/login"
                    },
                    data={
                        "commit": "Sign in",
                        "utf8": "✓",
                        "authenticity_token": token,
                        "login": "oldboyedujerry",
                        "password": "123654asdAsd",
                        "webauthn-support": "supported"
                    })
# Save the returned page for inspection and report the HTTP status
with open('login.html', "wb") as f:
    f.write(resp.content)
print(resp.status_code)

requests使用流程

1.确认url,url请求类型
2.get请求
    url,
    params={"wd":wd},
    headers={
              "User-Agent":User_Agent,
              "Referer":"https://github.com/login"
                    },
  post请求:
    url="",
    headers={"User-Agent":User_Agent,
              "Referer":"https://github.com/login"},
    data={
        "commit": "Sign in",
        "utf8":"✓",
        "authenticity_token": token,
        "login": "oldboyedujerry",
        "password": "123654asdAsd",
        "webauthn-support": "supported"
        }
 3.import requests
	使用session可以自动处理cookie
	session = requests.session()
4.解析响应
posted @ 2019-04-15 14:00  robertzhou  阅读(147)  评论(0)  编辑  收藏  举报