requests高级之模拟登录(动态参数)

import requests
from tujian import getImgCodeText
from lxml import etree

# Login page URL. It is fetched to find the captcha and is also the target of
# the login POST below.
url = 'https://so.gushiwen.cn/user/login.aspx?from=http://so.gushiwen.cn/user/collect.aspx'
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36',
}

# Step 1: download the login page and read the `src` of the element whose id
# is "imgCode" (the captcha image). Every request in this script goes through
# the module-level `requests` functions, i.e. no Session object is used.
login_page = requests.get(url, headers=headers)
dom = etree.HTML(login_page.text)
captcha_src = dom.xpath('//*[@id="imgCode"]/@src')[0]
captcha_url = 'https://so.gushiwen.cn/' + captcha_src

# Step 2: fetch the captcha image bytes and save them to a local file.
captcha_bytes = requests.get(captcha_url, headers=headers).content
with open('./code.jpg', 'wb') as image_file:
    image_file.write(captcha_bytes)

# Step 3: let the wrapped Tujian helper recognise the captcha text.
# NOTE(review): the meaning of the second argument (3) is defined inside
# `tujian`, which is not shown here - presumably a captcha type id; confirm.
captcha_text = getImgCodeText('./code.jpg', 3)
print(captcha_text)

# Step 4: build the login form.
# NOTE: `__VIEWSTATE` is a fixed value pasted into the script. The analysis
# that follows this script reports that the site issues a different value on
# every login, and that this version fails with a captcha error.
form = {
    "__VIEWSTATE": "jh41JY2kcj85jr4D5GhCBAe6LwDH3mN6TLnVyqtbeMIvOrtAz4TGyn68Vjvy4HUWsA13Lb37CtSaY7lFv6NZRSqEilsHNTFvCqKAU5LxI1BJCq2h6UgMAkGjGKM=",
    "__VIEWSTATEGENERATOR": "C93BE1AE",
    "from": "http://so.gushiwen.cn/user/collect.aspx",
    "email": "古诗文网账号",
    "pwd": "古诗文网密码",
    "code": captcha_text,
    "denglu": "登录"
}

# Step 5: submit the form and save whatever page comes back for inspection.
login_response = requests.post(url, data=form, headers=headers)
with open('./gushiwen.html', 'w', encoding='utf8') as html_file:
    html_file.write(login_response.text)
  • 查看gushiwen.html发现,没有登录成功,提示验证码错误
    • ***分析原因:
      • 图形验证码识别错误(否,我们打开code.jpg图片核对,其内容与识别出来的text数据一致)
      • 没有携带cookie(否,使用session对象,第一次get请求获取图片验证码,第二次发起登录请求,还是报一样的错误)
      • 出现动态变化的请求参数
        • 分析data中的参数,有两个参数不太了解
          • __VIEWSTATE,__VIEWSTATEGENERATOR
          • 通过多次登录抓包对比,我们发现每次登录时__VIEWSTATE参数的值都是不一样的
    • 如何获取__VIEWSTATE参数呢?
      • 基于抓包工具进行全局搜索,发现该参数值被隐藏在了登录页面的页面源码中 ***
  • 在获取图形验证码图片的时候,同时从登录页面源码中提取__VIEWSTATE参数,提交给登录接口使用即可
from urllib.parse import urljoin

import requests
from lxml import etree

from tujian import getImgCodeText

# Login page URL. It is fetched to obtain the captcha and the hidden form
# fields, and it is also the target of the login POST below.
url = 'https://so.gushiwen.cn/user/login.aspx?from=http://so.gushiwen.cn/user/collect.aspx'
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36',
}

# Seconds to wait on each HTTP request, so that a stalled connection cannot
# hang the script indefinitely (requests has no timeout by default).
REQUEST_TIMEOUT = 10


def _first_or_fail(values, description):
    """Return the first XPath result, or raise a readable error if there is none.

    values: the list returned by ``tree.xpath(...)``.
    description: human-readable name of what was being looked up.
    Raises RuntimeError when the list is empty (e.g. the page layout changed).
    """
    if not values:
        raise RuntimeError('%s not found in the login page' % description)
    return values[0]


# One Session is used for every request so that cookies set while loading the
# login page / captcha are sent back with the login POST. The context manager
# closes the session's connections when the block ends.
with requests.Session() as session:
    # Fetch the login page and parse it.
    response = session.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    page_text = response.text
    tree = etree.HTML(page_text)

    # Captcha image link. Resolving it against the page URL avoids producing
    # a double slash when the `src` attribute already starts with "/".
    img_src = urljoin(
        response.url,
        _first_or_fail(tree.xpath('//*[@id="imgCode"]/@src'), 'captcha image (id="imgCode")'),
    )

    # Dynamic form parameter: its value changes on every login, so it must be
    # read from the same page load that the captcha belongs to.
    __VIEWSTATE = _first_or_fail(
        tree.xpath('//*[@id="__VIEWSTATE"]/@value'), 'hidden field "__VIEWSTATE"'
    )

    # Read the generator value from the page as well; fall back to the
    # previously hard-coded value if the field is not present.
    generator_values = tree.xpath('//*[@id="__VIEWSTATEGENERATOR"]/@value')
    viewstate_generator = generator_values[0] if generator_values else "C93BE1AE"

    # Download the captcha image and save it locally. Fail early on an HTTP
    # error instead of handing an error page to the recogniser.
    img_response = session.get(url=img_src, headers=headers, timeout=REQUEST_TIMEOUT)
    img_response.raise_for_status()
    img_data = img_response.content
    with open('./code.jpg', 'wb') as fp:
        fp.write(img_data)

    # Recognise the captcha text with the wrapped Tujian helper.
    # NOTE(review): the meaning of the second argument (3) is defined inside
    # `tujian`, which is not shown here - presumably a captcha type id; confirm.
    text = getImgCodeText('./code.jpg', 3)
    print(text)

    # Simulated login form.
    data = {
        "__VIEWSTATE": __VIEWSTATE,
        "__VIEWSTATEGENERATOR": viewstate_generator,
        "from": "http://so.gushiwen.cn/user/collect.aspx",
        "email": "古诗文网账号",
        "pwd": "古诗文网密码",
        "code": text,
        "denglu": "登录"
    }

    # Submit the login form. The response is saved as-is (no status check) so
    # that the returned page can be inspected even when the login is rejected.
    res = session.post(url=url, headers=headers, data=data, timeout=REQUEST_TIMEOUT)

# Save the page returned after the login attempt.
page_login = res.text
with open('./gushiwen.html', 'w', encoding='utf8') as fp:
    fp.write(page_login)
posted @ 2022-03-21 23:58  Tony_xiao  阅读(198)  评论(0)  编辑  收藏  举报