- 模拟登录古诗文网
- 通过浏览器抓包,我们分析登录接口
import requests
from tujian import getImgCodeText
from lxml import etree
# First attempt at the simulated login: stateless requests calls plus form
# fields copied verbatim from a captured browser login request.
# The notes that follow this script report that the server rejects it with a
# captcha error, so it is kept as the baseline for that analysis.
url = 'https://so.gushiwen.cn/user/login.aspx?from=http://so.gushiwen.cn/user/collect.aspx'
headers = {
    'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36',
}

# Download the login page and read the captcha image address from the
# element whose id is "imgCode".
login_page = requests.get(url=url, headers=headers)
dom = etree.HTML(login_page.text)
captcha_src = dom.xpath('//*[@id="imgCode"]/@src')[0]
captcha_url = 'https://so.gushiwen.cn/' + captcha_src

# Fetch the captcha image and store it on disk so the recogniser can read it.
captcha_bytes = requests.get(url=captcha_url, headers=headers).content
with open('./code.jpg', 'wb') as image_file:
    image_file.write(captcha_bytes)

# Recognise the captcha with the tujian helper.
# NOTE(review): the meaning of the second argument (3) is defined by the
# tujian module, which is not visible here - confirm against that helper.
captcha_text = getImgCodeText('./code.jpg', 3)
print(captcha_text)

# Login form fields. __VIEWSTATE is a fixed value captured from one browser
# session; only the captcha text is filled in at run time.
form_fields = {
    "__VIEWSTATE": "jh41JY2kcj85jr4D5GhCBAe6LwDH3mN6TLnVyqtbeMIvOrtAz4TGyn68Vjvy4HUWsA13Lb37CtSaY7lFv6NZRSqEilsHNTFvCqKAU5LxI1BJCq2h6UgMAkGjGKM=",
    "__VIEWSTATEGENERATOR": "C93BE1AE",
    "from": "http://so.gushiwen.cn/user/collect.aspx",
    "email": "古诗文网账号",
    "pwd": "古诗文网密码",
    "code": captcha_text,
    "denglu": "登录",
}

# Submit the form and save whatever page comes back for manual inspection.
login_result = requests.post(url=url, headers=headers, data=form_fields)
with open('./gushiwen.html', 'w', encoding='utf8') as html_file:
    html_file.write(login_result.text)
- 查看gushiwen.html发现,没有登录成功,提示验证码错误
- ***分析原因:
- 图形验证码识别错误(否,我们打开code图片,其内容与识别出来的text数据一致)
- 没有携带cookie(否,使用session对象,第一次get请求获取图片验证码,第二次发起登录请求,还是报一样的错误)
- 出现动态变化的请求参数
- 分析data中的参数,有两个参数不太了解
- __VIEWSTATE,__VIEWSTATEGENERATOR
- 通过多次登录,我们分析每次登录__VIEWSTATE参数都是不一样的
- 如何获取__VIEWSTATE参数呢?
- 基于抓包工具进行全局搜索,发现该参数值被隐藏在了登录页面的页面源码中 ***
- 在获取图形验证码图片的时候,同时提取__VIEWSTATE参数,给登录接口使用即可
import requests
from tujian import getImgCodeText
from lxml import etree
from urllib.parse import urljoin

# Second attempt at the simulated login: one Session is shared by every
# request, and the per-page __VIEWSTATE value is scraped from the login page
# instead of being hard-coded.

# Login page URL; the same URL receives the login form POST.
url = 'https://so.gushiwen.cn/user/login.aspx?from=http://so.gushiwen.cn/user/collect.aspx'
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36',
}
# Seconds to wait on each HTTP request. requests applies no timeout by
# default, so a stalled server would otherwise hang the script forever.
REQUEST_TIMEOUT = 10

# A Session re-sends the cookies it receives, so the captcha download and the
# login POST carry the cookies set when the login page was fetched.
session = requests.Session()

# Fetch the login page; it holds both the captcha image link and the hidden
# __VIEWSTATE form field.
response = session.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
tree = etree.HTML(response.text)

captcha_src = tree.xpath('//*[@id="imgCode"]/@src')
viewstate = tree.xpath('//*[@id="__VIEWSTATE"]/@value')
if not captcha_src or not viewstate:
    # Fail with a readable message instead of a bare IndexError when the page
    # layout changes or an error page is returned.
    raise RuntimeError(
        'login page is missing the captcha image (#imgCode) '
        'or the __VIEWSTATE field'
    )

# Resolve the image link against the page URL the way a browser would; plain
# string concatenation produces "//" when the link starts with a slash.
img_src = urljoin(url, captcha_src[0])

# Download the captcha through the same session and save it for recognition.
img_data = session.get(url=img_src, headers=headers, timeout=REQUEST_TIMEOUT).content
with open('./code.jpg', 'wb') as fp:
    fp.write(img_data)

# Recognise the captcha with the tujian helper.
# NOTE(review): the meaning of the second argument (3) is defined by the
# tujian module, which is not visible here - confirm against that helper.
text = getImgCodeText('./code.jpg', 3)
print(text)

# Login form fields. __VIEWSTATE comes from the page fetched above.
# NOTE(review): __VIEWSTATEGENERATOR is still a captured constant; the notes
# only report __VIEWSTATE changing between logins - confirm it stays fixed.
data = {
    "__VIEWSTATE": viewstate[0],
    "__VIEWSTATEGENERATOR": "C93BE1AE",
    "from": "http://so.gushiwen.cn/user/collect.aspx",
    "email": "古诗文网账号",
    "pwd": "古诗文网密码",
    "code": text,
    "denglu": "登录",
}

# Submit the login form and save the returned page for manual inspection.
res = session.post(url=url, headers=headers, data=data, timeout=REQUEST_TIMEOUT)
page_login = res.text
with open('./gushiwen.html', 'w', encoding='utf8') as fp:
    fp.write(page_login)