# python爬虫案例07:cookie登录小说网站,爬取书架书本信息
# 登录小说网,拿到cookie
# 带着cookie请求网站,拿到书架上的内容
# 使用session进行请求(持续请求,在这个过程中,cookie不会丢失)
import requests
import json
# Log in to 17k.com through a requests session, then fetch the account's
# bookshelf with the same session and append one line per book to ./1.txt.
url = 'https://passport.17k.com/ck/user/login'
# A single session is reused for both requests so the cookies set by the login
# response are sent automatically with the bookshelf request.
session = requests.session()
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
}
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a config file that is kept out of version control.
data = {
    'loginName': 15178135472,
    'password': 'ZXCVBNM.123',
}
# A timeout keeps the script from hanging forever on an unresponsive server.
response = session.post(url, headers=header, data=data, timeout=10)
# Fail fast on HTTP-level errors (4xx/5xx).
# NOTE(review): a 200 status only shows the request was served; the response
# body presumably carries the actual login result - confirm against the API.
response.raise_for_status()

# The bookshelf is loaded dynamically; this endpoint was found through the
# browser dev tools (F12 -> XHR).
url_2 = 'https://user.17k.com/ck/author/shelf?page=1&appKey=2406394919'
shelf_response = session.get(url=url_2, headers=header, timeout=10)
shelf_response.raise_for_status()
resp = shelf_response.text  # the bookshelf comes back as a JSON document

# Decode the JSON text into Python dicts/lists.
resp_data = json.loads(resp)
if 'data' not in resp_data:
    # Raise a descriptive error instead of an opaque KeyError; a missing
    # 'data' field most likely means the login did not succeed.
    raise RuntimeError(
        f"bookshelf response has no 'data' field (login may have failed): {resp_data!r}"
    )
data_list = resp_data['data']

# Save to a local file. The file is opened once for the whole loop rather than
# once per book, and the `with` block closes it, so no explicit close() is
# needed.
with open("./1.txt", 'a', encoding='utf-8') as f:
    for book in data_list:
        name = book['bookName']  # book title
        writer = book['authorPenName']  # author pen name
        bookChannel = book['bookChannel']['name']  # category / channel
        result = f'书名:{name}---作者:{writer}---类型---{bookChannel}\n'
        f.write(result)