爬取17k小说书架
# 登录 -> 得到cookie
# 带着cookie去访问 -> 得到数据
# 必须得把上面两个操作连起来
# 我们可以使用session进行请求 -> session 你可以认为是一连串的请求,在这个过程中的cookie不会丢失
说一说过程中遇到的问题:
刚开始我照着视频做的时候,视频里没有加 headers,也没有报错,结果直接就显示出来了;
但是我这边一运行就报错了,所以我加了 headers。
加了 headers 之后,输出了一大段看不懂的代码;在后续爬取书架内容时也显示登录错误,不知道是怎么回事。
之后换了一种方式:直接把登录后的 cookie 传入 headers 中,就可以爬取到了。
代码
# 登录 -> 得到cookie
# 带着cookie去访问 -> 得到数据
# 必须得把上面两个操作连起来
# 我们可以使用session进行请求 -> session 你可以认为是一连串的请求,在这个过程中的cookie不会丢失
import requests
# Session: a chain of requests that keeps cookies between calls.
# requests.Session() is the canonical constructor; requests.session() is a
# legacy alias kept only for backwards compatibility.
session = requests.Session()

# Step 1: log in.
url = "https://passport.17k.com/ck/user/login"
# NOTE(review): loginName/password are empty strings in this copy --
# presumably redacted; supply real credentials before running.
data = {
    "loginName": "",
    "password": "",
}
# Browser-like User-Agent sent with the login request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
}
# Explicit timeout: without one, requests waits indefinitely on a stalled
# connection. A timeout raises requests.exceptions.Timeout instead.
response = session.post(url=url, data=data, headers=headers, timeout=10)
# For debugging, inspect response.text, response.cookies and
# response.status_code here.
# Step 2: fetch data for the logged-in account.
# (The original note calls this "get the books on the bookshelf"; the URL
# below is the myInfo endpoint.)
# This request does NOT go through `session`: it is a plain requests.get that
# sends a Cookie header directly.
# SECURITY: the Cookie value below is a hard-coded login cookie (it includes
# an accessToken). Avoid committing it; prefer loading it from an environment
# variable or a local, untracked config file.
r = requests.get(
    "https://user.17k.com/ck/user/myInfo/103174741?bindInfo=1&appKey=2406394919",
    headers={
        "Cookie": "acw_sc__v2=65df1c56aac8ae0b98370967b5b2a7c021a3cc86; GUID=300934f9-6681-4650-a730-05aabd61db51; Hm_lvt_9793f42b498361373512340937deb2a0=1709120663; sajssdk_2015_cross_new_user=1; ssxmod_itna=Yq+hBKDKD50K7KD=rGHr4PpDgiKEIIAhAvQ034D/YDfh4iNDnD8x7YDvAAwFbA/QAPwqfK8RcwKfCe27+ef+nY8cQWDCPGnDB9wxODqYYCDt4DTD34DYDixibCxi5GRD0KDFF5XUZ9Dm4GWFqGfDDoDY86RDitD4qDBmrdDKqGgCdxD0q3A9DDhjo+xrWdKEwbgzDrqFqDMjeGX8o9eFqbOE=xZiCaq4WpRDB=hxBQMAkNUAeDHCwXM4eYvYh+sK7x38hp5Y7h=+YxpbOw4zYxuLBh=8iAK8O+KUS5QbDDAYYxQR5AxeD===; ssxmod_itna2=Yq+hBKDKD50K7KD=rGHr4PpDgiKEIIAhAvQ0YG9bwDfxGN4tGazzYI=98NzTIxn4t42IZwBNhg3flr2w/4tQSNvov+TgDIB9I6MuOkXUl5WReV3tfRnKHtiSjYjxwtQbgUVga/7ZC6KOVaa1dEIk8P7svZKw44A8VEY+hOPGIaFm7S5zxSmmKIv+O4fwK/BA7cot=9Kac7AElo3fs/bC3cuGKbHIEFkL2fKQ4NgWlRL0H9mvypdtQ51G=elENVIXRxTIaTFc4Xi7ZcF4+tMQzyo8fsmBKNi3DQFYZYw7ymuDkmzq9ya/lDVzi4bqsbieRhxRrq90Veu4=0iYD1UyDK04GYV+04SiQS4KcBQlBZmrdfr/+i4Ki2Kb4tE=02KDcr8iH4r=Ig5OB=k0YsDDFqD+OUeYhP7tqn++Uh=lrKBwKKw=QKNmK4lG2AGNbD22xi6xs=G5BWeQKbWjqBnafR7WG+bGR/xBbDxD; accessToken=avatarUrl%3Dhttps%253A%252F%252Fcdn.static.17k.com%252Fuser%252Favatar%252F01%252F41%252F47%252F103174741.jpg-88x88%253Fv%253D1709113797000%26id%3D103174741%26nickname%3D%25E7%25AD%2589%25E9%25A3%258E%25E5%2590%25B9%25E4%25B9%258B%25E9%2587%258D%25E7%2594%259F%26e%3D1724672676%26s%3Df686740c81f8838a; c_channel=0; c_csc=web; tfstk=eTGHEu_JgvyBhO1jWDFB6GiG_q9TdMN7C0C827EyQlr_2eZLv4oorqyz86oKIumZjkP89XdoElPUFvE8vuVoPmY9DIdxR2N7_nKvM_DxPqFrI3BVK2gQNN_O4KHZR_XaQjSF0Kv4kMcv--qco_hfuQOrMouUY23x__wOdVqGa_liI8q2qlfPaf4h49XN3bkhNP8-sT67LPagDY0OgHf-qNdwSFX2PJz_RoLMST67LPagDFYG3UwU5yZA.; Hm_lpvt_9793f42b498361373512340937deb2a0=1709120677; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22103174741%22%2C%22%24device_id%22%3A%2218def87b0c363f-06c85efeee1c6-26001b51-1327104-18def87b0c4987%22%2C%22props%22%3A%7B%7D%2C%22first_id%22%3A%22300934f9-6681-4650-a730-05aabd61db51%22%7D"
    },
    # Explicit timeout so a stalled connection raises instead of hanging.
    timeout=10,
)
# Surface HTTP errors (4xx/5xx) as requests.exceptions.HTTPError here, rather
# than as a less informative JSON decoding failure on the next line.
r.raise_for_status()
print(r.json())