# Web scraping (4): cookies and login, using the 17k novel site (17k小说网)
# -*- coding: utf-8 -*-
"""Advanced ``requests`` usage: cookies and login on the 17k novel site.

@Time : 2022/3/21 14:10
@Author : Andrew
@File : requests进阶.py

Headers are the HTTP request headers. They usually carry data that is
unrelated to the request content, and sometimes security/verification
information such as the User-Agent, a token or cookies. With ``requests``
these values can be passed through ``headers`` or supplied separately;
``requests`` assembles the complete HTTP request headers in the end.

Topics of this series:
    1. Simulate a browser login and handle cookies (this script).
    2. Anti-hotlinking handling -> scraping Pear Video data.
    3. Proxies -> avoid getting the IP banned.

Flow used here: log in -> obtain a cookie -> request the bookshelf URL with
that cookie -> bookshelf contents. The two requests must be tied together,
which is what a session is for: it can be seen as a chain of requests during
which the cookies are not lost.
"""

import requests

# Bookshelf endpoint; it only returns the shelf for an authenticated user.
SHELF_URL = "https://user.17k.com/ck/author/shelf?page=1&appKey=2406394919"

# Cookie header copied verbatim from a logged-in browser.
# NOTE(review): this is a captured login cookie (it embeds an accessToken);
# it should be replaced with a fresh one and ideally not be committed.
BROWSER_COOKIE = (
    "GUID=a82e13d0-f453-4156-9c48-29f7e5688aac; "
    "BAIDU_SSP_lcr=https://www.baidu.com/link?url=v6CotuEsf0bbmd1Xc3QiOctg1AOqMGceYsKtCNfNuIG&wd=&eqid"
    "=9f1d8e53000027950000000562383b59; sajssdk_2015_cross_new_user=1; "
    "Hm_lvt_9793f42b498361373512340937deb2a0=1647852387; c_channel=0; c_csc=web; "
    "accessToken=avatarUrl%3Dhttps%253A%252F%252Fcdn.static.17k.com%252Fuser%252Favatar%252F05%252F65%252F54"
    "%252F94345465.jpg-88x88%253Fv%253D1647852488000%26id%3D94345465%26nickname%3D%25E4%25B9%2594%25E5%258D"
    "%2581%25E5%2585%25AD%26e%3D1663404544%26s%3D68ff0cfe4c18b9f2; "
    "Hm_lpvt_9793f42b498361373512340937deb2a0=1647852545; "
    "sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%2294345465%22%2C%22%24device_id%22%3A"
    "%2217faba7f97d238-0c711bea61cda4-1b3f2865-2073600-17faba7f97e76f%22%2C%22props%22%3A%7B%22"
    "%24latest_traffic_source_type%22%3A%22%E8%87%AA%E7%84%B6%E6%90%9C%E7%B4%A2%E6%B5%81%E9%87%8F%22%2C%22"
    "%24latest_referrer%22%3A%22https%3A%2F%2Fwww.baidu.com%2Flink%22%2C%22%24latest_referrer_host%22%3A"
    "%22www.baidu.com%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%7D%2C"
    "%22first_id%22%3A%22a82e13d0-f453-4156-9c48-29f7e5688aac%22%7D "
)

# Without a timeout ``requests`` may wait on the server indefinitely.
REQUEST_TIMEOUT = 10  # seconds

# The session is only needed by approach 1 below; opening it in a ``with``
# block guarantees it is closed even though approach 1 is disabled.
with requests.Session() as session:
    # Approach 1: let the session pick up the cookie by logging in.
    # 1. Log in (fill in real credentials; never commit them).
    # url = "https://passport.17k.com/ck/user/login"
    # data = {
    #     "loginName": "<login name>",
    #     "password": "<password>",
    # }
    # session.post(url, data=data, timeout=REQUEST_TIMEOUT)
    #
    # 2. Fetch the bookshelf; the session now carries the login cookie.
    # resp2 = session.get(SHELF_URL, timeout=REQUEST_TIMEOUT)

    # Approach 2 (quick and dirty): send the cookie copied from the browser.
    # Using the response as a context manager closes it even when
    # ``json()`` raises on a non-JSON body.
    with requests.get(
        SHELF_URL,
        headers={"cookie": BROWSER_COOKIE},
        timeout=REQUEST_TIMEOUT,
    ) as resp2:
        print(resp2.json())