requests -- simulating a login, handling cookies, hotlink protection, proxies
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:woshinidaye
import requests

# Cookie handling: simulating a user login
'''
# requests
# Log in -> the server returns a cookie
# Request the bookshelf URL with that cookie --> bookshelf contents
# The two steps above have to happen back to back;
# since they are consecutive, use a session: a session is a chain of requests in which the cookie
# is not lost between requests, so the login state is preserved.
import requests

session = requests.Session()  # session object
# log in
url = 'https://passport.17k.com/ck/user/login'
data = {
    'loginName': '13162610949',
    'password': '1qaz@WSX'
}
resp = session.post(url=url, data=data)
print(resp.text)
print(resp.cookies)  # inspect the cookies

# the session carries the login cookie automatically on the next request
resp2 = session.get('https://user.17k.com/ck/author/shelf?page=1&appKey=2406394919')
print(resp2.json())

# Cookies expire over time; hard-coding one like below is not really recommended, prefer the session approach above.
resp = requests.get('https://user.17k.com/ck/author/shelf?page=1&appKey=2406394919', headers={'Cookie': 'GUID=8f7b0858-f653-4c2d-a3aa-16ae15c6a5eb; BAIDU_SSP_lcr=https://www.baidu.com/link?url=jwBPHR8PncDeqSBbnzEYrj-RpWQSyU8SqZxbGFBCSoi&wd=&eqid=ae218c39001387600000000361a4d032; sajssdk_2015_cross_new_user=1; Hm_lvt_9793f42b498361373512340937deb2a0=1638191160; c_channel=0; c_csc=web; accessToken=avatarUrl%3Dhttps%253A%252F%252Fcdn.static.17k.com%252Fuser%252Favatar%252F12%252F12%252F52%252F86175212.jpg-88x88%253Fv%253D1638191919000%26id%3D86175212%26nickname%3D%25E4%25B8%258D%25E7%259F%25A5%25E9%2581%2593%25E7%259A%2584%25E6%2598%25B5%25E7%25A7%25B01%26e%3D1653744132%26s%3Dba8377d324376942; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%2286175212%22%2C%22%24device_id%22%3A%2217d6bcd58556e2-056bc19aea3d0b-978183a-921600-17d6bcd585689b%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E8%87%AA%E7%84%B6%E6%90%9C%E7%B4%A2%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22https%3A%2F%2Fwww.baidu.com%2Flink%22%2C%22%24latest_referrer_host%22%3A%22www.baidu.com%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%7D%2C%22first_id%22%3A%228f7b0858-f653-4c2d-a3aa-16ae15c6a5eb%22%7D; Hm_lpvt_9793f42b498361373512340937deb2a0=1638193021'})
for i in resp.json()['data']:
    print(i['bookName'])
'''

# Log in to the course platform by replaying the browser's Cookie header directly
url = 'http://lms.h3c.com/zxy-student/api/course/list/querySelfCoursePage?_page=1&_pageSize=12'
resp = requests.get(url, headers={
    'Cookie': 'LtpaToken=AAECAzYxQTMyODQxNjJCNUQwQzFsMjEzODmAENzChs6FGbd9+SmFCU4dkXseug==; _gsnewuser=1; _gsuserid=17d6a7d1ef8628-0de3f0d123337c-978183a-921600-17d6a7d1ef97f; _gsprops=%7B%22s_latest_traffic_source_type%22%3A%22%E8%87%AA%E7%84%B6%E6%90%9C%E7%B4%A2%E6%B5%81%E9%87%8F%22%2C%22s_latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%2C%22s_latest_referrer%22%3A%22https%3A%2F%2Fwww.baidu.com%2Flink%22%7D; Hm_lvt_df7237ab1ce22c31bbe68ebd1817c1c4=1638169120,1638169179; SSO_STATUS=l21389; SsoUserIdentity=l21389; token=17e88156-fa6a-4e25-a2f5-433b345f4931; csrfToken=3e82d42a-d21d-48b3-a772-2ec041f22237; en_key=l21389',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36',
    'Referer': 'http://lms.h3c.com/zxy-student-web/'
})
print(resp.json())

# requests: getting around hotlink protection (the server checks the Referer header)
'''
import requests
from lxml import html
etree = html.etree

url = 'https://www.pearvideo.com/'
User_Agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'
headers = {
    'User-Agent': User_Agent
}
resp = requests.get(url, headers=headers)
tree = etree.HTML(resp.text)
ids = tree.xpath('//@data-id')[1:-1]  # grab the video IDs from the home page
for i in ids:
    url = 'https://www.pearvideo.com/video_' + i
    url_2 = f'https://www.pearvideo.com/videoStatus.jsp?contId={i}&mrd=0.9560651377032194'
    Referer = f'https://www.pearvideo.com/video_{i}'
    resp2 = requests.get(url_2, headers={
        'User-Agent': User_Agent,
        'Referer': Referer  # send the Referer to get past the hotlink check
    })
    systemTime = resp2.json()['systemTime']
    srcUrl = resp2.json()['videoInfo']['videos']['srcUrl']
    srcUrl = srcUrl.replace(systemTime, f'cont-{i}')  # rebuild the real video URL from the placeholder
    with open(fr'E:\工作资料\python\Reptile\video\视频{i}.mp4', 'wb+') as f:  # download
        f.write(requests.get(srcUrl).content)
    print(f'视频{i}下载完成')
'''

# Proxies
# This is said to be a bit sensitive, so it is skipped here.
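Although the proxy part is skipped above, the usual pattern in requests is just one more keyword argument: a proxies mapping keyed by URL scheme. The sketch below is a minimal, hypothetical example; the local proxy address 127.0.0.1:7890 and the httpbin.org check URL are placeholders I chose for illustration, not anything from the original script.

# Minimal sketch: routing a request through a proxy with requests.
# The proxy address is a placeholder; substitute one you actually have access to.
import requests

proxies = {
    'http': 'http://127.0.0.1:7890',   # placeholder HTTP proxy
    'https': 'http://127.0.0.1:7890',  # HTTPS traffic is tunneled through the same placeholder proxy
}

resp = requests.get(
    'https://httpbin.org/ip',  # echoes the IP the server sees, handy for checking the proxy took effect
    proxies=proxies,
    timeout=10,
)
print(resp.json())

The same mapping can also be attached to a session once, e.g. session.proxies.update(proxies), so every request made through that session goes out via the proxy.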