# Python 根据关键字爬取快手视频
"""Download Kuaishou short videos that match a search keyword.

The script prompts for a keyword, sends one ``visionSearchPhoto`` GraphQL
query to www.kuaishou.com, and saves every video of the first result page
into the ``快手video`` directory, one ``<caption>.mp4`` file per video.
"""
import json
import os
import re
from typing import Optional
from urllib.parse import quote

import requests

GRAPHQL_URL = "https://www.kuaishou.com/graphql"
OUTPUT_DIR = '快手video'
REQUEST_TIMEOUT = 30       # seconds; applies to connect and to each read
CHUNK_SIZE = 64 * 1024     # bytes written to disk per streamed chunk
MAX_NAME_BYTES = 200       # stays below the common 255-byte file-name limit

# Characters that are not allowed in file names on Windows and/or POSIX:
# both slashes, the reserved punctuation, and all ASCII control characters
# (which includes "\n", "\r" and "\t").
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

# NOTE(review): this cookie was captured from one browser session; the site
# presumably rejects it once it expires -- replace it with a fresh one then.
COOKIE = (
    'kpf=PC_WEB; kpn=KUAISHOU_VISION; clientid=3; '
    'did=web_ef8c7b6f6cd9370b51e2090d9005d5b8; client_key=65890b29; '
    'ktrace-context=1|MS43NjQ1ODM2OTgyODY2OTgyLjE0NDUyMzIxLjE2NjExMjc4MzgzNjQuNjQzMDY='
    '|MS43NjQ1ODM2OTgyODY2OTgyLjMxNTY2Nzc2LjE2NjExMjc4MzgzNjQuNjQzMDc='
    '|0|graphql-server|webservice|false|NA; kpn=KUAISHOU_VISION'
)

SEARCH_QUERY = (
    "fragment photoContent on PhotoEntity {\n id\n duration\n caption\n"
    " likeCount\n viewCount\n realLikeCount\n coverUrl\n photoUrl\n"
    " photoH265Url\n manifest\n manifestH265\n videoResource\n"
    " coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n"
    " animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n"
    " profileUserTopPhoto\n __typename\n}\n\n"
    "fragment feedContent on Feed {\n type\n author {\n id\n name\n"
    " headerUrl\n following\n headerUrls {\n url\n __typename\n }\n"
    " __typename\n }\n photo {\n ...photoContent\n __typename\n }\n"
    " canAddComment\n llsid\n status\n currentPcursor\n __typename\n}\n\n"
    "query visionSearchPhoto($keyword: String, $pcursor: String,"
    " $searchSessionId: String, $page: String, $webPageArea: String) {\n"
    " visionSearchPhoto(keyword: $keyword, pcursor: $pcursor,"
    " searchSessionId: $searchSessionId, page: $page,"
    " webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n"
    " feeds {\n ...feedContent\n __typename\n }\n searchSessionId\n"
    " pcursor\n aladdinBanner {\n imgUrl\n link\n __typename\n }\n"
    " __typename\n }\n}\n"
)


def _build_headers(keyword: str) -> dict:
    """Return the HTTP headers for the search request for *keyword*."""
    return {
        'Host': 'www.kuaishou.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:103.0) Gecko/20100101 Firefox/103.0',
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        # "br" is deliberately not advertised: a Brotli-compressed response
        # can only be decoded when the optional brotli package is installed.
        'Accept-Encoding': 'gzip, deflate',
        # Percent-encode the keyword so the header stays pure ASCII.
        'Referer': f'https://www.kuaishou.com/search/video?searchKey={quote(keyword, safe="")}',
        'content-type': 'application/json',
        # No Content-Length here: requests derives it from the actual body.
        'Origin': 'https://www.kuaishou.com',
        'Connection': 'keep-alive',
        'Cookie': COOKIE,
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }


def _search_feeds(session: requests.Session, keyword: str) -> list:
    """Query the search endpoint and return the list of feed entries.

    Returns an empty list when the response carries no ``feeds``.

    Raises:
        requests.RequestException: on a network error or a non-2xx status.
        ValueError: when the response body is not valid JSON.
    """
    payload = {
        'operationName': "visionSearchPhoto",
        'variables': {"keyword": keyword, "pcursor": "", "page": "search"},
        'query': SEARCH_QUERY,
    }
    # The body must be submitted as JSON text, not as an ordinary
    # form-encoded payload -- an easy pitfall with this endpoint.
    resp = session.post(
        GRAPHQL_URL,
        headers=_build_headers(keyword),
        data=json.dumps(payload),
        timeout=REQUEST_TIMEOUT,
    )
    resp.raise_for_status()
    resp.encoding = 'utf-8'
    result = resp.json()
    search = (result.get('data') or {}).get('visionSearchPhoto') or {}
    return search.get('feeds') or []


def _pick_video_url(photo: dict) -> Optional[str]:
    """Return the download URL of *photo*, or ``None`` if there is none.

    The first H.264 representation is preferred; ``photoUrl`` (also requested
    by the query) is used when that structure is missing or incomplete.
    """
    try:
        return photo['videoResource']['h264']['adaptationSet'][0]['representation'][0]['url']
    except (KeyError, IndexError, TypeError):
        return photo.get('photoUrl')


def _safe_filename(caption: str, fallback: str) -> str:
    """Turn a video caption into a name that can be used as a file name.

    Unsafe characters become ``_``; the result is capped at
    ``MAX_NAME_BYTES`` UTF-8 bytes. *fallback* is returned when nothing
    usable is left (for example, an empty caption).
    """
    name = _UNSAFE_FILENAME_CHARS.sub('_', caption).strip()
    # Cut on the encoded form; errors="ignore" drops a character that the
    # cut may have split in half.
    name = name.encode('utf-8')[:MAX_NAME_BYTES].decode('utf-8', errors='ignore')
    # Windows silently strips trailing dots and spaces from file names.
    return name.rstrip(' .') or fallback


def _download(session: requests.Session, video_url: str, target_path: str) -> None:
    """Stream *video_url* to *target_path*.

    Data goes to ``<target_path>.part`` first and is renamed only after the
    transfer finished, so an interrupted download never leaves a truncated
    ``.mp4`` behind.

    Raises:
        requests.RequestException: on a network error or a non-2xx status.
        OSError: when the file cannot be written.
    """
    partial_path = target_path + '.part'
    try:
        with session.get(video_url, stream=True, timeout=REQUEST_TIMEOUT) as resp:
            resp.raise_for_status()
            with open(partial_path, mode="wb") as f:
                for chunk in resp.iter_content(chunk_size=CHUNK_SIZE):
                    f.write(chunk)
    except (requests.RequestException, OSError):
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise
    os.replace(partial_path, target_path)


def main() -> None:
    """Prompt for a keyword and download every video of the first result page."""
    keyword = input("请输入要下载视频的关键字")

    # The session closes its pooled connections when the block exits.
    with requests.Session() as session:
        try:
            feeds = _search_feeds(session, keyword)
        except (requests.RequestException, ValueError) as err:
            raise SystemExit(f"搜索请求失败: {err}")

        if not os.path.exists(OUTPUT_DIR):
            os.mkdir(OUTPUT_DIR)
            print("文件夹创建成功")

        if not feeds:
            print("没有搜索到相关视频")
            return

        for index, feed in enumerate(feeds):
            photo = feed.get('photo') or {}
            caption = photo.get('caption') or ''
            video_url = _pick_video_url(photo)
            if not video_url:
                print(f"video{index} {caption}没有可用的下载地址,已跳过")
                continue

            # Strip special characters, otherwise the file cannot be saved.
            file_name = _safe_filename(caption, fallback=f"video{index}") + '.mp4'
            try:
                _download(session, video_url, os.path.join(OUTPUT_DIR, file_name))
            except (requests.RequestException, OSError) as err:
                # One broken video must not abort the remaining downloads.
                print(f"video{index} {caption}下载失败: {err}")
                continue
            print(f"video{index} {caption}下载完成")


if __name__ == "__main__":
    main()