Python 根据关键字爬取快手视频

import json
import os
import re
from urllib.parse import quote

import requests

# GraphQL endpoint that the search request is POSTed to.
url = "https://www.kuaishou.com/graphql"
# Search keyword, read interactively from the user.
keyword = input("请输入要下载视频的关键字")

# HTTP headers sent with the search request. The Cookie value is a captured
# session and is reproduced verbatim.
headers = {
    'Host': 'www.kuaishou.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:103.0) Gecko/20100101 Firefox/103.0',
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
    # 'br' is not advertised: requests can only decode Brotli responses when
    # an optional Brotli package is installed, and an undecoded body would
    # make the later JSON parsing fail.
    'Accept-Encoding': 'gzip, deflate',
    # Percent-encode the keyword so the header stays pure ASCII and the URL
    # stays valid even for non-ASCII keywords, spaces, '&', etc.
    'Referer': f'https://www.kuaishou.com/search/video?searchKey={quote(keyword, safe="")}',
    'content-type': 'application/json',
    # No hard-coded Content-Length: the body size depends on the keyword and
    # requests computes the correct value from the actual body.
    'Origin': 'https://www.kuaishou.com',
    'Connection': 'keep-alive',
    'Cookie': 'kpf=PC_WEB; kpn=KUAISHOU_VISION; clientid=3; did=web_ef8c7b6f6cd9370b51e2090d9005d5b8; client_key=65890b29; ktrace-context=1|MS43NjQ1ODM2OTgyODY2OTgyLjE0NDUyMzIxLjE2NjExMjc4MzgzNjQuNjQzMDY=|MS43NjQ1ODM2OTgyODY2OTgyLjMxNTY2Nzc2LjE2NjExMjc4MzgzNjQuNjQzMDc=|0|graphql-server|webservice|false|NA; kpn=KUAISHOU_VISION',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
}
# GraphQL request body for the "visionSearchPhoto" operation: the search
# keyword goes into `variables`, and `query` selects (among other fields) the
# caption, photoUrl and videoResource of every feed entry.
data = {
    'operationName': "visionSearchPhoto",
    'variables': {"keyword": keyword, "pcursor": "", "page": "search"},
    'query': "fragment photoContent on PhotoEntity {\n  id\n  duration\n  caption\n  likeCount\n  viewCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  __typename\n}\n\nfragment feedContent on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  photo {\n    ...photoContent\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  __typename\n}\n\nquery visionSearchPhoto($keyword: String, $pcursor: String, $searchSessionId: String, $page: String, $webPageArea: String) {\n  visionSearchPhoto(keyword: $keyword, pcursor: $pcursor, searchSessionId: $searchSessionId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      ...feedContent\n      __typename\n    }\n    searchSessionId\n    pcursor\n    aladdinBanner {\n      imgUrl\n      link\n      __typename\n    }\n    __typename\n  }\n}\n",
}
# The body must be submitted as a JSON document, not as an ordinary
# form-encoded payload, so it is serialized explicitly before sending.
data = json.dumps(data)

# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() turns an HTTP error into a clear exception instead of a
# confusing JSON/KeyError further down.
resp = requests.post(url, headers=headers, data=data, timeout=30)
resp.raise_for_status()
resp.encoding = 'utf-8'
dic = resp.json()

# Any level of the response may be missing or null (for example when the
# server returns an error payload instead of search results), so walk it
# defensively and fall back to an empty list.
search_result = (dic.get('data') or {}).get('visionSearchPhoto') or {}
feeds = search_result.get('feeds') or []
if not feeds:
    print("未获取到任何视频,请检查关键字或更新Cookie后重试")
# Directory that receives the downloaded .mp4 files (created on first run).
dir_name = '快手video'
if not os.path.exists(dir_name):
    os.mkdir(dir_name)
    print("文件夹创建成功")

# Download every feed entry returned by the search, one file per video.
for i, feed in enumerate(feeds):
    photo = feed.get('photo') or {}
    videoName = photo.get('caption') or ''

    # Prefer the first h264 representation; when that structure is absent or
    # shaped differently, fall back to the plain photoUrl field that the
    # search query also requests.
    try:
        download_href = photo['videoResource']['h264']['adaptationSet'][0]['representation'][0]['url']
    except (KeyError, IndexError, TypeError):
        download_href = photo.get('photoUrl')
    if not download_href:
        print("video" + str(i) + "   " + videoName + "没有可用的下载地址,已跳过")
        continue

    # Replace characters that are not allowed in file names (including the
    # backslash and carriage return); an empty caption falls back to the
    # index so the file still gets a usable name.
    new_title = re.sub(r'[\\/:*?"<>|\r\n]', '_', videoName) or f"video{i}"
    target_path = os.path.join(dir_name, new_title + '.mp4')

    try:
        # Stream the body to disk in chunks instead of holding the whole
        # video in memory; check the status first so an error page is never
        # saved as an .mp4 file.
        with requests.get(download_href, stream=True, timeout=30) as video:
            video.raise_for_status()
            with open(target_path, mode="wb") as f:
                for chunk in video.iter_content(chunk_size=65536):
                    f.write(chunk)
    except (requests.RequestException, OSError) as err:
        # One failed download (network error, unwritable file name, ...)
        # must not abort the remaining ones.
        print("video" + str(i) + "   " + videoName + "下载失败: " + str(err))
        continue

    print("video" + str(i) + "   " + videoName + "下载完成")

# The original script ended with requests.close(), but the requests module
# has no such function (it raised AttributeError); the context managers
# above already release each download connection.

 

posted @ 2022-08-22 11:25  山海自有归期  阅读(378)  评论(0)  编辑  收藏  举报