百度云获取全部文件API探索
通过抓包发现,百度云获取文件列表的接口为 https://pan.baidu.com/api/list 。本文尝试基于该接口封装一个获取百度网盘全部文件的 API 接口
1.抓包分析
2.代码实现
#! /usr/bin/python3
# -*- coding: UTF-8 -*-
import json
import os
import sys
import time
import urllib
import urllib.parse
from collections import deque

import requests

# Endpoint discovered by packet capture; the query string is appended to it.
LIST_API_URL = 'https://pan.baidu.com/api/list?'
# Seconds to wait for the server before treating the request as failed.
REQUEST_TIMEOUT = 30


def _build_cookie_header(cookies):
    """Return the Cookie header value built from the BDUSS and STOKEN cookies."""
    return ';'.join(
        item['name'] + '=' + item['value']
        for item in cookies
        if item['name'] in ('BDUSS', 'STOKEN')
    )


def _build_list_url(quoted_dir, bdstoken, page):
    """Return the list-API URL for one page of an already URL-encoded directory."""
    query = [
        'app_id=250528',
        'bdstoken=' + bdstoken,
        'channel=chunlei',
        'clienttype=0',
        'desc=1',
        'dir=' + quoted_dir,
        'logid=MTUzNDM4NDk3MjYzNDAuNTAyODg4NzM4MTQyNDE0Nw==',
        'num=100',
        'order=time',
        'page=' + str(page),
        'showempty=0',
        'web=1',
    ]
    return LIST_API_URL + '&'.join(query)


def get_dir_path(dir_path, bdstoken, cookies, db_file):
    """List every entry directly inside one Baidu Pan directory.

    Pages through the list API (100 entries per page) until an empty page
    is returned.

    Args:
        dir_path: Directory to list, e.g. '/' or '/some/folder'.
        bdstoken: The bdstoken value sent as a query parameter.
        cookies: Iterable of dicts with 'name' and 'value' keys; only the
            BDUSS and STOKEN entries are sent.
        db_file: Unused here; kept so existing callers keep working.

    Returns:
        A tuple (flag, ret_list_dir, ret_list_all): flag is False when a
        request or its parsing failed, ret_list_dir holds the paths of
        sub-directories and ret_list_all the paths of every entry seen
        before any failure.
    """
    # safe='' makes quote() encode '/' as %2F as well; by default it is
    # left untouched.
    quoted_dir = urllib.parse.quote(dir_path, safe='')

    # The headers do not depend on the page, so build them once.
    # NOTE(review): "br" is advertised in Accept-Encoding; confirm the
    # installed requests/urllib3 can decode brotli responses.
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Cookie": _build_cookie_header(cookies),
        "Host": "pan.baidu.com",
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
    }

    flag = True
    ret_list_all = []
    ret_list_dir = []
    page = 1
    while True:
        url = _build_list_url(quoted_dir, bdstoken, page)
        # Bound before the request so the error report below never hits
        # an undefined name when the request itself raises.
        response = None
        try:
            response = requests.get(
                url=url, headers=headers, timeout=REQUEST_TIMEOUT)
            content = json.loads(response.text)
            entries = content['list']
            if not entries:
                break
            # Each entry also carries fs_id, server_filename, size and
            # (for files) md5; only path and isdir are needed here.
            for file_item in entries:
                path = file_item['path']
                if file_item['isdir'] == 1:
                    ret_list_dir.append(path)
                ret_list_all.append(path)
        except Exception as e:
            # Best effort: report and let the caller retry this directory.
            # KeyboardInterrupt / SystemExit are deliberately not caught.
            print('Error Function: get_dir_path (dir_path, bdstoken, cookies, db_file)')
            print('Error Argument:', urllib.parse.unquote(quoted_dir))
            print('Error :', response.text if response is not None else None)
            print('Error Reason :', e)
            flag = False
            break
        page += 1
    return flag, ret_list_dir, ret_list_all


def _walk_dirs(start_dirs, bdstoken, cookies, db_file):
    """Breadth-first walk from start_dirs; return the directories that failed."""
    queue = deque(start_dirs)
    failed = []
    while queue:
        cur_path = queue.popleft()
        flag, sub_dirs, _ = get_dir_path(cur_path, bdstoken, cookies, db_file)
        if flag:
            queue.extend(sub_dirs)
        else:
            failed.append(cur_path)
    return failed


def get_all_files(username, bdstoken, cookies, db_file):
    """Traverse the whole net disk breadth-first, retrying failures once.

    Starts at '/', so a failed root listing is retried like any other
    directory instead of being reported as success. On a clean first pass
    the file 'error_dir_path_list_<username>' is removed if it exists.

    Args:
        username: Used only to build the error-list file name.
        bdstoken: Passed through to get_dir_path.
        cookies: Passed through to get_dir_path.
        db_file: Passed through to get_dir_path.
    """
    retry_list = _walk_dirs(['/'], bdstoken, cookies, db_file)
    if not retry_list:
        print('get all files successful')
        try:
            os.remove('error_dir_path_list_' + username)
        except FileNotFoundError:
            pass
        return

    print('retry_list is :', retry_list)
    print('start get retry_list')
    retry_list = _walk_dirs(retry_list, bdstoken, cookies, db_file)
    if not retry_list:
        print('get all files successful')
    else:
        print('retry_list is :', retry_list)
3.代码分析
此代码与上一篇文章《百度云删除文件API接口探索》紧密关联,其中部分代码共用,主要体现在 bdstoken 和 cookie 的获取方面,此文不再赘述
需要注意的是,构造数据请求链接时,文件路径需要做 URL 编码(urlencode),但是 Python3 的 urllib.parse.quote 默认不对“/”进行编码(safe 参数默认为 '/'),需要手动替换为 %2F(或者传入 safe='')才能正确获取
get_all_files则通过宽度优先遍历的方式遍历全部文件