百度云获取全部文件API探索

通过抓包发现百度云获取文件列表接口https://pan.baidu.com/api/list,本文主要试图通过该接口来封装一个获取百度网盘文件的api接口

 1.抓包分析

2.代码实现

#! /usr/bin/python3
# -*- coding: UTF-8 -*-

import time, requests, json, urllib, sys, os

def get_dir_path (dir_path, bdstoken, cookies, db_file):

    flag = True

    dir_path = urllib.parse.quote (dir_path)
    dir_path = dir_path.replace ('/', '%2F')

    page = 1

    args = []

    ret_list_all = []
    ret_list_dir = []

    while True:
        url = 'https://pan.baidu.com/api/list?'
        url += 'app_id=250528'
        url += '&bdstoken=' + bdstoken
        url += '&channel=chunlei'
        url += '&clienttype=0'
        url += '&desc=1'
        url += '&dir=' + dir_path
        url += '&logid=MTUzNDM4NDk3MjYzNDAuNTAyODg4NzM4MTQyNDE0Nw=='
        url += '&num=100'
        url += '&order=time'
        url += '&page=' + str (page)
        url += '&showempty=0'
        url += '&web=1'

        Cookie = ''
        for item in cookies:
            if item ['name'] == 'BDUSS' or item ['name'] == 'STOKEN':
                Cookie += item ['name'] + '=' + item ['value'] + ';'
        Cookie.rstrip (';')

        headers = {
                "Accept": "application/json, text/javascript, */*; q=0.01",
                "Accept-Encoding": "gzip, deflate, br",
                "Accept-Language": "zh-CN,zh;q=0.9",
                "Connection": "keep-alive",
                "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
                "Cookie": Cookie,
                "Host": "pan.baidu.com",
                "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
                }

        try:
            response = requests.get (url = url, headers = headers)
            content = json.loads (response.text)
            if content ['list'] == []:
                break
            else:
                for file_item in content ['list']:
                    fs_id = file_item ['fs_id']
                    path = file_item ['path']
                    name = file_item ['server_filename']
                    size = file_item ['size']
                    try:
                        md5 = file_item ['md5']
                    except:
                        md5 = 'md5'
                    isdir = file_item ['isdir']

                    arg = (str (fs_id), path, name, str (size), md5, isdir, str (file_item))
                    args.append (arg)

                    if isdir == 1:
                        ret_list_dir.append (path)
                    ret_list_all.append (path)
        except BaseException as e:
            print ('Error Function: get_dir_path (dir_path, bdstoken, cookies, db_file)')
            print ('Error Argument:', urllib.parse.unquote (dir_path))
            print ('Error :', response.text)
            print ('Error Reason :', e)
            flag = False
            break

        page += 1

    return flag, ret_list_dir, ret_list_all

def get_all_files (username, bdstoken, cookies, db_file):

    flag, cur_list, all_list = get_dir_path ('/', bdstoken, cookies, db_file)
    retry_list = []
    while len (cur_list) != 0:
        cur_path = cur_list [0]
        cur_list.pop (0)
        #print ('next dir path :', cur_path)
        flag, ret_list, all_list = get_dir_path (cur_path, bdstoken, cookies, db_file)
        if flag:
            cur_list.extend (ret_list)
            #print ('len (cur_list) :', len (cur_list))
            #print ('depth :', cur_path.count ('/'))
            #print ('*' * 64)
        else:
            retry_list.append (cur_path)

    if len (retry_list) == 0:
        print ('get all files successful')

        if os.path.exists ('error_dir_path_list_' + username):
            os.remove ('error_dir_path_list_' + username)
    else:
        print ('retry_list is :', retry_list)
        print ('start get retry_list')

        cur_list = retry_list
        retry_list = []
        while len (cur_list) != 0:
            cur_path = cur_list [0]
            cur_list.pop (0)
            #print ('next dir path :', cur_path)
            flag, ret_list, all_list = get_dir_path (cur_path, bdstoken, cookies, db_file)
            if flag:
                cur_list.extend (ret_list)
                #print ('len (cur_list) :', len (cur_list))
                #print ('depth :', cur_path.count ('/'))
                #print ('*' * 64)
            else:
                retry_list.append (cur_path)

        if len (retry_list) == 0:
            print ('get all files successful')
        else:
            print ('retry_list is :', retry_list)

3.代码分析

此代码与上一篇文章百度云删除文件API接口探索紧密关联,其中部分代码共用,主要体现在bdstoken和cookie的获取方面,此文不多做赘述

需要注意的是,构造数据请求链接时,文件路径需要做urldecode,但是Python3中对“/”不做处理,需要手动替换才能正确获取

get_all_files则通过宽度优先遍历的方式遍历全部文件

 

posted @ 2018-08-25 10:36  菜鸟村长  阅读(4345)  评论(0编辑  收藏  举报