对json中的字典提取其中的所有参数(包括list和dict中)整合为一层

例如:在 Python 中,有一个字典 {'s1': [{'s11': 0, 's12': 2}, {'s13': 3, 's14': 4}], 's2': 's2', 's3': {'s31': 0, 's32': 2}}。我想使用递归提取其中所有的 key 和 value,并在新的 key 中包含它在原字典中的位置信息(各层的 key 与列表下标用下划线连接),整合成一个单层字典,如 {'s1_0_s11': 0, 's1_0_s12': 2, 's1_1_s13': 3, 's1_1_s14': 4, 's2': 's2', 's3_s31': 0, 's3_s32': 2}。

import json
import multiprocessing
import os
from collections import defaultdict

def _flatten_into(value, key, result):
    """Store *value* in *result* under *key*, descending into dicts and lists."""
    if isinstance(value, dict):
        for sub_key, sub_value in value.items():
            _flatten_into(sub_value, key + "_" + str(sub_key), result)
    elif isinstance(value, list):
        # The list index becomes a path segment, so siblings stay distinguishable.
        for index, item in enumerate(value):
            _flatten_into(item, key + "_" + str(index), result)
    else:
        result[key] = value


def extract_dict(d, key_prefix=""):
    """Flatten a nested dict into a single-level dict.

    Every leaf value is stored under a key built from the path leading to
    it: dict keys and list indices joined with underscores, e.g.
    ``{'s1': [{'s11': 0}], 's3': {'s31': 1}}`` becomes
    ``{'s1_0_s11': 0, 's3_s31': 1}``.

    Scalars and nested lists inside a list are kept as well (``{'a': [1, 2]}``
    becomes ``{'a_0': 1, 'a_1': 2}``) instead of being silently dropped.
    Empty dicts and empty lists contribute no keys.

    Args:
        d: The dict to flatten.
        key_prefix: String prepended verbatim to every generated key.

    Returns:
        A new flat dict. If two paths produce the same key, the one visited
        later overwrites the earlier one.
    """
    result = {}
    for k, v in d.items():
        _flatten_into(v, key_prefix + str(k), result)
    return result


def _unique_in_order(values):
    """Return the distinct items of *values*, keeping first-seen order."""
    try:
        return list(dict.fromkeys(values))
    except TypeError:
        # Unhashable values (lists, dicts) cannot be dict keys; fall back to
        # an equality-based scan.
        unique = []
        for value in values:
            if value not in unique:
                unique.append(value)
        return unique


def merge_dicts(dict_list):
    """Merge several dicts into one dict mapping each key to its distinct values.

    Args:
        dict_list: Iterable of dicts to merge.

    Returns:
        A plain dict in which every key that occurs in any input maps to a
        list of the distinct values seen for it. Values appear in first-seen
        order, so the result is the same on every run for the same input.
    """
    merged_dict = defaultdict(list)
    for d in dict_list:
        for k, v in d.items():
            merged_dict[k].append(v)
    # De-duplicate each value list.
    return {k: _unique_in_order(v) for k, v in merged_dict.items()}

def extract_dict_worker(json_file, dict_save='save-dict'):
    """Flatten one JSON file and write the flat dict to *dict_save*.

    The file is parsed, flattened with ``extract_dict``, stripped of its
    ``'format_filename'`` entry (when present) and written as JSON to
    ``dict_save`` under the same base name as the input.

    Args:
        json_file: Path of the JSON file to read; its top-level value must be
            an object, since it is handed straight to ``extract_dict``.
        dict_save: Directory that receives the flattened file. It is created
            if it does not exist yet.

    Returns:
        The flattened dict that was written.
    """
    with open(json_file, "r", encoding="utf-8") as f:
        d = json.load(f)
    result = extract_dict(d)
    # Not every file is guaranteed to carry this key; a missing one must not
    # abort the whole worker.
    result.pop('format_filename', None)
    # exist_ok keeps this safe when several workers race to create the directory.
    os.makedirs(dict_save, exist_ok=True)
    out_path = os.path.join(dict_save, os.path.basename(json_file))
    with open(out_path, 'w', encoding="utf-8") as f:
        json.dump(result, f)
    return result
def extract_dicts_parallel(json_dir, n_workers=None):
    """Flatten every file in *json_dir* in parallel and merge the results.

    Each regular file directly inside ``json_dir`` is processed by
    ``extract_dict_worker`` in a process pool. The flat dicts are then
    combined with ``merge_dicts`` and written to ``merge_dict.json`` in the
    current working directory.

    Args:
        json_dir: Directory containing the JSON files to process.
        n_workers: Number of worker processes, passed to
            ``multiprocessing.Pool``; ``None`` lets the pool choose.
    """
    # Sort for a reproducible processing order and skip sub-directories,
    # which the worker could not open as files.
    json_str_list = []
    for name in sorted(os.listdir(json_dir)):
        path = os.path.join(json_dir, name)
        if os.path.isfile(path):
            json_str_list.append(path)

    # The context manager releases the worker processes even if a task raises.
    with multiprocessing.Pool(n_workers) as pool:
        result_list = pool.map(extract_dict_worker, json_str_list)

    # Combine the per-file results and de-duplicate the values per key.
    new_dict = merge_dicts(result_list)
    with open('merge_dict.json', 'w') as f:
        json.dump(new_dict, f)



if __name__ == '__main__':
    # Flatten every file under the input directory using 20 worker processes.
    extract_dicts_parallel(json_dir='./5000-json', n_workers=20)

 

posted @ 2023-02-13 17:02  海_纳百川  阅读(13)  评论(0编辑  收藏  举报
本站总访问量