Python多进程实例

Python多进程实例

废话不多说,直接上代码。

# -*- coding: utf-8 -*-
from multiprocessing import Pool
from multiprocessing import Process
import multiprocessing

import numpy as np
import pandas as pd


def parallelize_dataframe(df, func, num_partitions=None):
    """Apply ``func`` to row-wise slices of ``df`` in a pool of processes.

    The frame is cut into contiguous row slices, each slice is submitted to
    a ``multiprocessing.Pool`` with ``apply_async``, and the per-slice results
    are concatenated in the original slice order.

    Args:
        df: The DataFrame to process.
        func: Callable taking one DataFrame slice and returning a DataFrame
            (or anything ``pd.concat`` accepts). It is sent to worker
            processes, so it must be picklable, e.g. a module-level function.
        num_partitions: Desired number of slices. Defaults to the CPU count.
            It is clamped to the range ``1..len(df)``.

    Returns:
        The concatenation of ``func(slice)`` over all slices.

    Raises:
        Any exception raised by ``func`` in a worker is re-raised here.
    """
    cpu_total = multiprocessing.cpu_count()
    if num_partitions is None:
        num_partitions = cpu_total
    # Never create more slices than rows: an empty slice only costs a
    # round-trip and can trip up ``func``. Keep at least one slice so an
    # empty frame is still passed through ``func`` once.
    num_partitions = max(1, min(num_partitions, len(df)))

    # Same slice sizes as ``np.array_split`` (the first ``extra`` slices get
    # one more row), but done with positional slicing so the result is always
    # a list of DataFrames regardless of how NumPy treats DataFrame inputs.
    base, extra = divmod(len(df), num_partitions)
    df_split = []
    start = 0
    for i in range(num_partitions):
        stop = start + base + (1 if i < extra else 0)
        df_split.append(df.iloc[start:stop])
        start = stop

    # The context manager tears the pool down even if a worker raises; every
    # result is fetched inside the block, before the pool is terminated.
    with Pool(min(cpu_total, num_partitions)) as pool:
        result_list = [
            pool.apply_async(func=func, args=(df_temp, ))
            for df_temp in df_split
        ]
        return pd.concat([i.get() for i in result_list])


def parall_func(df):
    """Add the '缺岗时长' (absence duration) column to ``df`` and return it.

    The column is computed row by row with ``get_absence_duration``. The
    frame passed in is modified in place and the same object is returned.
    """
    per_row_duration = df.apply(get_absence_duration, axis=1)
    df['缺岗时长'] = per_row_duration
    return df


def analysis(current_month):
    """Run the per-month analysis and return the processed shift data.

    Args:
        current_month: Month identifier passed through to ``get_df_shift``.

    Returns:
        The shift DataFrame after ``parall_func`` has been applied to it in
        parallel. Earlier versions discarded this value and returned None.
    """
    df_shift = get_df_shift(current_month)  # shift-schedule data
    # Processed in multiple processes (not threads) via a process pool.
    df_shift = parallelize_dataframe(df_shift, parall_func)
    return df_shift


if __name__ == '__main__':
    # Fetch the punch-card data for every month asynchronously.
    # The context manager releases the pool even if submitting a task or
    # collecting a result raises.
    with Pool() as p:
        result = {}
        for month in month_list:
            result[month] = p.apply_async(func=get_df_card, args=(month, ))
        # Update df_card_dict; get() blocks until that month's worker is
        # done, so all results are collected before the pool is torn down.
        for k, v in result.items():
            df_card_dict[k] = v.get()
    # Update df_base
    update_df_base_by_card()

    # Run the main data-analysis logic.
    # NOTE: analysis() creates its own process pool internally, so it must be
    # called sequentially here rather than from inside another pool; the
    # original author observed that otherwise the program does not run
    # (presumably because pool workers are daemonic and may not start child
    # processes - confirm against the multiprocessing docs).
    for month in month_list:
        analysis(month)
posted @ 2023-08-29 15:36  Steven0325  阅读(37)  评论(0)  编辑  收藏  举报