Python多进程实例
Python多进程实例
废话不多说,直接上代码。
# -*- coding: utf-8 -*-
from multiprocessing import Pool
from multiprocessing import Process
import multiprocessing
import numpy as np
import pandas as pd
# Generic helper: slice a DataFrame and process the slices asynchronously in a process pool.
def parallelize_dataframe(df, func):
    """Apply ``func`` to row slices of ``df`` in parallel and concatenate the results.

    The frame is cut into contiguous row slices: at most one per CPU and never
    more slices than rows, so ``func`` is only handed an empty slice when
    ``df`` itself is empty (it is then called once with the empty frame).

    Args:
        df: DataFrame to process.
        func: Callable taking one DataFrame slice and returning a pandas
            object that ``pd.concat`` accepts. It is sent to worker
            processes, so it must be picklable (e.g. a module-level function).

    Returns:
        ``pd.concat`` of the per-slice results, in the original row order.

    Raises:
        Any exception raised by ``func`` inside a worker is re-raised in the
        caller when the corresponding result is collected.
    """
    n_rows = len(df)
    # At least one partition so pd.concat always receives something.
    num_partitions = max(1, min(multiprocessing.cpu_count(), n_rows))

    # Slice positionally with iloc rather than np.array_split(df, ...): the
    # latter goes through DataFrame.swapaxes, which pandas has deprecated.
    # Sizes match array_split: the first `extra` slices get one more row.
    base, extra = divmod(n_rows, num_partitions)
    df_split = []
    start = 0
    for i in range(num_partitions):
        stop = start + base + (1 if i < extra else 0)
        df_split.append(df.iloc[start:stop])
        start = stop

    # The context manager tears the pool down even if submission or a worker
    # fails; all results are collected before leaving the block.
    with Pool(num_partitions) as pool:
        pending = [
            pool.apply_async(func=func, args=(chunk,)) for chunk in df_split
        ]
        results = [task.get() for task in pending]
    return pd.concat(results)
def parall_func(df):
    """Worker routine: add the absence-duration column to one DataFrame slice.

    Calls ``get_absence_duration`` once per row (``axis=1``), stores the
    outcome in the '缺岗时长' column of ``df`` and returns the same frame.
    """
    absence_per_row = df.apply(get_absence_duration, axis=1)
    df['缺岗时长'] = absence_per_row
    return df
def analysis(current_month):
    """Run the analysis step for ``current_month``.

    Loads the shift-schedule frame via ``get_df_shift`` and pushes it through
    ``parallelize_dataframe`` with ``parall_func`` as the per-slice worker.
    """
    # Shift-schedule data, processed in parallel across worker processes.
    df_shift = parallelize_dataframe(get_df_shift(current_month), parall_func)
if __name__ == '__main__':
    # Fetch the punch-card data for every month asynchronously:
    # one task per month, submitted to a fresh process pool.
    p = Pool()
    result = {
        month: p.apply_async(func=get_df_card, args=(month, ))
        for month in month_list
    }

    # No more tasks will be submitted; block the main process until
    # every worker has exited.
    p.close()
    p.join()

    # Copy each month's fetched frame into df_card_dict.
    for month_key, async_result in result.items():
        df_card_dict[month_key] = async_result.get()

    # Refresh df_base from the card data.
    update_df_base_by_card()

    # Run the main analysis logic month by month.
    # NOTE (original author): analysis() uses multiprocessing inside its own
    # body, so the main program must not call it through multiprocessing as
    # well, otherwise the program does not execute.
    for month in month_list:
        analysis(month)
God will send the rain when you are ready. You need to prepare your field to receive it.