用 Python 实现多个表格合并,按指定字段去重并对某一列求和

需求概述

1、读取某文件夹中的N张报表(格式一致),将其合并为一张汇总表。源表格样式如下图:

2、将合并后的表根据组合名称去重,并将同一组合名称所对应的余额相加;最后将余额列的单元格格式设置为会计专用(余额保留小数点后两位,且余额为0时显示为-),并用红色底色高亮显示余额小于0的行。如下图:

需求实现

import glob
import os

import pandas as pd


def merge_form(base_dir: str, write_file: str):
    """
    Merge every report in a directory into one de-duplicated summary workbook.

    Each file found directly under ``base_dir`` (except those whose file name
    contains '汇总表') is read with the header on the sixth row and only the
    first five columns kept. Rows without a '托管户账号' value are discarded.
    The remaining rows are collapsed to one row per '组合名称' (the first
    occurrence is kept) and that row's '余额' is replaced by the sum of '余额'
    over all rows sharing the same '组合名称'. The result is written to the
    sheet '汇总表' with an accounting number format on the '余额' column and a
    red background on rows whose '余额' is negative.

    :param base_dir: directory holding the source workbooks
    :param write_file: path of the Excel file to create
    :return: None
    :raises ValueError: if ``base_dir`` contains no report to merge
    """
    # glob.escape keeps characters such as '[' in the directory name literal.
    pattern = os.path.join(glob.escape(base_dir), '*')

    # Sorted so that "first occurrence" below does not depend on the
    # arbitrary order in which the file system lists the directory.
    frames = []
    for file in sorted(glob.glob(pattern)):
        # Test only the file name: a parent directory that happens to
        # contain '汇总表' must not cause every report to be skipped.
        if '汇总表' in os.path.basename(file):
            continue
        frames.append(
            pd.read_excel(file, header=5, usecols=[0, 1, 2, 3, 4], dtype={'托管户账号': str})
        )

    if not frames:
        raise ValueError(f"no report files to merge were found in {base_dir!r}")

    # One concat over the collected frames instead of growing a frame per file.
    concat_df = pd.concat(frames, ignore_index=True)

    # Drop filler rows that carry no account number.
    concat_df = concat_df[concat_df['托管户账号'].notna()]

    # Per-name balance totals, indexed by '组合名称'.
    balance_totals = concat_df.groupby(by='组合名称')['余额'].sum()

    # Keep the first row of every name and substitute the group total.
    # .copy() gives an independent frame so the assignment below is safe.
    merge_df = (
        concat_df.drop_duplicates(subset=['组合名称'], keep='first')
        .reset_index(drop=True)
        .copy()
    )
    merge_df['余额'] = merge_df['组合名称'].map(balance_totals)

    # Positions (0-based, matching the reset index) of rows with a negative balance.
    negative_rows = merge_df.index[merge_df['余额'] < 0.0]

    last_col = len(merge_df.columns) - 1
    balance_col = merge_df.columns.get_loc('余额')

    # The context manager saves and closes the workbook; ExcelWriter.save()
    # no longer exists in pandas 2.x.
    with pd.ExcelWriter(write_file, engine='xlsxwriter') as writer:
        merge_df.to_excel(writer, index=False, sheet_name='汇总表')

        workbook = writer.book
        worksheet = writer.sheets['汇总表']

        # Red background for negative-balance rows. A "text containing ''"
        # rule is used because it matches every cell, so the fill is layered
        # on top of the cell without replacing its number format.
        bg_color_format = workbook.add_format({'bg_color': 'red'})
        for i in negative_rows:
            excel_row = i + 1  # row 0 of the sheet holds the header
            worksheet.conditional_format(
                excel_row, 0, excel_row, last_col,
                {'type': 'text', 'criteria': 'containing', 'value': '', 'format': bg_color_format},
            )

        # Accounting format: two decimals, thousands separator, "-" for zero.
        balance_format = workbook.add_format(
            {'num_format': '_ * #,##0.00_ ;_ * -#,##0.00_ ;_ * "-"??_ ;_ @_ '}
        )
        worksheet.set_column(0, last_col, 20)
        worksheet.set_column(balance_col, balance_col, 20, balance_format)

  

 

posted @ 2019-06-21 10:41  cnblogs用户  阅读(3029)  评论(0)  编辑  收藏  举报