excel合并

优势：不管下级文件夹有多少，都可以找到excel并合并
　　　点击3下，批量操作
改进点：必须要求列索引（列标题）一样
　　　　sheet功能未添加
　　　　数据去重（依据实际业务情况，不一定是id去重）
import  os

# FROM_DIR = os.getcwd()
# TO_FILE = os.path.join(FROM_DIR, 'combine.xlsx')
# print(FROM_DIR)
# # print(TO_FILE)
# # D:\PycharmProjects\combine_excel
# # D:\PycharmProjects\combine_excel\combine.xlsx
#
# for _root, _dirs, _files in os.walk(FROM_DIR):  # dirs 文件夹  files 文件
#     print(_files)
#


#!/usr/local/bin/python3
# -*- coding: utf-8 -*-
'''合并指定文件夹下所有Excel文件到同一个文件'''
import os
import collections
import operator  # 进行加减乘除等等运算
from openpyxl import load_workbook
from openpyxl import Workbook



def search_excel(from_dir, to_file):
    """Recursively collect the ``.xlsx`` files found under ``from_dir``.

    Args:
        from_dir: Root directory that is traversed with ``os.walk``.
        to_file: Path of the merged output workbook. It is left out of the
            result so a previous run's output is not merged into itself.

    Returns:
        A list of paths (``os.path.join(root, name)``) in ``os.walk`` order.
    """
    # Compare normalised absolute paths so the output file is still
    # recognised when the caller spells its path differently (relative
    # path, redundant "." / ".." components, different case on Windows).
    _skip = os.path.normcase(os.path.abspath(to_file)) if to_file else None
    _excluded = False
    _results = []
    for _root, _dirs, _files in os.walk(from_dir):
        for _file in _files:
            # Accept ".XLSX" as well as ".xlsx".
            if not _file.lower().endswith('.xlsx'):
                continue
            # "~$name.xlsx" is the lock file Excel creates next to an open
            # workbook; it is not a readable workbook.
            if _file.startswith('~$'):
                continue
            _path = os.path.join(_root, _file)
            if _skip is not None and os.path.normcase(os.path.abspath(_path)) == _skip:
                _excluded = True
                continue
            _results.append(_path)

    # Report what actually happened to the output file.
    if _excluded:
        print('Remove combine.xlsx.')
    else:
        print('combine.xlsx not exist.')
    return _results


def load_excel(excel_file):
    """Read the active sheet of an Excel file.

    The first non-empty row is taken as the title row. Every following row
    is stored in an ordered dict keyed by the value of its first cell, so a
    later row with the same first-cell value replaces the earlier one.

    Args:
        excel_file: Path of the workbook to read.

    Returns:
        A ``(title, items)`` tuple: ``title`` is the list of header cell
        values, ``items`` is an ``OrderedDict`` mapping the first cell value
        of each data row to the list of that row's cell values.
    """
    _title = []
    _items = collections.OrderedDict()
    _wb = load_workbook(excel_file, read_only=True)
    try:
        _ws = _wb.active
        for _r in _ws.rows:  # yields one tuple of cells per row
            _values = [_cell.value for _cell in _r]
            if not _title:
                _title = _values
            elif _values:
                # A row without any cells has no first column to key on.
                _items[_values[0]] = _values
    finally:
        # A read-only workbook keeps the file open until closed explicitly;
        # release it even when reading fails part-way through.
        _wb.close()
    return _title, _items


def save_excel(excel_file, excel_title, excel_items):
    """Write a title row and the data rows to a new workbook.

    Args:
        excel_file: Destination path handed to ``Workbook.save``.
        excel_title: Header row, appended first.
        excel_items: Mapping whose values are the data rows; they are
            appended in the mapping's iteration order.
    """
    workbook = Workbook()
    sheet = workbook.active
    sheet.append(excel_title)
    for row in excel_items.values():
        sheet.append(row)
    workbook.save(excel_file)


def combine(from_dir, to_file):
    """Merge every Excel file found under ``from_dir`` into ``to_file``.

    The title row of the first non-empty file becomes the output header; a
    file whose title differs only triggers a warning and is merged anyway.
    Rows are collected in an ordered dict keyed as returned by
    ``load_excel``, so a row from a later file replaces an earlier row with
    the same key. Nothing is written when no file yields any data.

    Args:
        from_dir: Directory searched for Excel files.
        to_file: Path of the merged workbook to write.
    """
    sources = search_excel(from_dir, to_file)  # list of workbook paths
    if not sources:
        return

    header = []
    merged = collections.OrderedDict()
    for path in sources:
        print('Parsing ' + path)
        title, rows = load_excel(path)

        if not (title and rows):
            print('Skip since it is empty.')
            continue

        if not header:
            header = title
        elif title != header:
            print('Warning: Excel title format are different!')

        merged.update(rows)
        print('Parsing done.')

    if not (header and merged):
        print('All files is empty.')
        return
    save_excel(to_file, header, merged)


if __name__ == "__main__":
    # Merge every workbook below the current working directory into
    # combine.xlsx, written to that same directory.
    print('begin')
    FROM_DIR = os.getcwd()
    TO_FILE = os.path.join(FROM_DIR, 'combine.xlsx')
    combine(from_dir=FROM_DIR, to_file=TO_FILE)
    print('end')
excel合并
posted @ 2018-05-30 17:10 nick560 阅读(228) 评论(0) 编辑收藏举报
刷新页面返回顶部
nick560

excel合并

公告