python3 把excel文件合并并保存到csv文件

  具体是这样,某路径下有很多 excel文件,文件名中包含相同关键字的是一类文件,把包含相同关键字的文件合并成一个文件,生成一个新的csv文件

  

# coding=utf-8
import xlrd
import xlwt
import datetime
import os
import time
import httplib2

# 具体是这样,某路径下有很多 excel文件,文件名中包含相同关键字的是一类文件,把包含相同关键字的文件合并成一个文件,生成一个新的csv文件
class csvFile:
    def WriteLog(self, message, fileName):
        with open(fileName, 'a') as f:
            f.write(message)

    def combinExcelToCsv(self,keyword_list,mainExceldir = "D:\\work\\Excel_txtProcesss\\new-微博",csvDir="D:\\work\\Excel_txtProcesss\\new-微博-合并"):

        # 把文件名中包含上述文件列表的文件合并成一个文件,比如最新微博-new77723-长城.xls和最新微博-new60000-长城.xls 合并成一个文件:长城.csv
        key_filelist_dict = {}
        for root, dirs, files in os.walk(mainExceldir):
            for key in keyword_list:
                filelist = []
                for file in files:
                    strfilename = mainExceldir + "\\" + file
                    if file.find(key) > -1:
                        filelist.append(strfilename)
                if len(filelist) > 0:
                    key_filelist_dict[key] = filelist

        for key1 in key_filelist_dict.keys():
            filename = os.path.join(csvDir, key1 + ".csv")
            file_list = key_filelist_dict[key1]
            file_index = 0

            for file in file_list:
                print(file)
                try:
                    data = xlrd.open_workbook(file)
                    table = data.sheets()[0]
                    nrows = table.nrows
                    ncols = table.ncols

                    # 文件标题
                    if file_index==0:
                        rowValues = table.row_values(0)
                        headtitle = ""
                        for colIndex  in range(0,ncols):
                            headtitle += str(rowValues[colIndex])+","
                        self.WriteLog(headtitle, filename)

                    file_index+=1
                    for i in range(1, nrows):
                        rowValues = table.row_values(i)
                        message = ""
                        for colIndex in range(0, ncols):
                            message += str(rowValues[colIndex]).replace(",", "") + ","
                        print(i)
                        message = message.replace("\n", "")
                        message = message.encode("gbk", "ignore").decode("gbk")
                        self.WriteLog("\n"+message, filename)
                except UnicodeDecodeError as rr:
                    print("error:" + file)
                    print(rr.args)


obj = csvFile()
keyword_list = ["BOSS直聘", "VIVO", "蒙牛", "宝马", "拼多多", "OPPO", "上汽通用", "小米", "优信", "长城", "58", "百威", "东风", "东鹏",
                "富连网", "链家", "青岛", "一汽大众", "梅赛德斯-奔驰"]
# 存放excel文件路径
mainExceldir="D:\\work\\Excel_txtProcesss\\new-微博"
# 要生成的csv文件路径
csvDir="D:\\work\\Excel_txtProcesss\\new-微博-合并"
obj.combinExcelToCsv(keyword_list,mainExceldir)

 

posted on 2018-07-23 15:38  shaomine  阅读(1475)  评论(0编辑  收藏  举报