网站更新内容:请访问: https://bigdata.ministep.cn/

循环读取目录Xlsx文件

import os,re
import pandas as pd
 
dir_path = r'C:\Users\lenovo\Desktop\每日经营看板\经营周报\data'

#col = [] ##选取需要的字段,看了下不是所有的字段都需要的
#df = pd.DataFrame([],columns=col)

df = pd.DataFrame([])
 
for root,dirs,files in os.walk(dir_path):##文件夹的路径
    if files: ##判断是否有文件
        for file_name in files: ##循环文件的名称
            if '.xls' in file_name: #判定是不是文件是否有o2o_order结尾的文件,是的就继续,不是的就退出了;
                path = os.path.join(root,file_name)
                print('正在处理的文件是%s'%(path))
                try:
                    xlsx_file = pd.ExcelFile(path) ##路径
                    data = xlsx_file.parse('data') ##选取表
                    df_tmp = pd.DataFrame(data)
                    print(df_tmp.head())
                    df = pd.concat([df,df_tmp], ignore_index=True, sort=True) ## 数据合并
                    print(df.shape)
                except:
                    print('读取文件,处理数据失败')
            else:
                print('warning:非.xlsx文件不读取')
 
#print(df.head())
 
#保存到本地

cols = ["数据类型","统计日期范围","城市","实收","折前收入","商品毛利","商品毛利率","成本","折扣率","营销费用","营销费用率",
"货损成本","货损率","动销点位数","运营点位数","新增运营点位数","撤点数","动销率","盗损成本","盗损率","单点日实收",
"单点日订单量","购买频次","客单价","订单量","商城UV","下单用户数","留存率","个人补款","企业补款","妥投率","补货单数",
"补货原料数","补货货值"]

df = df[cols]
 
out_path= 'C:/Users/lenovo/Desktop/output-1.xlsx'
 
writer = pd.ExcelWriter(out_path, engine='xlsxwriter')
 
df.to_excel(writer,'Sheet1',index = False)
 
writer.save()
posted @ 2021-04-04 20:19  ministep88  阅读(82)  评论(0编辑  收藏  举报
网站更新内容:请访问:https://bigdata.ministep.cn/