import os,re
import pandas as pd
dir_path = r'C:\Users\lenovo\Desktop\每日经营看板\经营周报\data'
#col = [] ##选取需要的字段,看了下不是所有的字段都需要的
#df = pd.DataFrame([],columns=col)
df = pd.DataFrame([])
for root,dirs,files in os.walk(dir_path):##文件夹的路径
if files: ##判断是否有文件
for file_name in files: ##循环文件的名称
if '.xls' in file_name: #判定是不是文件是否有o2o_order结尾的文件,是的就继续,不是的就退出了;
path = os.path.join(root,file_name)
print('正在处理的文件是%s'%(path))
try:
xlsx_file = pd.ExcelFile(path) ##路径
data = xlsx_file.parse('data') ##选取表
df_tmp = pd.DataFrame(data)
print(df_tmp.head())
df = pd.concat([df,df_tmp], ignore_index=True, sort=True) ## 数据合并
print(df.shape)
except:
print('读取文件,处理数据失败')
else:
print('warning:非.xlsx文件不读取')
#print(df.head())
#保存到本地
cols = ["数据类型","统计日期范围","城市","实收","折前收入","商品毛利","商品毛利率","成本","折扣率","营销费用","营销费用率",
"货损成本","货损率","动销点位数","运营点位数","新增运营点位数","撤点数","动销率","盗损成本","盗损率","单点日实收",
"单点日订单量","购买频次","客单价","订单量","商城UV","下单用户数","留存率","个人补款","企业补款","妥投率","补货单数",
"补货原料数","补货货值"]
df = df[cols]
out_path= 'C:/Users/lenovo/Desktop/output-1.xlsx'
writer = pd.ExcelWriter(out_path, engine='xlsxwriter')
df.to_excel(writer,'Sheet1',index = False)
writer.save()