电商 Python html格式访客数据转为excel格式的数据 html格式的excel转换为excel
原文:
https://www.jianshu.com/p/4f38fe021fb3
# -*- coding: utf-8 -*-
# # 找出非excel格式的文件
# import pandas as pd
# import os
# from shutil import copyfile
# excel_dir = 'D:/工作/数据转换/访客数据/'
# os.chdir(excel_dir)
# for filename in os.listdir(excel_dir):
# root_dir = ""
# print(filename)
# try:
# frame = pd.read_excel(excel_dir+filename)
# root_dir = "D:/工作/数据转换/excel格式/"
# except Exception as e:
# print("异常:", e)
# root_dir = "D:/工作/数据转换/html格式/"
# copyfile(excel_dir+filename, root_dir+filename)
# # html的table转换为excel
# # 原文:https://www.jianshu.com/p/4f38fe021fb3
# import pandas
# import os
# from shutil import copyfile
# excel_dir = 'D:/工作/数据转换/html格式/'
# os.chdir(excel_dir)
# for filename in os.listdir(excel_dir):
# print(filename)
# try:
# with open(excel_dir+filename, 'rb') as f:
# df = pandas.read_html(f.read(),encoding='utf-8')
# bb = pandas.ExcelWriter("D:/工作/数据转换/转换数据/" + filename)
# df[0].to_excel(bb,index=False)
# bb.close()
# except Exception as e:
# print("异常:"+e)
# # copyfile(excel_dir+filename, "D:/工作/数据转换/出错数据/" + filename)
# # 单文件测试
# import pandas
# import os
# with open('访客-2020-09-18 23_45_16.xlsx', 'rb') as f:
# df = pandas.read_html(f.read(),encoding='utf-8')
# bb = pandas.ExcelWriter('访访客-2020-09-18 23_45_16-1.xlsx')
# df[0].to_excel(bb,index=False)
# bb.close()
# # 批量文件改名
# import os
# excel_dir = 'D:/工作/数据转换/1/'
# os.chdir(excel_dir)
# for filename in os.listdir(excel_dir):
# print(filename)
# oldFullName = excel_dir+filename
# newFullName = excel_dir+"1店-"+filename
# os.rename(oldFullName, newFullName)
# # excel转换为csv文件
# import pandas as pd
# import os
# import re
# excel_dir = 'D:/工作/数据转换/转换数据/'
# os.chdir(excel_dir)
# for filename in os.listdir(excel_dir):
# print(filename)
# # filename = "访客-2020-07-30 23_11_30.xlsx"
# df = pd.read_excel(filename,index_col=0)
# # 访问时间列 增加 日期
# m = re.search("(\d{4}-\d{1,2}-\d{1,2})", filename)
# strdate = m.group(1)
# df["访问时间"] =['%s %s' % (strdate, s) for s in df["访问时间"]]
# col_name = df.columns.tolist()
# if '搜索关键字' not in col_name:
# # 插入列
# index = col_name.index('入店来源') + 1
# col_name.insert(index, '搜索关键字')
# df = df.reindex(columns = col_name)
# # 修改值
# df.loc[df['入店来源'].str.find('手淘搜索') > -1 , '搜索关键字'] = df['入店来源'].str.replace('手淘搜索','')
# df.loc[df['入店来源'].str.find('手淘搜索') > -1 , '入店来源'] = '手淘搜索'
# file_name = os.path.splitext(filename)[0]
# df.to_csv('D:/工作/数据转换/CSV数据/'+file_name+'.csv',encoding='utf-8')
# # 数据合并
# import pandas as pd
# import os
# path = "./2店/"
# files = os.listdir(path)
# list_excel = []
# for filename in files:
# fullname = path + filename# excel的相对路径
# df = pd.read_csv(fullname)# 默认读取Excel的第一个表单
# list_excel.append(df)# 把Excel追加到list中
# writer = pd.ExcelWriter('合并后的数据.xlsx')
# pd.concat(list_excel).to_excel(writer,'sheet1',index=False)
# writer.save()
# print('合并完成')