Python--pandas操作excel
pandas将excel内容生成在线的html:
#encoding:utf-8
import pandas as pd
import codecs def test19(): xd = pd.ExcelFile('b站.xls') df = xd.parse() with codecs.open('1.html', 'w', 'UTF-8') as html_file: html_file.write(df.to_html(header=True, index=False))
pandas将多个excel合并成一个excel并对数据进行去重
#encoding:utf-8 import os import pandas as pd def test20(): # 两个excel合并,并去重 current_path = os.path.dirname(os.path.realpath(__file__)) files = os.listdir(os.path.join(current_path, "csv_file")) df1 = pd.read_excel(os.path.join(current_path, "csv_file", files[0])) for file in files[1:]: df2 = pd.read_excel(os.path.join(current_path, "csv_file", file)) # 合并操作 df1 = pd.concat([df1, df2], axis=0, ignore_index=True) df1 = df1.drop_duplicates() # 去重 # df1 = df1.reset_index(drop=True) # 重新生成index df1.to_excel(os.path.join(current_path, "csv_file", "total.xlsx"), index=False) test20()
根据列进行合并
#encoding:utf-8 import os import numpy as np import pandas as pd def test21(): # excel的列合并 # 将除了第一个以外的excel中第几列添加到第一个excel中 # 注意点: excel的行数保持一致 current_path = os.path.dirname(os.path.realpath(__file__)) files = os.listdir(os.path.join(current_path, "csv_file")) df = pd.read_excel(os.path.join(current_path, "csv_file", files[0])) for file in files[1:]: # usecols: 取第几列 new_file = pd.read_excel(os.path.join(current_path, "csv_file", file),usecols=[1]) df = np.hstack((df, new_file)) df = pd.DataFrame(df) df.to_excel(os.path.join(current_path, "csv_file", "total.xlsx"), index=False)