pandas(六):pandas对excel进行读写
一、代码如下
import pandas as pd
import numpy as np
from pathlib import Path


class Process():
    """Clean labelled utterance spreadsheets and export them as TSV / Excel.

    Every reader follows the same recipe: load a sheet, pick a sentence
    column and one or two label columns, drop rows where any picked cell is
    empty, and delete spaces, carriage returns, newlines and tabs from the
    remaining cells.

    Attributes:
        path_1: workbook read by ``read_path_1``.
        path_2: workbook read by ``read_path_2``.
        s: cleaned sentences accumulated by the ``read_path_*`` methods.
        l: cleaned labels, index-aligned with ``s``.
    """

    # One-pass deletion table for the characters the cleaning step removes.
    _STRIP_TABLE = str.maketrans("", "", " \r\n\t")

    # Column holding the model prediction in the "luyin"/"liucheng" workbooks.
    _PREDICT_COLUMN = "预测标签"

    # Constant written to the ``predict`` column by the noSemantic_* exports.
    _NO_SEMANTIC_PREDICT = "无意义"

    def __init__(self):
        self.path_1 = "3000条无意义.xlsx"
        self.path_2 = "录音跟听0526.xlsx"
        self.s = []
        self.l = []

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _is_missing(value):
        """Return True for an empty cell (NaN/None/NaT or the text "nan")."""
        return str(value) == "nan" or bool(pd.isna(value))

    @classmethod
    def _clean(cls, value):
        """Return ``value`` as text with spaces, CR, LF and tabs deleted.

        Non-string cells (e.g. a purely numeric utterance) are converted with
        ``str`` first instead of failing on a missing ``.replace`` method.
        """
        return str(value).translate(cls._STRIP_TABLE)

    @classmethod
    def _collect(cls, frame, columns):
        """Return one cleaned list per requested column.

        Only rows in which *every* requested cell is present are kept, so
        the returned lists always have equal length and stay row-aligned.

        Args:
            frame: DataFrame to read from.
            columns: column names to extract, in the order of the result.

        Raises:
            KeyError: if a requested column is absent from ``frame``.
        """
        cleaned = [[] for _ in columns]
        for row in zip(*(frame[name] for name in columns)):
            if any(cls._is_missing(cell) for cell in row):
                continue
            for bucket, cell in zip(cleaned, row):
                bucket.append(cls._clean(cell))
        return cleaned

    @staticmethod
    def _write_csv(table, path):
        """Write ``table`` to ``path`` as UTF-8 TSV, creating the parent dir."""
        Path(path).parent.mkdir(parents=True, exist_ok=True)
        table.to_csv(path, index=False, sep='\t', encoding="utf8")

    @staticmethod
    def _write_excel(table, path):
        """Write ``table`` to sheet ``data`` of ``path``, creating the parent dir.

        ``to_excel`` is called without ``encoding``: that keyword was removed
        in pandas 2.0 and passing it raises ``TypeError`` there.
        """
        Path(path).parent.mkdir(parents=True, exist_ok=True)
        table.to_excel(path, sheet_name='data', index=False)

    def _export_no_semantic(self, path_in, sheet, sentence_col, label_col,
                            path_out):
        """Export sentence/label pairs with a constant ``predict`` column."""
        frame = pd.read_excel(path_in, sheet_name=sheet)
        sentence, label = self._collect(frame, [sentence_col, label_col])
        predict = [self._NO_SEMANTIC_PREDICT] * len(sentence)
        table = pd.DataFrame(
            {'sentence': sentence, 'predict': predict, 'label': label})
        self._write_csv(table, path_out)

    def _export_predictions(self, path_in, sentence_col, label_col,
                            csv_out, xlsx_out):
        """Export sentence / predicted label / true label to TSV and Excel.

        Rows with an empty sentence, prediction or label are skipped.
        """
        frame = pd.read_excel(path_in, sheet_name="Sheet1")
        sentence, label, predict = self._collect(
            frame, [sentence_col, label_col, self._PREDICT_COLUMN])
        table = pd.DataFrame(
            {'sentence': sentence, 'predict': predict, 'label': label})
        self._write_csv(table, csv_out)
        self._write_excel(table, xlsx_out)

    # ------------------------------------------------------------------
    # Accumulating readers (fill self.s / self.l)
    # ------------------------------------------------------------------
    def read_path_1(self):
        """Append the cleaned rows of ``path_1`` (sheet ``Sheet1``) to s/l."""
        frame = pd.read_excel(self.path_1, sheet_name="Sheet1")
        sentence, label = self._collect(frame, ["语句", "语义"])
        self.s.extend(sentence)
        self.l.extend(label)

    def read_path_2(self):
        """Append the cleaned rows of sheets 0526, 0525, 0524 of ``path_2``."""
        sheet_names = ["0526", "0525", "0524"]
        # A list of sheet names opens the workbook once and returns a dict
        # keyed by sheet name.
        sheets = pd.read_excel(self.path_2, sheet_name=sheet_names)
        for name in sheet_names:
            sentence, label = self._collect(sheets[name], ["内容", "正确标签"])
            self.s.extend(sentence)
            self.l.extend(label)

    # ------------------------------------------------------------------
    # Exports with a constant prediction
    # ------------------------------------------------------------------
    def noSemantic_1(self):
        """Convert ``3000_1.xlsx`` to ``new_data/3000_1.csv``."""
        self._export_no_semantic(
            "3000_1.xlsx", "Sheet1", "话术", "语义", "new_data/3000_1.csv")

    def noSemantic_2(self):
        """Convert ``3000_2.xlsx`` to ``new_data/3000_2.csv``."""
        self._export_no_semantic(
            "3000_2.xlsx", "对话文本", "客户语句", "语义小类",
            "new_data/3000_2.csv")

    def noSemantic_3(self):
        """Convert ``3000_3.xlsx`` to ``new_data/3000_3.csv``."""
        self._export_no_semantic(
            "3000_3.xlsx", "Sheet1", "语句", "语义", "new_data/3000_3.csv")

    # ------------------------------------------------------------------
    # Exports carrying the predicted label from the workbook
    # ------------------------------------------------------------------
    def luyin_1(self):
        """Convert ``录音跟听_1.xlsx`` to ``new_data/record_1`` (.csv and .xlsx)."""
        self._export_predictions(
            "录音跟听_1.xlsx", "内容", "正确标签",
            "new_data/record_1.csv", 'new_data/record_1.xlsx')

    def luyin_2(self):
        """Convert ``录音跟听_2.xlsx`` to ``new_data/record_2`` (.csv and .xlsx)."""
        self._export_predictions(
            "录音跟听_2.xlsx", "内容", "正确标签",
            "new_data/record_2.csv", 'new_data/record_2.xlsx')

    def luyin_3(self):
        """Convert ``录音跟听_3.xlsx`` to ``new_data/record_3`` (.csv and .xlsx)."""
        self._export_predictions(
            "录音跟听_3.xlsx", "内容", "正确标签",
            "new_data/record_3.csv", 'new_data/record_3.xlsx')

    def liucheng(self):
        """Convert ``流程跟听.xlsx`` to ``new_data/procedure`` (.csv and .xlsx)."""
        self._export_predictions(
            "流程跟听.xlsx", "语句", "语义",
            "new_data/procedure.csv", 'new_data/procedure.xlsx')

    # ------------------------------------------------------------------
    # Aggregations
    # ------------------------------------------------------------------
    def main(self):
        """Read both source workbooks and write the pairs to ``all_0607.csv``."""
        self.read_path_1()
        self.read_path_2()
        table = pd.DataFrame({'l': self.l, 's': self.s})
        self._write_csv(table, "all_0607.csv")

    # NOTE: disabled in the original script; kept for reference only.
    # def update_1(self):
    #     path = "excel/3000_拒识语料.csv"
    #     data = pd.read_csv(path, sep="\t")
    #     sentence1 = data["sentence"].tolist()
    #     label1 = data["label"].tolist()
    #
    #     t2 = pd.read_excel("excel/语义优化_0608_1.xlsx", sheet_name="data")
    #     sentence2 = t2["sentence"].tolist()
    #     label2 = t2["label"].tolist()
    #
    #     t3 = pd.read_excel("excel/语义优化_0608_2.xlsx", sheet_name="data")
    #     sentence3 = t3["sentence"].tolist()
    #     label3 = t3["label"].tolist()
    #
    #     s = sentence1 + sentence2 + sentence3
    #     l = label1 + label2 + label3
    #     with open("all.txt", "a+", encoding="utf8") as f:
    #         for _l,_s in zip(l, s):
    #             _l = _l.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", "")
    #             _s = _s.replace(" ", "").replace("\r", "").replace("\n", "").replace("\t", "")
    #             line = _l + " " + _s
    #             f.write(line + "\n")

    def no_semantic(self):
        """Concatenate the three ``new_data/3000_*.csv`` files into one workbook."""
        parts = [
            pd.read_csv("new_data/3000_1.csv", sep="\t"),
            pd.read_csv("new_data/3000_2.csv", sep="\t"),
            pd.read_csv("new_data/3000_3.csv", sep="\t"),
        ]
        merged = pd.concat(parts, ignore_index=True)
        self._write_excel(merged, 'excel/nosemantic.xlsx')


if __name__ == '__main__':
    Process().no_semantic()
分类:
pandas处理数据
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧