pandas(三):pandas读取excel
一、代码如下
import pandas as pd


class ProcessData(object):
    """Append labelled sentences from Excel / text corpora to a training file.

    Every record is written as one line: the label, a single space, the
    sentence, then a newline.  Spaces, carriage returns, newlines and tabs
    are removed from both fields first, so the single space is always the
    field separator.
    """

    # Deletes exactly the four characters the records must not contain.
    _STRIP_TABLE = str.maketrans("", "", " \r\n\t")

    def __init__(self, path="../data/new_data/0520新增语义.xlsx",
                 train_path="../data/train.txt"):
        """Store the input workbook path and the output training-file path.

        Args:
            path: Excel workbook read by the sheet-based methods.  Other
                workbooks this class has been pointed at:
                "../data/new_data/随机抽听_1.xls" and
                "../data/new_data/无意义核对语料.xlsx".
            train_path: Text file that every method appends to.
        """
        self.path = path
        self.train_path = train_path

    @classmethod
    def _clean(cls, value):
        """Return ``value`` as text without spaces/CR/LF/tabs ("" if missing)."""
        if value is None:
            return ""
        if not isinstance(value, str):
            # Empty Excel cells arrive as NaN and numeric cells as int/float;
            # calling str methods on them directly would raise AttributeError.
            if pd.isna(value):
                return ""
            value = str(value)
        return value.translate(cls._STRIP_TABLE)

    def _append_pairs(self, labels, sentences):
        """Append one line per (label, sentence) pair to the training file.

        Pairs whose label or sentence is empty after cleaning are skipped.
        """
        lines = []
        for label, sentence in zip(labels, sentences):
            label = self._clean(label)
            sentence = self._clean(sentence)
            if label and sentence:
                lines.append(label + " " + sentence + "\n")
        with open(self.train_path, "a", encoding="utf8") as f:
            f.writelines(lines)

    def _append_sheet(self, sheet_name, sentence_col, label_col):
        """Read one sheet of ``self.path`` and append its two columns."""
        book = pd.read_excel(self.path, sheet_name=sheet_name)
        self._append_pairs(book[label_col], book[sentence_col])

    def write_suiji(self):
        """Append the "语句" / "标注大类" columns of sheet "sheet1"."""
        # NOTE(review): the write call used to be commented out, so this
        # method produced no output at all; it now writes like its siblings.
        # The lower-case sheet name is kept as it was - confirm it matches
        # the workbook this method is used with.
        self._append_sheet("sheet1", "语句", "标注大类")

    def get_nomeans(self):
        """Append the "客户语句" / "语义小类" columns of sheet "Sheet1"."""
        self._append_sheet("Sheet1", "客户语句", "语义小类")

    def get_wenti(self):
        """Append the "客户话术" / "语义" columns of sheet "Sheet1"."""
        self._append_sheet("Sheet1", "客户话术", "语义")

    def get_0520(self):
        """Append the "客户语句" / "语义" columns of sheet "Sheet1"."""
        self._append_sheet("Sheet1", "客户语句", "语义")

    def get_fangshi(self, source_path="../data/new_data/还款方式语料.txt",
                    label="支付宝微信号是多少"):
        """Append every non-blank line of a text corpus under a single label.

        Args:
            source_path: UTF-8 text file holding one sentence per line.
            label: Label written in front of every sentence.
        """
        with open(source_path, "r", encoding="utf8") as f:
            sentences = [self._clean(line) for line in f]
        self._append_pairs([label] * len(sentences), sentences)


if __name__ == '__main__':
    ProcessData().get_fangshi()