pandas(三):pandas读取excel

一、代码如下

复制代码
import pandas as pd

class ProcessData(object):
    """Append ``label  sentence`` training samples to a plain-text corpus.

    Each public method reads (sentence, label) pairs from one source (an
    Excel sheet or a text file), strips spaces, tabs, carriage returns and
    newlines from both values, and appends one line per pair to
    ``train_path`` in the form ``<label><two spaces><sentence>\\n``.

    Rows whose sentence or label is missing (empty Excel cell, read by
    pandas as NaN) or empty after cleaning are skipped instead of raising
    ``AttributeError`` or producing an unusable sample.
    """

    # Characters removed from every sentence and label before writing.
    _STRIP_TABLE = str.maketrans("", "", " \r\n\t")
    # Separator written between the label and the sentence.
    _SEPARATOR = "  "

    def __init__(self, path="../data/new_data/0520新增语义.xlsx",
                 train_path="../data/train.txt"):
        """Remember the Excel workbook to read and the corpus file to extend.

        Args:
            path: Excel workbook used by the sheet-based loaders. Other
                workbooks previously used with this class:
                "../data/new_data/随机抽听_1.xls" and
                "../data/new_data/无意义核对语料.xlsx".
            train_path: Text file that samples are appended to.
        """
        self.path = path
        self.train_path = train_path

    @classmethod
    def _clean(cls, value) -> str:
        """Return *value* as text without spaces/tabs/CR/LF ("" if missing)."""
        if value is None:
            return ""
        if not isinstance(value, str):
            # Empty Excel cells arrive as NaN; numeric cells as int/float.
            if pd.isna(value):
                return ""
            value = str(value)
        return value.translate(cls._STRIP_TABLE)

    def _append_samples(self, samples) -> int:
        """Append cleaned (label, sentence) pairs to ``train_path``.

        Args:
            samples: Iterable of ``(label, sentence)`` pairs.

        Returns:
            The number of lines actually written.
        """
        written = 0
        with open(self.train_path, "a", encoding="utf8") as out:
            for raw_label, raw_sentence in samples:
                label = self._clean(raw_label)
                sentence = self._clean(raw_sentence)
                if not label or not sentence:
                    # An incomplete pair is not a usable training sample.
                    continue
                out.write(label + self._SEPARATOR + sentence + "\n")
                written += 1
        return written

    def _append_sheet(self, sheet_name, sentence_column, label_column) -> int:
        """Load one sheet of ``self.path`` and append its pairs to the corpus."""
        frame = pd.read_excel(self.path, sheet_name=sheet_name)
        pairs = zip(frame[label_column], frame[sentence_column])
        return self._append_samples(pairs)

    def write_suiji(self) -> int:
        """Append pairs from sheet "sheet1" (columns "语句" / "标注大类").

        The write call used to be commented out, which left this method
        creating the corpus file without adding anything to it; it now
        writes like the other loaders. Returns the number of lines written.
        """
        return self._append_sheet("sheet1", "语句", "标注大类")

    def get_nomeans(self) -> int:
        """Append pairs from sheet "Sheet1" (columns "客户语句" / "语义小类").

        Returns the number of lines written.
        """
        return self._append_sheet("Sheet1", "客户语句", "语义小类")

    def get_wenti(self) -> int:
        """Append pairs from sheet "Sheet1" (columns "客户话术" / "语义").

        Returns the number of lines written.
        """
        return self._append_sheet("Sheet1", "客户话术", "语义")

    def get_0520(self) -> int:
        """Append pairs from sheet "Sheet1" (columns "客户语句" / "语义").

        Returns the number of lines written.
        """
        return self._append_sheet("Sheet1", "客户语句", "语义")

    def get_fangshi(self, source_path="../data/new_data/还款方式语料.txt",
                    label="支付宝微信号是多少") -> int:
        """Append every non-blank line of a text file under one fixed label.

        Args:
            source_path: UTF-8 text file with one sentence per line.
            label: Label written in front of every sentence.

        Returns:
            The number of lines written.
        """
        # Read everything before opening the corpus so that the source is
        # fully consumed (and closed) before any output is produced.
        with open(source_path, "r", encoding="utf8") as source:
            sentences = list(source)
        return self._append_samples((label, sentence) for sentence in sentences)

# Script entry point: append the repayment-method corpus to the training file.
if __name__ == "__main__":
    processor = ProcessData()
    processor.get_fangshi()
复制代码

 

posted @   jasonzhangxianrong  阅读(221)  评论(0)  编辑  收藏  举报
编辑推荐:
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
阅读排行:
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
点击右上角即可分享
微信分享提示