随笔分类 - pandas处理数据
摘要:一、代码 def clean(): train = pd.read_csv(os.path.join(raw, "train.txt"),sep="\t", header=None,names=["s", "l"]) train_s = train["s"].tolist() train_l = t
阅读全文
摘要:一、代码如下 import pandas as pd import numpy as np class Process(): def __init__(self): self.path_1 = "3000条无意义.xlsx" self.path_2 = "录音跟听0526.xlsx" self.s
阅读全文
摘要:一、使用pandas合并多个CSV文件,并划分训练集和测试集 def all_data(self): df1 = pd.read_csv("data/POI/negtive.csv") df2 = pd.read_csv("data/POI/positive.csv") df = pd.concat([d
阅读全文
摘要:def update_1(self): path = "excel/3000_拒识语料.csv" data = pd.read_csv(path, sep="\t") sentence1 = data["sentence"].tolist() label1 = data["label"].tolis
阅读全文
摘要:一、代码如下 import pandas as pd class ProcessData(object): def __init__(self): #self.path = "../data/new_data/随机抽听_1.xls" #self.path = "../data/new_data/无意
阅读全文
摘要:一、代码 # -*- coding: UTF-8 -*- import json import pandas as pd """获得所有的文本""" def get_all_text(): file_path = "../datas/format/primary.json" names = [] r
阅读全文
摘要:一、选取列 import pandas as pd df = pd.read_csv('zhihutest.csv', sep="\t") # 类别特征(16) fixlen_category_columns = ['m_sex', 'm_access_frequencies', 'm_twoA',
阅读全文