pandas 分组 抽样
import pandas as pd

# Script configuration: the column that defines the groups, how many rows to
# hold out from each group, and the seed used for sampling and shuffling.
GROUP_COLUMN = "Group"
SAMPLES_PER_GROUP = 200
RANDOM_SEED = 42


def split_by_group_sample(
    data: pd.DataFrame,
    group_col: str = GROUP_COLUMN,
    n: int = SAMPLES_PER_GROUP,
    random_state: int = RANDOM_SEED,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Hold out ``n`` randomly sampled rows from every group of ``data``.

    Each group is sampled independently with the same ``random_state``.

    Args:
        data: Frame to split. Rows are matched by index label, so labels
            should be unique (true for the default index of ``read_csv``).
        group_col: Name of the column whose values define the groups.
        n: Number of rows to hold out from each group.
        random_state: Seed passed to ``DataFrame.sample`` for every group.

    Returns:
        ``(remaining, held_out)``: ``held_out`` contains the sampled rows
        (group by group, keeping their original index labels) and
        ``remaining`` contains every other row of ``data``.

    Raises:
        ValueError: If any group has fewer than ``n`` rows.
    """
    group_sizes = data.groupby(group_col).size()
    too_small = group_sizes[group_sizes < n]
    if not too_small.empty:
        # Fail early and name the offending groups instead of surfacing the
        # generic "larger sample than population" error from pandas.
        details = ", ".join(
            f"{key!r} ({int(size)} rows)" for key, size in too_small.items()
        )
        raise ValueError(
            f"cannot sample {n} rows per group; groups that are too small: "
            f"{details}"
        )

    # Sampling group by group keeps the original index labels, so no
    # MultiIndex unpacking is needed to find the held-out rows.
    sampled_parts = [
        group.sample(n=n, random_state=random_state)
        for _, group in data.groupby(group_col)
    ]
    # pd.concat rejects an empty list, which happens when data has no rows.
    held_out = pd.concat(sampled_parts) if sampled_parts else data.iloc[0:0]
    remaining = data.drop(index=held_out.index)
    return remaining, held_out


if __name__ == "__main__":
    data = pd.read_csv("data.csv")

    # Split into training and test sets.
    train, test = split_by_group_sample(data)

    # Shuffle before writing; seeded so repeated runs produce identical files.
    train[["MD", "label"]].sample(frac=1, random_state=RANDOM_SEED).to_csv(
        "train.csv", index=False
    )
    # The test file carries only the feature column, without labels.
    test[["MD"]].sample(frac=1, random_state=RANDOM_SEED).to_csv(
        "test.csv", index=False
    )