02获取数据集并处理(iris)
获取数据-iris,划分训练集和测试集
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# 1. Load the iris dataset. The returned object exposes (among others)
#    the attributes used below: data, target and target_names.
iris = load_iris()
# Uncomment to dump the whole dataset object:
# print("iris数据集内容:", iris)
print("训练数据集形状:", iris.data.shape)
print("目标值形状:", iris.target.shape)
print("目标值名称:", iris.target_names)

# 2. Split the samples into a training set and a test set.
#    train_test_split accepts test_size, train_size and random_state.
#    Only test_size is given here, so 25% of the samples are held out for
#    testing. No random_state is passed, so which samples land in each
#    subset may differ between runs (the shapes stay the same).
x_train, x_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.25
)
print("训练集x-y:", x_train.shape, y_train.shape)
print("测试集x-y:", x_test.shape, y_test.shape)
运行结果:
训练数据集形状: (150, 4)
目标值形状: (150,)
目标值名称: ['setosa' 'versicolor' 'virginica']
训练集x-y: (112, 4) (112,)
测试集x-y: (38, 4) (38,)