sklearn数据集的导入及划分
鸢尾花数据集的导入及查看:
①鸢尾花数据集的导入:
from sklearn.datasets import load_iris
②查看鸢尾花数据集:
# Load the iris dataset once; every print below inspects this same object.
iris = load_iris()
# Whole dataset object, then its individual fields.
print("鸢尾花数据集:\n", iris)
print("查看数据集描述:\n", iris.DESCR)
print("查看特征值的名字:\n", iris.feature_names)
# Feature matrix together with its shape.
print("查看特征数据:\n", iris.data, iris.data.shape)
print("查看目标值名字:\n", iris.target_names)
print("查看目标数据:\n", iris.target)
划分数据集:
①导入train_test_split函数:
from sklearn.model_selection import train_test_split
②划分数据集:数据集划分为训练集和测试集
# Hold out 20% of the samples as the test set; the rest is the training set.
x_train, x_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
)
注:iris.data为数据集的特征值,iris.target为数据集的目标值,test_size为测试集所占的比例(可省略,默认为0.25)。返回值依次为:
x_train:训练集的特征值
x_test:测试集的特征值
y_train:训练集的目标值
y_test:测试集的目标值
完整代码:
from sklearn.datasets import load_iris  # loader for the iris dataset
from sklearn.model_selection import train_test_split


def datatest(test_size=0.2, random_state=None):
    """Print the iris dataset and a train/test split of it.

    Args:
        test_size: Forwarded to ``train_test_split``; the share of the
            samples held out as the test set. Defaults to 0.2.
        random_state: Forwarded to ``train_test_split``. The default
            ``None`` keeps the split unseeded; pass an int to get the
            same split on every run.

    Returns:
        None. Everything is written to standard output.
    """
    # Load the dataset
    iris = load_iris()
    print("鸢尾花数据集:\n", iris)
    print("查看数据集描述:\n", iris.DESCR)
    print("查看特征值的名字:\n", iris.feature_names)
    print("查看特征数据:\n", iris.data, iris.data.shape)
    print("查看目标值名字:\n", iris.target_names)
    print("查看目标数据:\n", iris.target)

    # Split the dataset into training and test parts
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data,
        iris.target,
        test_size=test_size,
        random_state=random_state,
    )
    # Show each part followed by its shape
    print(x_train, x_train.shape)
    print(x_test, x_test.shape)
    print(y_train, y_train.shape)
    print(y_test, y_test.shape)


if __name__ == '__main__':
    datatest()