模型评估-留出法

1.留出法

留出法(hold-out)将数据集划分为互斥的训练集和测试集:在训练集上训练模型,在测试集上评估其泛化性能;通常采用分层采样并多次随机划分后取平均,以减小单次划分带来的偶然性。

2.Python代码演示

点击查看代码
from sklearn import datasets  # 自带数据集
from sklearn.model_selection import train_test_split  # 数据集划分
from sklearn.preprocessing import StandardScaler  # 标准化
import numpy as np
import matplotlib.pyplot as plt

# Load the iris dataset bundled with scikit-learn.
iris = datasets.load_iris()
iris

# Feature matrix and class labels passed to model_learn further down.
X = iris.data
target = iris.target

def model_learn(X, target, arg_model, n_splits=100, test_size=0.2):
    """Evaluate a classifier with repeated stratified hold-out splits.

    In each round the data is split into a training and a test part
    (shuffled, stratified on ``target``, seeded with the round index so the
    splits are reproducible). A fresh ``arg_model()`` is fitted on the
    standardized training part and its accuracy on both parts is recorded.
    The mean training and test accuracy over all rounds are printed.

    Args:
        X: Feature matrix, one row per sample.
        target: Class labels aligned with the rows of ``X``.
        arg_model: Callable taking no arguments (e.g. an estimator class)
            that returns an object exposing ``fit(X, y)`` and ``predict(X)``.
        n_splits: Number of random splits to evaluate (default 100).
        test_size: Forwarded to ``train_test_split`` as ``test_size``
            (default 0.2, i.e. 20% test / 80% training).

    Returns:
        A tuple ``(train_precision, test_precision)`` of two lists holding
        one accuracy value (float) per split.

    Raises:
        ValueError: If ``n_splits`` is smaller than 1.
    """
    if n_splits < 1:
        raise ValueError("n_splits must be at least 1")

    train_precision = []  # accuracy on the training part, one entry per split
    test_precision = []  # accuracy on the held-out part, one entry per split
    for seed in range(n_splits):
        # Stratified sampling keeps the class proportions of `target` in both
        # parts of the split.
        X_train, X_test, y_train, y_test = train_test_split(
            X, target, test_size=test_size,
            random_state=seed, shuffle=True, stratify=target)

        # Fit the scaler on the training part only. Fitting it on the whole
        # dataset before splitting would leak test-set statistics into
        # training and bias the estimated test accuracy.
        scaler = StandardScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

        model = arg_model()  # new, unfitted model for every split
        model.fit(X_train, y_train)

        # Compare on plain arrays so the check is positional regardless of
        # the container type (list, ndarray, pandas Series) of the labels.
        train_hits = np.asarray(model.predict(X_train)) == np.asarray(y_train)
        test_hits = np.asarray(model.predict(X_test)) == np.asarray(y_test)
        train_precision.append(float(np.mean(train_hits)))  # empirical accuracy
        test_precision.append(float(np.mean(test_hits)))  # hold-out accuracy

    print("%d次随机划分+分层采样,训练精度是:%.5f" % (n_splits, np.mean(train_precision)))
    print("%d次随机划分+分层采样,测试精度是:%.5f" % (n_splits, np.mean(test_precision)))
    return train_precision, test_precision

def plt_precision(train_precision, test_precision):
    """Plot per-split training and test accuracy in two side-by-side panels.

    The left panel shows ``train_precision`` and the right panel shows
    ``test_precision`` (drawn in red); each panel's title reports the mean of
    the plotted series.

    Args:
        train_precision: Sequence of training accuracies, one per split.
        test_precision: Sequence of test accuracies, one per split.
    """
    _, axes = plt.subplots(1, 2, figsize=(12, 4))
    # (axis, series, line format, title template, y-axis label) per panel.
    panels = (
        (axes[0], train_precision, ".-",
         "mean of train_precision ={:.5f}", "train_precision"),
        (axes[1], test_precision, "r.-",
         "mean of test_precision ={:.5f}", "test_precision"),
    )
    for axis, scores, line_fmt, title_fmt, y_label in panels:
        axis.plot(scores, line_fmt)
        axis.set_title(title_fmt.format(np.mean(scores)))
        axis.set_xlabel("random split number")
        axis.set_ylabel(y_label)
        axis.grid()

用逻辑回归拟合

点击查看代码
from sklearn.linear_model import LogisticRegression
# Evaluate logistic regression with model_learn, then plot the per-split
# training and test accuracies.
train_precision, test_precision = model_learn(
    X=X, target=target, arg_model=LogisticRegression)
plt_precision(train_precision=train_precision, test_precision=test_precision)

用决策树拟合

点击查看代码
from sklearn.tree import DecisionTreeClassifier
# Evaluate a decision tree with model_learn, then plot the per-split
# training and test accuracies.
train_precision, test_precision = model_learn(
    X=X, target=target, arg_model=DecisionTreeClassifier)
plt_precision(train_precision=train_precision, test_precision=test_precision)

KNN拟合

点击查看代码
from sklearn.neighbors import KNeighborsClassifier
# Evaluate a k-nearest-neighbours classifier with model_learn, then plot the
# per-split training and test accuracies.
train_precision, test_precision = model_learn(
    X=X, target=target, arg_model=KNeighborsClassifier)
plt_precision(train_precision=train_precision, test_precision=test_precision)

SVM拟合

点击查看代码
from sklearn.svm import SVC
# Evaluate a support vector classifier with model_learn, then plot the
# per-split training and test accuracies.
train_precision, test_precision = model_learn(
    X=X, target=target, arg_model=SVC)
plt_precision(train_precision=train_precision, test_precision=test_precision)
posted @ 2022-01-12 23:21  筷点雪糕侠  阅读(231)  评论(0)  编辑  收藏  举报