导航

sklearn不同数量的训练集在测试集上的表现的曲线刻画

Posted on 2018-10-31 21:42  wzd321  阅读(470)  评论(0)  编辑  收藏  举报
def plot_learning_curve(estimator, X, y, cv=5,
                        train_sizes=(0.1, 0.3, 0.5, 0.7, 0.8, 0.9)):
    """Plot training and cross-validation scores against training-set size.

    Runs ``sklearn.model_selection.learning_curve`` for the given estimator
    and draws the mean training score (red) and mean testing score (green)
    for each training-set size, each with a shaded band of +/- one standard
    deviation. The figure is displayed with ``plt.show()``; nothing is
    returned.

    param estimator: estimator object passed through to ``learning_curve``
    param X: feature data, shape=[n_samples, n_features]
    param y: target values, shape=[n_samples,]
    param cv: cross-validation setting passed through to ``learning_curve``
    param train_sizes: training-set sizes passed through to
        ``learning_curve`` (the default is a sequence of fractions)
    """
    import matplotlib.pyplot as plt
    # numpy is needed for the mean/std below; imported locally, like the
    # other dependencies of this function, so the block is self-contained.
    import numpy as np
    from sklearn.model_selection import learning_curve

    plt.figure()
    plt.title("learning curves")
    plt.xlabel("Training examples")
    plt.ylabel("Score")

    # learning_curve returns the training-set sizes it actually used, so
    # train_sizes is rebound here and used as the x axis below.
    train_sizes, train_scores, test_scores = learning_curve(
        estimator=estimator, X=X, y=y, cv=cv, train_sizes=train_sizes)

    # Reduce along axis 1 to get one mean/std value per training-set size.
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)

    plt.grid()

    # Shaded bands: mean +/- one standard deviation.
    plt.fill_between(train_sizes,
                     y1=train_scores_mean - train_scores_std,
                     y2=train_scores_mean + train_scores_std,
                     alpha=0.1, color="r")
    plt.fill_between(train_sizes,
                     y1=test_scores_mean - test_scores_std,
                     y2=test_scores_mean + test_scores_std,
                     alpha=0.1, color="g")

    plt.plot(train_sizes, train_scores_mean, "o-", color="r",
             label="training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color="g",
             label="testing score")
    plt.legend(loc="best")
    plt.show()


# Example usage.
# NOTE(review): SVC, X and y are not defined in this snippet -- presumably
# sklearn.svm.SVC and a dataset prepared earlier in the script; confirm.
plot_learning_curve(estimator=SVC(), X=X, y=y, cv=5,
                    train_sizes=[0.1, 0.3, 0.5, 0.7, 0.8, 0.9])