# 学习曲线：初探索 (Learning curves: a first exploration)

import numpy as np
import matplotlib.pyplot as plt
# Number of evenly spaced sample points drawn from [0, 1].  Must be even,
# because the points are paired up into two-feature rows below.
n_dots = 200

# Consecutive points are paired, so each row of X holds two features and X
# has n_dots / 2 rows.
X = np.linspace(0, 1, n_dots)
X = X.reshape(-1, 2)
# Target is sqrt(x0 + x1) plus uniform noise in [-0.1, 0.1).  The noise length
# follows the number of rows in X (rather than a hard-coded 100) so that
# n_dots can be changed without breaking the broadcast.
y = np.sqrt(X[:, 0] + X[:, 1]) + 0.2 * np.random.rand(X.shape[0]) - 0.1

#构建多项式模型
from sklearn.pipeline import Pipeline#流水线,可包含多个数据处理模型
from sklearn.preprocessing import  PolynomialFeatures#数据处理,通过改变数据将非线性转换成线性
from sklearn.linear_model import LinearRegression

def polynomial_model(degree=1):
    """Build an unfitted polynomial-regression pipeline.

    The pipeline first expands the input features into polynomial terms
    (turning a higher-order, non-linear relationship into a linear one in
    the expanded features) and then fits them with a linear regression.

    Args:
        degree: Polynomial degree passed to ``PolynomialFeatures``.

    Returns:
        A ``Pipeline`` with the steps ``"polynomial_features"`` and
        ``"linear_regression"``, in that order.
    """
    steps = [
        # include_bias controls whether the constant x0 = 1 column is added;
        # it is switched off here.
        ("polynomial_features", PolynomialFeatures(degree=degree, include_bias=False)),
        ("linear_regression", LinearRegression()),
    ]
    return Pipeline(steps)

from sklearn.model_selection import learning_curve
from sklearn.model_selection import ShuffleSplit

# An explicit cv splitter must be passed here: per the original author's
# note, without it the test scores come out steadily increasing.  The
# learning-curve examples seem to use ShuffleSplit throughout; n_splits=10
# means the scores are computed on 10 cross-validation splits.
train_sizes, train_scores, test_scores = learning_curve(
    polynomial_model(degree=2),
    X,
    y,
    cv=ShuffleSplit(n_splits=10, test_size=0.2, random_state=0),
    n_jobs=1,
    train_sizes=np.linspace(.1, 1.0, 5),
)
print(train_sizes)
# Average the scores along axis 1 (the cv splits, per learning_curve's
# documented return shape), giving one mean score per training-set size.
print(train_scores.mean(axis=1))
print(test_scores.mean(axis=1))

 

# posted @ 2019-06-22 22:59  程序杰杰  阅读(197)  评论(0)  编辑  收藏  举报