# 学习曲线：初探索 (Learning curves: a first exploration)
import matplotlib.pyplot as plt  # not used in the visible part of the script; kept as in the original
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import learning_curve
from sklearn.pipeline import Pipeline  # pipeline: chains several data-processing steps / models
from sklearn.preprocessing import PolynomialFeatures  # expands the inputs so a linear model can fit non-linear data

# Sample set: n_dots evenly spaced values in [0, 1], paired up into rows of
# two features each (so n_dots must be even for the reshape to succeed).
n_dots = 200
X = np.linspace(0, 1, n_dots)
X = X.reshape(-1, 2)

# Target: sqrt(x0 + x1) plus uniform noise in [-0.1, 0.1).
# The noise vector is sized from X rather than a hard-coded 100, so changing
# n_dots can no longer cause a shape mismatch. The RNG is not seeded here, so
# the printed scores vary from run to run.
y = np.sqrt(X[:, 0] + X[:, 1]) + 0.2 * np.random.rand(X.shape[0]) - 0.1


# Build the polynomial model.
def polynomial_model(degree=1):
    """Return an unfitted polynomial-regression pipeline.

    The pipeline first expands the input features into polynomial terms up to
    ``degree`` (turning a higher-order, non-linear relationship into a linear
    one in the expanded features) and then fits an ordinary linear regression
    on the result.

    Args:
        degree: Polynomial degree passed to ``PolynomialFeatures``.

    Returns:
        A ``Pipeline`` with the steps ``"polynomial_features"`` and
        ``"linear_regression"``.
    """
    # include_bias controls whether the constant x0 = 1 column is added; it is
    # switched off here.
    polynomial_features = PolynomialFeatures(degree=degree, include_bias=False)
    linear_regression = LinearRegression()
    pipeline = Pipeline([
        ("polynomial_features", polynomial_features),
        ("linear_regression", linear_regression),
    ])
    return pipeline


# Original author's note: an explicit cross-validation splitter must be passed;
# without it the test scores were observed to grow one after another.
# NOTE(review): presumably because X is ordered (built with linspace) and the
# default splitter does not shuffle - confirm.
# n_splits=10 evaluates the score on 10 random train/test splits, each holding
# out 20% of the samples for testing.
train_sizes, train_scores, test_scores = learning_curve(
    polynomial_model(degree=2),
    X,
    y,
    cv=ShuffleSplit(n_splits=10, test_size=0.2, random_state=0),
    n_jobs=1,
    train_sizes=np.linspace(.1, 1.0, 5),
)

# Report the training-set sizes used and the mean train / test score for each
# size, averaged over the cross-validation splits (axis 1).
print(train_sizes)
print(np.mean(train_scores, axis=1))
print(np.mean(test_scores, axis=1))