简单机器学习算法——线性回归:包含最小二乘法、岭回归、Lasso、ElasticNet 算法,另有多个数据集(forge、wave、肿瘤、波士顿)
因为注释已经很详细了,所以直接上代码:
1 # -- coding: gbk -- 2 import mglearn 3 from pylab import * 4 from sklearn.model_selection import train_test_split 5 mpl.rcParams['font.sans-serif'] = ['SimHei'] 6 from sklearn.datasets import load_breast_cancer 7 from sklearn.datasets import load_boston 8 from sklearn.linear_model import LinearRegression 9 import sklearn 10 from sklearn.linear_model import Ridge 11 from sklearn.linear_model import Lasso 12 from sklearn.linear_model import ElasticNet 13 def forge数据集(): 14 print("A") 15 X,y=mglearn.datasets.make_forge() 16 print(X.shape) 17 print(y.shape) 18 mglearn.discrete_scatter(X[:, 0], X[:, 1], y) 19 plt.legend(["Class 0", "Class 1"], loc=4) 20 plt.xlabel("第一特征") 21 plt.ylabel("第二特征") 22 #plt.show() 23 24 def wave数据集(): 25 X,y=mglearn.datasets.make_wave(n_samples=40) 26 plt.plot(X, y, 'o') 27 plt.ylim(-3, 3) 28 plt.xlabel("Feature") 29 plt.ylabel("Target") 30 plt.show() 31 32 def 肿瘤数据集(): 33 cancer=load_breast_cancer() 34 print(cancer.keys()) 35 print("AAAA") 36 print(np.bincount(cancer.target)) 37 print("Sample counts per class:\n{}".format( 38 {n: v for n, v in zip(cancer.target_names, np.bincount(cancer.target))})) 39 40 def 波士顿(): 41 boston = load_boston() 42 print("Data shape: {}".format(boston.data.shape)) 43 44 def 线性回归_最小二乘法(): 45 X, y = mglearn.datasets.load_extended_boston() 46 X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) 47 48 '''最小二乘法''' 49 lr = LinearRegression().fit(X_train, y_train) 50 print("斜率", lr.coef_) 51 print("截距", lr.intercept_) 52 53 '''评测数据''' 54 print("Training set score: {:.2f}".format(lr.score(X_train, y_train))) 55 print("Test set score: {:.2f}".format(lr.score(X_test, y_test))) 56 57 def 岭回归Ridge(): 58 X, y = mglearn.datasets.load_extended_boston() 59 X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) 60 ridge = Ridge(alpha=10).fit(X_train, y_train) 61 '''岭回归''' 62 print("斜率", ridge.coef_) 63 print("截距", ridge.intercept_) 64 65 '''评测''' 66 print("Training set score: 
{:.2f}".format(ridge.score(X_train, y_train))) 67 print("Test set score: {:.2f}".format(ridge.score(X_test, y_test))) 68 69 def lasso(): 70 X, y = mglearn.datasets.load_extended_boston() 71 X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) 72 lasso = Lasso().fit(X_train, y_train) 73 '''lasso''' 74 print("斜率", lasso.coef_) 75 print("截距", lasso.intercept_) 76 77 '''评测''' 78 print("Training set score: {:.2f}".format(lasso.score(X_train, y_train))) 79 print("Test set score: {:.2f}".format(lasso.score(X_test, y_test))) 80 print("Number of features used: {}".format(np.sum(lasso.coef_ != 0))) 81 82 lasso001 = Lasso(alpha=0.01, max_iter=100000).fit(X_train, y_train) 83 print("Training set score: {:.2f}".format(lasso001.score(X_train, y_train))) 84 print("Test set score: {:.2f}".format(lasso001.score(X_test, y_test))) 85 print("Number of features used: {}".format(np.sum(lasso001.coef_ != 0))) 86 87 def ElasticNet_L1正则化_L2正则化(): 88 X, y = mglearn.datasets.load_extended_boston() 89 X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) 90 lasso = ElasticNet().fit(X_train, y_train) 91 '''lasso''' 92 print("斜率", lasso.coef_) 93 print("截距", lasso.intercept_) 94 if __name__ =='__main__': 95 lasso()