前言:回归任务是监督式机器学习中最主要的任务类别之一,与分类不同的是,其预测目标一般为连续型数值。sklearn中提供了大量用于回归的算法。
本文汇总了常见的9种回归算法,方便快速查询使用。(本文使用sklearn自带的糖尿病数据集(load_diabetes),以测试集上的均方误差(MSE)作为回归评价指标,MSE越小表示预测越准确;以下为各算法的MSE得分)
1.线性回归:3424
2.岭回归:3379
3.套索回归:3787
4.弹性网络:4666
5.支持向量机(多项式核):4267
6.K近邻:4243
7.决策树:7762
8.随机森林:3925
9.梯度提升树:4112
# Compare nine scikit-learn regressors on the diabetes dataset.
#
# For every estimator the script fits on the training split, prints the
# 3-fold cross-validated MSE on the training split, and records the MSE on
# the held-out test split in ``res``.  ``res`` is printed at the end, in the
# same order as the ``regressors`` list.

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import sklearn
from sklearn import datasets

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import explained_variance_score
from sklearn.metrics import mean_squared_error

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet

from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor


# One seed for the train/test split and for the randomized tree-based
# estimators, so that repeated runs print the same scores.
RANDOM_STATE = 0

diabetes = sklearn.datasets.load_diabetes()
x, y = diabetes.data, diabetes.target
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=RANDOM_STATE
)

# Estimators to compare, in the order their scores appear in ``res``.
regressors = [
    LinearRegression(),                          # 1. linear regression
    Ridge(),                                     # 2. ridge regression
    Lasso(),                                     # 3. lasso regression
    ElasticNet(alpha=0.1, l1_ratio=0.5),         # 4. elastic net
    SVR(gamma="scale", kernel="poly"),           # 5. SVR (other kernels: "linear", "rbf")
    KNeighborsRegressor(weights="uniform"),      # 6. k-nearest neighbours
    DecisionTreeRegressor(random_state=RANDOM_STATE),   # 7. decision tree
    RandomForestRegressor(n_estimators=100, random_state=RANDOM_STATE),  # 8. random forest
    GradientBoostingRegressor(random_state=RANDOM_STATE),  # 9. gradient boosting
]

res = []  # test-set MSE of each estimator, same order as ``regressors``

for regr in regressors:
    regr.fit(x_train, y_train)

    # The scorer returns the negated MSE (higher is better), so flip the
    # sign back before printing.
    cross_score = cross_val_score(
        regr, x_train, y_train, cv=3, scoring="neg_mean_squared_error"
    )
    print(-cross_score)

    y_predict = regr.predict(x_test)
    score = mean_squared_error(y_test, y_predict)
    res.append(score)

# 10. score comparison
print(res)