day07-线性回归(回归问题,目标值为连续型)
# coding=utf-8
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression,SGDRegressor,Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
def _mse_in_original_units(target_scaler, y_true_scaled, y_pred_scaled):
    """Return the MSE between two standardized target arrays, in original units."""
    # Recent scikit-learn releases validate that inverse_transform receives a
    # 2-D array, while SGDRegressor.predict returns a 1-D one. Reshaping both
    # inputs to a single column makes the call valid for every estimator.
    y_true = target_scaler.inverse_transform(y_true_scaled.reshape(-1, 1))
    y_pred = target_scaler.inverse_transform(y_pred_scaled.reshape(-1, 1))
    return mean_squared_error(y_true, y_pred)


def linearreg():
    """
    Fit three linear models on the Boston housing data and print their results.

    The data is split 75/25 into train and test sets, features and target are
    standardized, and then LinearRegression (normal equation), SGDRegressor
    (gradient descent) and Ridge (regularized) are each fitted. For every
    model the learned weights and the test-set mean squared error (measured
    in the original, un-standardized target units) are printed.

    No random_state is passed to the split or to SGDRegressor, so the printed
    numbers vary from run to run.

    NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed
    in 1.2, so this script needs an older scikit-learn release.

    :return: None
    """
    # Load the dataset.
    lb = load_boston()

    # Split into training and test sets.
    x_train, x_test, y_train, y_test = train_test_split(
        lb.data, lb.target, test_size=0.25
    )

    # Standardize the features; the scaler is fitted on the training set only
    # and then reused on the test set.
    std_x = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)

    # The target is standardized as well. StandardScaler needs 2-D input,
    # hence the reshape to a single column.
    std_y = StandardScaler()
    y_train = std_y.fit_transform(y_train.reshape(-1, 1))
    y_test = std_y.transform(y_test.reshape(-1, 1))

    # Linear regression solved with the normal equation.
    ls = LinearRegression()
    ls.fit(x_train, y_train)
    print("回归预测的权重为:", ls.coef_)
    print("回归预测的均方误差为:",
          _mse_in_original_units(std_y, y_test, ls.predict(x_test)))

    # Linear regression solved with stochastic gradient descent
    # (better suited to large datasets).
    sgd = SGDRegressor()
    # SGDRegressor expects a 1-D target; passing the (n, 1) column vector
    # triggers a DataConversionWarning, so flatten it explicitly.
    sgd.fit(x_train, y_train.ravel())
    print("梯度下降的权重为:", sgd.coef_)
    print("梯度下降的均方误差为:",
          _mse_in_original_units(std_y, y_test, sgd.predict(x_test)))

    # Ridge regression: linear regression with regularization to reduce
    # overfitting.
    rd = Ridge()
    rd.fit(x_train, y_train)
    print("岭回归的权重为:", rd.coef_)
    print("岭回归的均方误差为:",
          _mse_in_original_units(std_y, y_test, rd.predict(x_test)))
# Run the regression comparison only when this file is executed as a script.
if __name__ == "__main__":
    linearreg()
结果为:
回归预测的权重为: [[-0.07447075 0.12213742 0.06497228 0.07855073 -0.23006693 0.27476916
0.01880188 -0.34419685 0.27270287 -0.23041354 -0.19707132 0.09306829
-0.47021962]]
回归预测的均方误差为: 19.996451815645703
梯度下降的权重为: [-0.04762776 0.08442672 -0.00824119 0.08563068 -0.14440802 0.32100008
-0.00128022 -0.27437103 0.11195762 -0.0725458 -0.18378179 0.10069572
-0.45243339]
梯度下降的均方误差为: 19.70636577029558
岭回归的权重为: [[-0.07329564 0.12004478 0.06060757 0.0790331 -0.2255518 0.27623048
0.01783785 -0.34024779 0.26279879 -0.22049798 -0.1959955 0.09312339
-0.46810589]]
岭回归的均方误差为: 19.913075666852702
(此处原有一张图片,上传未完成,图片已缺失)