简介

使用线性回归以及多项式回归(2 次、5 次)对温度(T)与速率(rate)数据进行建模和预测,并用 R² 比较各模型在训练集与测试集上的表现。

code

# Load the training set from CSV.
import pandas as pd
import numpy as np
data_train = pd.read_csv('T-R-train.csv')
data_train  # bare expression: data preview when run as a notebook cell

# Split the frame into the feature column 'T' and the target column 'rate'.
X_train, y_train = data_train['T'], data_train['rate']

#可视化数据
%matplotlib inline
from matplotlib import pyplot as plt
fig1 = plt.figure(figsize=(5,5))
plt.scatter(X_train,y_train)
plt.title('raw data')
plt.xlabel('temperature')
plt.ylabel('rate')
plt.show()

# Reshape X_train into a 2-D column array of shape (n_samples, 1).
# Without this step the linear regression model below cannot be fitted
# because of a dimension mismatch.
X_train = np.reshape(np.array(X_train), (-1, 1))

# Build the linear regression model and fit it on the training data.
from sklearn.linear_model import LinearRegression
lr1 = LinearRegression()
lr1.fit(X=X_train, y=y_train)

# Load the test set and pull out the same feature / target columns.
data_test = pd.read_csv('T-R-test.csv')
X_test, y_test = data_test['T'], data_test['rate']
data_test

# The test features also need the (n_samples, 1) column shape.
X_test = np.reshape(np.array(X_test), (-1, 1))


# Predict on the training and testing data with the linear model and
# report the R^2 score of each split.
from sklearn.metrics import r2_score

# Training split.
y_train_predict = lr1.predict(X_train)
r2_train = r2_score(y_train, y_train_predict)

# Test split.
y_test_predict = lr1.predict(X_test)
r2_test = r2_score(y_test, y_test_predict)

print('training r2:',r2_train)
print('test r2:',r2_test)

# Generate new inputs: 300 evenly spaced X values from 40 to 90, as a column.
X_range = np.linspace(40, 90, num=300)
X_range = X_range.reshape(-1, 1)
y_range_predict = lr1.predict(X_range)

# Draw the fitted line on top of the training scatter.
fig2, ax2 = plt.subplots(figsize=(10, 10))
ax2.plot(X_range, y_range_predict)
ax2.scatter(X_train, y_train)
ax2.set_title('prediction data')
ax2.set_xlabel('temperature')
ax2.set_ylabel('rate')
plt.show()

# Polynomial models
# Expand the single input column into polynomial features.
from sklearn.preprocessing import PolynomialFeatures

# degree=2 means terms up to the 2nd power, degree=3 up to the 3rd, and so on.
poly2 = PolynomialFeatures(degree=2)
X_2_train = poly2.fit_transform(X_train)  # fit on the training data, then transform it
# The transformer is fitted on the training data only; held-out data is
# passed through transform() so it is never used for fitting.
X_2_test = poly2.transform(X_test)

poly5 = PolynomialFeatures(degree=5)
X_5_train = poly5.fit_transform(X_train)
X_5_test = poly5.transform(X_test)

# Show how many feature columns each expansion produced.
print(X_2_train.shape)
print(X_5_train.shape)

# Train and evaluate one linear model per polynomial feature set.

# Degree-2 model.
lr2 = LinearRegression()
lr2.fit(X_2_train,y_train)
y_2_train_predict = lr2.predict(X_2_train)
y_2_test_predict = lr2.predict(X_2_test)
r2_2_train = r2_score(y_train,y_2_train_predict)
r2_2_test = r2_score(y_test,y_2_test_predict)


# Degree-5 model.
lr5 = LinearRegression()
lr5.fit(X_5_train,y_train)
y_5_train_predict = lr5.predict(X_5_train)
y_5_test_predict = lr5.predict(X_5_test)
# Fixed: the training score must compare the training targets with the
# training predictions (it previously reused the test targets/predictions,
# so the reported "training r2_5" was just the test score).
r2_5_train = r2_score(y_train,y_5_train_predict)
r2_5_test = r2_score(y_test,y_5_test_predict)

print('training r2_2:',r2_2_train)
print('test r2_2:',r2_2_test)
print('training r2_5:',r2_5_train)
print('test r2_5:',r2_5_test)

# Generate new inputs (300 evenly spaced values from 40 to 90), expand them
# with each fitted polynomial transformer, and predict with the matching model.
X_2_range = poly2.transform(np.linspace(40, 90, 300).reshape(-1, 1))
y_2_range_predict = lr2.predict(X_2_range)

X_5_range = poly5.transform(np.linspace(40, 90, 300).reshape(-1, 1))
y_5_range_predict = lr5.predict(X_5_range)

# Visualize the polynomial fits against the training and test points.
# The x-axis uses X_range rather than X_2_range / X_5_range: the expanded
# feature matrices have too many columns to serve as a plot axis.

# Degree-2 fit.
fig3, ax3 = plt.subplots(figsize=(10, 10))
ax3.plot(X_range, y_2_range_predict)
ax3.scatter(X_train, y_train)
ax3.scatter(X_test, y_test)
ax3.set_title('polynomial prediction result (2)')
ax3.set_xlabel('temperature')
ax3.set_ylabel('rate')
plt.show()

# Degree-5 fit.
fig4, ax4 = plt.subplots(figsize=(10, 10))
ax4.plot(X_range, y_5_range_predict)
ax4.scatter(X_train, y_train)
ax4.scatter(X_test, y_test)
ax4.set_title('polynomial prediction result (5)')
ax4.set_xlabel('temperature')
ax4.set_ylabel('rate')
plt.show()

TIP

reshape(-1,1)
-1 相当于通配符:行数由 NumPy 根据元素总数自动推算,列数固定为 1,即把数据整理成 n 行 1 列的二维数组。

image

posted on 2022-04-25 13:25  HDU李少帅  阅读(334)  评论(0编辑  收藏  举报