线性回归之电力预测

 # Linear-regression power prediction: fit the PE column from AT, V, AP and RH.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LinearRegression
# sklearn.cross_validation is the module name in old scikit-learn releases;
# newer releases moved these helpers to sklearn.model_selection.
from sklearn.model_selection import cross_val_predict, train_test_split

# Location of the input CSV and the columns used as features / target.
DATA_PATH = "C:/Users/Administrator/Desktop/data/ccpp.csv"
FEATURE_COLUMNS = ["AT", "V", "AP", "RH"]
TARGET_COLUMNS = ["PE"]

# Read the data with pandas and preview the first rows.
# (A bare `data.head()` shows nothing when run as a script, so print it.)
data = pd.read_csv(DATA_PATH)
print(data.head())

X = data[FEATURE_COLUMNS]
print(X.shape)
# The target is kept as a one-column DataFrame (2-D), so the fitted
# intercept_/coef_ arrays keep the same shapes as with the original code.
y = data[TARGET_COLUMNS]
print(y.shape)

# Split into training and test sets (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

# Fit the model on the training split and inspect the fitted parameters.
linreg = LinearRegression()
linreg.fit(X_train, y_train)
print(linreg.intercept_)  # intercept
print(linreg.coef_)  # coefficients

# Hold-out evaluation: MSE and RMSE on the test split.
# The MSE is computed once and reused for the RMSE.
y_pred = linreg.predict(X_test)
holdout_mse = metrics.mean_squared_error(y_test, y_pred)
print("MSE:", holdout_mse)
print("RMSE:", np.sqrt(holdout_mse))

# 10-fold cross-validated predictions over the full data set.
predicted = cross_val_predict(linreg, X, y, cv=10)
cv_mse = metrics.mean_squared_error(y, predicted)
print("MSE:", cv_mse)
print("RMSE:", np.sqrt(cv_mse))

# Plot measured vs. cross-validated predicted values.
# Scalar bounds are used for the dashed diagonal: y is a DataFrame, so
# y.min()/y.max() would yield one-element Series rather than numbers.
y_low = y.values.min()
y_high = y.values.max()
fig, ax = plt.subplots()
ax.scatter(y, predicted)
ax.plot([y_low, y_high], [y_low, y_high], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()

 

posted @ 2018-09-25 11:56  Parallax  阅读(130)  评论(0)  编辑  收藏  举报