python--线性回归
首先安装要用到的包:sklearn(即 scikit-learn,Python 的机器学习库,可通过 pip install scikit-learn 安装)
# Linear-regression walkthrough: fit PE from the AT, V, AP and RH columns of
# CCPP.csv, report the test-set MSE, then plot 10-fold cross-validated
# predictions against the measured values.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets, linear_model
from sklearn import metrics
from sklearn.linear_model import LinearRegression
# `sklearn.cross_validation` was removed in scikit-learn 0.20;
# `train_test_split` now lives in `sklearn.model_selection`.
from sklearn.model_selection import cross_val_predict, train_test_split

# Load the data set.
data = pd.read_csv('C://Users//leon//Desktop//CCPP.csv')

# Quick look at the data. Printed explicitly because bare expressions only
# display their value in a notebook/REPL, not when run as a script.
print(data.head())
print(data.shape)

# Use the four columns AT, V, AP and RH as the sample features and PE as the
# target. Double brackets keep `y` a one-column DataFrame.
X = data[['AT', 'V', 'AP', 'RH']]
y = data[['PE']]

# Split into training and test sets (library-default proportions);
# random_state is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

# Train: build the model and fit it on the training set.
linreg = LinearRegression()
linreg.fit(X_train, y_train)
print(linreg.intercept_)  # model intercept (constant term)
print(linreg.coef_)  # coefficients of the independent variables

# Evaluate the fitted model on the held-out test set using the MSE.
y_pred = linreg.predict(X_test)
print("MSE:", metrics.mean_squared_error(y_test, y_pred))

# Predict every sample with 10-fold cross-validation over the full data set.
predicted = cross_val_predict(linreg, X, y, cv=10)

# Plot measured vs. predicted values; the dashed diagonal spans the range of
# y and marks where predicted == measured.
fig, ax = plt.subplots()
ax.scatter(y, predicted)
ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()
通过训练数据集进行预测