sklearn实践_普通线性回归
# Ordinary least-squares linear regression on the Advertising data set.
#
# Steps performed by this script:
#   1. Load the CSV and plot Sales against each advertising channel.
#   2. Fit a LinearRegression model on a random train/test split.
#   3. Print the fitted coefficients, the intercept and the test-set RMSE.
#   4. Plot predicted vs. actual Sales for the test samples.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.linear_model import LinearRegression
# sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split is provided by sklearn.model_selection.
from sklearn.model_selection import train_test_split

data = pd.read_csv(r"C:\Users\Oscar\Downloads\Advertising.csv")

feature_cols = ["TV", "Radio", "Newspaper"]
X = data[feature_cols]
y = data["Sales"]

# --- Exploration: all three channels on a single axes -----------------------
# The keyword is lowercase ``label``; matplotlib property names are
# case-sensitive, so ``Label=`` is rejected and the legend would stay empty.
plt.plot(data["TV"], y, "ro", label="TV")
plt.plot(data["Radio"], y, "g^", label="Radio")
plt.plot(data["Newspaper"], y, "bo", label="Newspaper")
plt.legend(loc="lower right")
plt.grid()
plt.show()  # must be called; a bare ``plt.show`` is a no-op expression

# --- Exploration: one subplot per channel -----------------------------------
plt.figure(figsize=(10, 10))

plt.subplot(311)
plt.plot(data["TV"], y, "ro", label="TV")
plt.title("TV")

plt.subplot(312)  # equivalent to plt.subplot(3, 1, 2)
plt.plot(data["Newspaper"], y, "g^", label="Newspaper")
plt.title("Newspaper")

plt.subplot(313)
plt.plot(data["Radio"], y, "bo", label="Radio")
plt.title("Radio")

# --- Modeling ---------------------------------------------------------------
# No random_state is passed, so the split (and therefore the printed numbers)
# differs from run to run, as in the original script.
X_train, X_test, y_train, y_test = train_test_split(X, y)

model = LinearRegression()
model.fit(X_train, y_train)
print(model)
print(model.coef_)
print(model.intercept_)

y_rep = model.predict(X_test)

# --- Evaluation -------------------------------------------------------------
# Root-mean-squared error over the test set: sqrt(mean((pred - actual) ** 2)).
rmse = np.sqrt(metrics.mean_squared_error(y_test, y_rep))
print("RMSE:", rmse)

# --- Plotting: predicted vs. actual on the test samples ---------------------
sample_idx = range(len(y_rep))
plt.figure()
plt.plot(sample_idx, y_rep, "b", label="pre")
plt.plot(sample_idx, y_test, "r", label="test")
plt.legend(loc="upper right")
plt.xlabel("the number of sales")
plt.ylabel("values of sales")
plt.show()  # display the remaining open figures