Python3 多元回归(包含属性的向量化)
# -*- coding: utf-8 -*-
"""
Created on Thu Jan  4 19:52:03 2018

@author: markli

Multiple linear regression on delivery data that contains one categorical
attribute (``Car``).  The categorical column is one-hot encoded, stacked next
to the numeric columns, and the fitted model is saved to disk.
"""
import sys

import numpy as np
import pandas as pd

sys.path.append('../')


def one_hot_encode(labels):
    """Return a one-hot matrix for a 1-D sequence of category labels.

    Each distinct label gets its own column; columns are ordered by the
    sorted distinct label values.  Labels may be any sortable values
    (integers that do not start at 0, strings, ...), not only 0..k-1.

    Parameters
    ----------
    labels : array-like
        One category label per sample.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_samples, n_categories)`` with exactly one
        ``1`` per row.  Empty input gives an array of shape ``(0, 0)``.
    """
    labels = np.asarray(labels).ravel()
    # `codes[i]` is the position of labels[i] among the sorted distinct
    # values, i.e. the column that must be set for row i.  Using the raw
    # label as the column index would break for labels that are not 0..k-1.
    categories, codes = np.unique(labels, return_inverse=True)
    encoded = np.zeros((labels.size, categories.size))
    encoded[np.arange(labels.size), codes.ravel()] = 1
    return encoded


def main():
    """Fit, save and try out the regression on ``DeliverIncludeClass.csv``."""
    # Imported here so that `one_hot_encode` stays importable on machines
    # without scikit-learn.
    from sklearn import linear_model
    try:
        # `sklearn.externals.joblib` was removed from recent scikit-learn
        # releases; the standalone package is the supported import.
        import joblib
    except ImportError:
        from sklearn.externals import joblib

    path = 'DeliverIncludeClass.csv'
    # header=0 together with `names` replaces the file's own header row.
    reader = pd.read_csv(
        path,
        header=0,
        names=['Number', 'Miles', 'NumberDeliveries', 'Car', 'TravelTime'],
    )

    X = np.array(reader[['Miles', 'NumberDeliveries']])
    Y = np.array(reader['TravelTime'])
    # Read the categorical column out on its own so it can be vectorized.
    Car = np.array(reader['Car'])

    # Car is a categorical attribute: turn it into one-hot columns.
    VectoreCar = one_hot_encode(Car)

    print(VectoreCar)
    print(Car)

    # Merge the vectorized categorical values with the numeric features.
    X = np.hstack((X, VectoreCar))
    print(X)

    reg = linear_model.LinearRegression()
    reg.fit(X, Y)
    joblib.dump(reg, 'IncludeClassAttrRegression.pkl')
    print("系数 %s " % (reg.coef_,))
    print("常系数 %s" % (reg.intercept_,))

    # Two numeric features followed by three one-hot entries, so this sample
    # only matches the model when the CSV holds exactly three car categories.
    x_test = [[80, 5, 0, 1, 0]]
    y_predict = reg.predict(x_test)
    print(y_predict)


if __name__ == "__main__":
    main()
DeliverIncludeClass.csv 文件格式
![](https://images2017.cnblogs.com/blog/735245/201801/735245-20180117192153803-1169580980.png)