Python3 多元回归(包含类别属性的向量化)

 1 # -*- coding: utf-8 -*-
 2 """
 3 Created on Thu Jan  4 19:52:03 2018
 4 
 5 @author: markli
 6 """
 7 import numpy as np;
 8 import pandas as pd;
 9 from sklearn import linear_model;
10 from sklearn.externals import joblib;
11 import sys;
12 
13 sys.path.append('../');
14 path = 'DeliverIncludeClass.csv';
15 #names=['Miles','NumberDeliveries','TravelTime'];
16 reader = pd.read_csv(path,header=0,names=['Number','Miles','NumberDeliveries','Car','TravelTime']);
17 
18 #print(reader);
19 X = np.array(reader[['Miles','NumberDeliveries']]);
20 Y = np.array(reader['TravelTime']);
21 Car = np.array(reader['Car']); #将为类别属性的数据列单独读出来
22 #print(X);
23 #print(Y);
24 car_feature = list(set(Car));
25 VectoreCar = np.zeros((len(Car),len(car_feature)));
26 #本例中Car属性为类别属性,将其向量化
27 for i in range(len(Car)):
28     VectoreCar[i][Car[i]]=1;
29 
30 print(VectoreCar);
31 print(Car);
32 
33 #将向量化的类别属性值与其他数据合并
34 X = np.hstack((X,VectoreCar));
35 print(X);
36 
37 
38 reg = linear_model.LinearRegression();
39 reg.fit(X,Y);
40 joblib.dump(reg,'IncludeClassAttrRegression.pkl');
41 print("系数 %s " %(reg.coef_));
42 print("常系数 %s" %(reg.intercept_) );
43 #
44 x_test = [[80,5,0,1,0]];
45 y_predict = reg.predict(x_test);
46 print(y_predict);
DeliverIncludeClass.csv 文件格式:首行为表头,共五列,依次为 Number、Miles、NumberDeliveries、Car、TravelTime,其中 Car 为类别属性。

 

posted on 2018-01-17 19:23  FightLi  阅读(671)  评论(0)  编辑  收藏  举报