多元线性回归

#DATASET: https://archive.ics.uci.edu/ml/datasets/Computer+Hardware

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import sklearn.preprocessing as pre
# Load the dataset into a DataFrame. A raw string keeps the Windows-style
# backslashes literal: '\m' is not a valid escape sequence and triggers a
# warning on recent Python versions. The resulting path value is unchanged.
df = pd.read_csv(r'data\machinei\machine.data')
df.head()  # preview the first rows (only displayed in an interactive session)
   vendor name MYCT MMIN MMAX CACH CHMIN CHMAX PRP ERP
0 adviser 32/60 125 256 6000 256 16 128 198 199
1 amdahl 470v/7 29 8000 32000 32 8 32 269 253
2 amdahl 470v/7a 29 8000 32000 32 8 32 220 253
3 amdahl 470v/7b 29 8000 32000 32 8 32 172 253
4 amdahl 470v/7c 29 8000 16000 32 8 16 132 132
# Turn the two text columns into integer codes so every feature is numeric.
# X collects the first 8 columns of df as features; Y is the second-to-last
# column, used as the regression target.

X = np.zeros((len(df), 8))

# Columns 0 and 1 are label-encoded: each distinct value maps to an integer.
x1 = pre.LabelEncoder().fit_transform(df.iloc[:, 0])
X[:, 0] = x1

x2 = pre.LabelEncoder().fit_transform(df.iloc[:, 1])
X[:, 1] = x2

# The remaining feature columns (everything except the last two) are copied as-is.
X[:, 2:] = df.iloc[:, 2:-2]

Y = df.iloc[:, -2]  # goal
# Rebuild the numeric feature matrix X and the target Y from df.
# The two text columns are label-encoded; X has 8 feature columns and Y is
# the second-to-last column of df.

X = np.zeros((len(df), 8))

# Columns 0 and 1 are label-encoded: each distinct value maps to an integer.
x1 = pre.LabelEncoder().fit_transform(df.iloc[:, 0])
X[:, 0] = x1

x2 = pre.LabelEncoder().fit_transform(df.iloc[:, 1])
X[:, 1] = x2

# The remaining feature columns (everything except the last two) are copied as-is.
X[:, 2:] = df.iloc[:, 2:-2]

Y = df.iloc[:, -2]  # goal
# Batch gradient descent for the linear model predict_y = X.W + b.
# Each iteration computes the loss on the current parameters, updates W and b
# in the direction of the negative gradient, and stops once the loss drops
# below 2500.
# NOTE(review): W, b, alpha and m are not defined in the visible part of this
# file; presumably W is the weight vector, b the bias, alpha the learning rate
# and m the number of samples -- confirm they are initialised before this runs.

count = 0
ones_m = np.ones(shape=[m,])  # loop-invariant; dotting with it sums the residuals
while True:
    count += 1
    predict_y = np.dot(X, W.T) + b
    residual = Y - predict_y
    # Sum of squared residuals divided by 2*m (half the mean squared error).
    rloss = np.dot(residual.T, residual) / (2 * m)
    # inf/NaN never satisfies the stopping test below, so a diverged run
    # (e.g. learning rate too large) would otherwise loop forever.
    if not np.isfinite(rloss):
        raise FloatingPointError(
            'loss became non-finite after {} iterations; '
            'gradient descent diverged'.format(count)
        )
    w_gradient = np.dot(residual.T, X) * (-1.0 / m)
    b_gradient = np.dot(residual.T, ones_m) * (-1.0 / m)
    # gradient descent
    W -= w_gradient * alpha
    b -= b_gradient * alpha
    # Progress report every 10000 iterations.
    if count % 10000 == 0:
        print(rloss)
    if rloss < 2500:
        print('MSEloss={}'.format(rloss))
        break
MSEloss=2456.133718645051
W
array([-0.02520069, -0.17961688, -0.00338619,  0.01356295,  0.00726707,
        0.16543731,  0.01494539,  0.13264917])
posted @ 2019-09-18 14:49  runsdeep  阅读(369)  评论(0)  编辑  收藏  举报