#DATASET: https://archive.ics.uci.edu/ml/datasets/Computer+Hardware
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
import sklearn.preprocessing as pre
# Load the Computer Hardware data set into a DataFrame.
# The path is written as a raw string so the backslashes are taken literally:
# '\m' is not a valid escape sequence and a plain literal triggers an
# invalid-escape warning on recent Python versions. The resulting path is
# unchanged.
# NOTE(review): backslash separators only resolve as directories on Windows --
# confirm whether this script needs to run elsewhere.
df = pd.read_csv(r'data\machinei\machine.data')
# Preview the first rows (rendered as cell output when run in a notebook).
df.head()
|   | vendor | name | MYCT | MMIN | MMAX | CACH | CHMIN | CHMAX | PRP | ERP |
|---|--------|------|------|------|------|------|-------|-------|-----|-----|
| 0 | adviser | 32/60 | 125 | 256 | 6000 | 256 | 16 | 128 | 198 | 199 |
| 1 | amdahl | 470v/7 | 29 | 8000 | 32000 | 32 | 8 | 32 | 269 | 253 |
| 2 | amdahl | 470v/7a | 29 | 8000 | 32000 | 32 | 8 | 32 | 220 | 253 |
| 3 | amdahl | 470v/7b | 29 | 8000 | 32000 | 32 | 8 | 32 | 172 | 253 |
| 4 | amdahl | 470v/7c | 29 | 8000 | 16000 | 32 | 8 | 16 | 132 | 132 |
# Build the feature matrix X and the regression target Y from df.
#
# Columns 0 and 1 hold strings and are converted to integer labels; the
# remaining columns up to (but excluding) the last two are copied as-is.
# Y is the second-to-last column; the last column is left out entirely.
# (This cell previously appeared twice verbatim; one copy is sufficient.)

# Convert the two string columns to integer class labels.
# NOTE(review): label encoding imposes an arbitrary integer order on the
# categories, which a linear model will treat as a numeric scale -- confirm
# that this is intended.
x1 = pre.LabelEncoder().fit_transform(df.iloc[:, 0])
x2 = pre.LabelEncoder().fit_transform(df.iloc[:, 1])

# Every column except the last two is a feature (8 for the 10-column frame).
n_features = df.shape[1] - 2
X = np.zeros((df.shape[0], n_features))
X[:, 0] = x1
X[:, 1] = x2
X[:, 2:] = df.iloc[:, 2:-2]

Y = df.iloc[:, -2]  # goal
# Train the linear model predict_y = X . W + b with batch gradient descent.
#
# Each iteration computes the halved mean squared error
#     rloss = sum((Y - predict_y)^2) / (2 * m)
# and its gradients with respect to W and b, then steps both parameters
# against the gradient scaled by alpha. Training stops once rloss < 2500.
#
# NOTE(review): W, b, m and alpha must already be defined when this runs;
# they are not initialised in the visible code. Presumably m is the number
# of rows of X and alpha the learning rate -- confirm.
count = 0
ones_m = np.ones(shape=[m, ])  # loop-invariant vector used to sum the residuals
while True:
    count += 1
    predict_y = np.dot(X, W.T) + b
    residual = Y - predict_y
    rloss = np.dot(residual.T, residual) / (2 * m)
    # A NaN/inf loss can never satisfy the stopping test below, so without
    # this check a diverging run would loop forever.
    if not np.isfinite(rloss):
        raise FloatingPointError(
            'loss became non-finite after {} iterations'.format(count)
        )
    w_gradient = np.dot(residual.T, X) * (-1.0 / m)
    b_gradient = np.dot(residual.T, ones_m) * (-1.0 / m)
    # gradient descent
    W -= w_gradient * alpha
    b -= b_gradient * alpha
    # Progress report every 10000 iterations.
    if count % 10000 == 0:
        print(rloss)
    # rloss was computed before this iteration's parameter update.
    if rloss < 2500:
        print('MSEloss={}'.format(rloss))
        break
MSEloss=2456.133718645051
array([-0.02520069, -0.17961688, -0.00338619, 0.01356295, 0.00726707,
0.16543731, 0.01494539, 0.13264917])