机器学习实战源码-----用线性回归找到最佳拟合曲线
# -*- coding: utf-8 -*-
"""Ordinary and locally weighted linear regression on tab-separated data.

Running the module as a script fits a locally weighted regression to
``ex0.txt`` and plots the fitted curve over the raw points.
"""
import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Plotting is only needed by the demo in main(); the regression
    # functions stay usable when matplotlib is not installed.
    plt = None


def loadDataSet(fileName):
    """Read a tab-separated numeric file into feature rows and labels.

    The last field of every line is the label; the fields before it are
    the features.  The number of features is taken from the first
    non-blank line.  Blank lines are skipped.

    Args:
        fileName: path of the text file to read.

    Returns:
        A tuple ``(dataMat, labelMat)``: a list of feature lists (floats)
        and a list of labels (floats).

    Raises:
        ValueError: if a field cannot be parsed as a float.
        IndexError: if a line has fewer fields than the first data line.
    """
    dataMat = []
    labelMat = []
    numFeat = None  # number of feature columns, fixed by the first data line
    with open(fileName) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                continue
            curLine = stripped.split("\t")
            if numFeat is None:
                numFeat = len(curLine) - 1
            dataMat.append([float(curLine[i]) for i in range(numFeat)])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat


def standRegres(xArr, yArr):
    """Solve ordinary least squares with the normal equations.

    Args:
        xArr: 2-D sequence of feature rows.
        yArr: 1-D sequence of target values, one per row of ``xArr``.

    Returns:
        The weight column vector ``(X^T X)^-1 X^T y`` as an ``np.matrix``,
        or ``None`` (after printing a message) when the determinant of
        ``X^T X`` is exactly zero.
    """
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr).T
    xTx = xMat.T * xMat
    if np.linalg.det(xTx) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return None
    return xTx.I * (xMat.T * yMat)


def lwlr(testPoint, xArr, yArr, k=1.0):
    """Predict one point with locally weighted linear regression.

    Each training row ``x_j`` gets the Gaussian kernel weight
    ``exp(-|testPoint - x_j|^2 / (2 * k^2))`` and a weighted least-squares
    fit is solved for this query point.

    Args:
        testPoint: feature row to predict for (same width as ``xArr`` rows).
        xArr: 2-D sequence of training feature rows.
        yArr: 1-D sequence of training targets.
        k: kernel bandwidth; smaller values weight nearby rows more heavily.

    Returns:
        ``testPoint * ws`` (a single-element matrix), or ``None`` (after
        printing a message) when the determinant of the weighted
        ``X^T W X`` is exactly zero.
    """
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr).T

    # Squared Euclidean distance from the query point to every training row.
    diff = np.asarray(xMat - testPoint)
    sqDist = np.sum(diff * diff, axis=1)

    # The weight matrix is diagonal, so keep only its diagonal and apply it
    # by broadcasting instead of materialising an m-by-m matrix.
    w = np.exp(sqDist / (-2.0 * k ** 2))[:, np.newaxis]
    weightedX = np.asmatrix(w * np.asarray(xMat))
    weightedY = np.asmatrix(w * np.asarray(yMat))

    xTx = xMat.T * weightedX
    if np.linalg.det(xTx) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return None
    ws = xTx.I * (xMat.T * weightedY)
    return testPoint * ws


def lwlrTest(testArr, xArr, yArr, k=1.0):
    """Run :func:`lwlr` for every row of ``testArr``.

    Args:
        testArr: 2-D sequence of query rows.
        xArr: 2-D sequence of training feature rows.
        yArr: 1-D sequence of training targets.
        k: kernel bandwidth passed through to :func:`lwlr`.

    Returns:
        A 1-D float ``ndarray`` with one prediction per query row.  Rows
        for which :func:`lwlr` returns ``None`` are stored as NaN.
    """
    m = np.shape(testArr)[0]
    yHat = np.zeros(m)
    for i in range(m):
        pred = lwlr(testArr[i], xArr, yArr, k)
        # lwlr returns a single-element matrix; pull the scalar out
        # explicitly rather than relying on implicit conversion.
        yHat[i] = np.nan if pred is None else np.asarray(pred).item()
    return yHat


def main():
    """Fit LWLR (k=0.01) to ``ex0.txt`` and plot the curve over the data."""
    if plt is None:
        raise ImportError("matplotlib is required to plot the regression")

    xArr, yArr = loadDataSet("ex0.txt")
    yHat = lwlrTest(xArr, xArr, yArr, 0.01)

    xs = np.asarray(xArr)
    # Sort by the second column so the fitted line is drawn left to right.
    order = xs[:, 1].argsort()

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(xs[order, 1], yHat[order])
    ax.scatter(xs[:, 1], np.asarray(yArr), s=2, c="red")
    plt.show()


if __name__ == "__main__":
    main()