机器学习实战-逻辑回归
逻辑回归:简单来说,就是在线性回归的基础上加入了Sigmoid函数!
"""Machine Learning in Action — logistic regression.

Logistic regression is, in short, linear regression with a sigmoid
squashing function applied to the output. This script trains a
two-feature classifier with both batch gradient ascent and an improved
stochastic gradient ascent, then plots the decision boundary and the
per-iteration weight trajectories.
"""
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties


def sigmoid(x):
    """Logistic function 1 / (1 + e^-x); works elementwise on arrays."""
    return 1 / (1 + np.exp(-x))


def loadData(filename):
    """Load a whitespace-separated dataset of ``x1 x2 label`` lines.

    A constant 1.0 is prepended to every feature vector so that
    weights[0] acts as the intercept term.

    Returns:
        (dataMat, labelMat): list of [1.0, x1, x2] rows and list of int labels.
    """
    dataMat = []
    labelMat = []
    with open(filename) as f:
        for line in f:
            parts = line.strip().split()
            dataMat.append([1.0, float(parts[0]), float(parts[1])])
            labelMat.append(int(parts[2]))
    return dataMat, labelMat


def plot(dataMat, labelMat):
    """Scatter-plot the dataset, coloring the two classes differently."""
    pos_x, pos_y = [], []   # class 1 points
    neg_x, neg_y = [], []   # class 0 points
    for row, label in zip(dataMat, labelMat):
        if label == 1:
            pos_x.append(row[1])
            pos_y.append(row[2])
        else:
            neg_x.append(row[1])
            neg_y.append(row[2])
    plt.scatter(pos_x, pos_y, c='red', s=20, alpha=0.5, marker='s')
    plt.scatter(neg_x, neg_y, c='green', s=20, alpha=0.5)
    plt.title('DataSet')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.show()


def stogradAscent(dataMat, labelMat, num_iter=150):
    """Train LR with improved stochastic gradient ascent.

    Each epoch visits every sample exactly once, in random order
    (sampling without replacement), with a step size that decays over
    iterations.

    Args:
        dataMat: list/array of [1, x1, x2] feature rows.
        labelMat: list of 0/1 labels.
        num_iter: number of passes over the data.

    Returns:
        (weights, weights_array): final weight vector of length n, and a
        (num_iter*m, n) array recording the weights after every update.
    """
    dataMat = np.array(dataMat)
    m, n = np.shape(dataMat)            # m samples, n features (here 100 x 3)
    weights = np.ones(n)                # parameters to optimize, start at [1,1,1]
    # Pre-size the history instead of repeatedly np.append-ing to a flat
    # array and reshaping at the end — same contents, constant-time writes.
    weights_array = np.zeros((num_iter * m, n))
    step = 0
    for j in range(num_iter):
        dataIndex = list(range(m))      # indices of samples not yet used this epoch
        for i in range(m):
            # Decaying learning rate; the +0.001 floor keeps it from vanishing.
            alpha = 1 / (i + j + 1.0) + 0.001
            # Pick a random remaining sample. BUG FIX: the original used the
            # position within dataIndex to index dataMat/labelMat directly,
            # which over-samples low indices and breaks the
            # without-replacement scheme; the actual sample index is
            # dataIndex[pos].
            pos = int(np.random.uniform(0, len(dataIndex)))
            idx = dataIndex[pos]
            # Scalar residual: label minus predicted probability.
            error = labelMat[idx] - sigmoid(np.dot(dataMat[idx], weights))
            weights = weights + alpha * error * dataMat[idx]
            weights_array[step] = weights
            step += 1
            del dataIndex[pos]          # don't revisit this sample in this epoch
    return weights, weights_array


def gradAscent(dataMat, labelMat):
    """Train LR with batch gradient ascent (500 iterations, alpha=0.001).

    Returns:
        (weights, weights_array): final (n, 1) weight array, and a
        (maxiter, n) array of the weights after each iteration.
    """
    dataMatrix = np.mat(dataMat)
    labelMatrix = np.mat(labelMat).transpose()
    n = np.shape(dataMatrix)[1]         # number of features (columns)
    weights = np.ones((n, 1))           # parameters to optimize
    alpha = 0.001
    maxiter = 500
    weights_array = np.zeros((maxiter, n))
    for i in range(maxiter):
        # (m x 1) column of residuals over the whole batch.
        error = labelMatrix - sigmoid(dataMatrix * weights)
        weights = weights + alpha * dataMatrix.transpose() * error
        weights_array[i] = np.asarray(weights).ravel()
    return np.asarray(weights), weights_array


def plotWeights(weights_array1, weights_array2):
    """Plot each weight's trajectory against the iteration count.

    Left column: weights_array1 (batch gradient ascent).
    Right column: weights_array2 (improved stochastic gradient ascent).
    """
    # CJK font so the Chinese axis titles render; Windows-only path —
    # TODO confirm or parameterize on non-Windows machines.
    font = FontProperties(fname=r"C:\Windows\Fonts\simsun.ttc", size=14)
    # 3 rows x 2 columns of independent axes on a 20x10 canvas.
    fig, axs = plt.subplots(nrows=3, ncols=2, sharex=False, sharey=False,
                            figsize=(20, 10))

    x1 = np.arange(0, len(weights_array1), 1)
    # w0 vs. iteration count (batch).
    axs[0][0].plot(x1, weights_array1[:, 0])
    axs0_title_text = axs[0][0].set_title(u'梯度上升算法:回归系数与迭代次数关系',
                                          FontProperties=font)
    axs0_ylabel_text = axs[0][0].set_ylabel(u'W0', FontProperties=font)
    plt.setp(axs0_title_text, size=20, weight='bold', color='black')
    plt.setp(axs0_ylabel_text, size=20, weight='bold', color='black')
    # w1 vs. iteration count (batch).
    axs[1][0].plot(x1, weights_array1[:, 1])
    axs1_ylabel_text = axs[1][0].set_ylabel(u'W1', FontProperties=font)
    plt.setp(axs1_ylabel_text, size=20, weight='bold', color='black')
    # w2 vs. iteration count (batch). BUG FIX: label was 'W1'.
    axs[2][0].plot(x1, weights_array1[:, 2])
    axs2_xlabel_text = axs[2][0].set_xlabel(u'迭代次数', FontProperties=font)
    axs2_ylabel_text = axs[2][0].set_ylabel(u'W2', FontProperties=font)
    plt.setp(axs2_xlabel_text, size=20, weight='bold', color='black')
    plt.setp(axs2_ylabel_text, size=20, weight='bold', color='black')

    x2 = np.arange(0, len(weights_array2), 1)
    # w0 vs. iteration count (stochastic).
    axs[0][1].plot(x2, weights_array2[:, 0])
    axs0_title_text = axs[0][1].set_title(u'改进的随机梯度上升算法:回归系数与迭代次数关系',
                                          FontProperties=font)
    axs0_ylabel_text = axs[0][1].set_ylabel(u'W0', FontProperties=font)
    plt.setp(axs0_title_text, size=20, weight='bold', color='black')
    plt.setp(axs0_ylabel_text, size=20, weight='bold', color='black')
    # w1 vs. iteration count (stochastic).
    axs[1][1].plot(x2, weights_array2[:, 1])
    axs1_ylabel_text = axs[1][1].set_ylabel(u'W1', FontProperties=font)
    plt.setp(axs1_ylabel_text, size=20, weight='bold', color='black')
    # w2 vs. iteration count (stochastic). BUG FIX: label was 'W1'.
    axs[2][1].plot(x2, weights_array2[:, 2])
    axs2_xlabel_text = axs[2][1].set_xlabel(u'迭代次数', FontProperties=font)
    axs2_ylabel_text = axs[2][1].set_ylabel(u'W2', FontProperties=font)
    plt.setp(axs2_xlabel_text, size=20, weight='bold', color='black')
    plt.setp(axs2_ylabel_text, size=20, weight='bold', color='black')
    plt.show()


def plotBestFit(weights, dataMat, labelMat):
    """Scatter-plot the data and draw the learned decision boundary.

    The boundary is where w0 + w1*x1 + w2*x2 = 0, i.e.
    x2 = -(w0 + w1*x1) / w2.
    """
    pos_x, pos_y = [], []   # class 1 points
    neg_x, neg_y = [], []   # class 0 points
    for row, label in zip(dataMat, labelMat):
        if label == 1:
            pos_x.append(row[1])
            pos_y.append(row[2])
        else:
            neg_x.append(row[1])
            neg_y.append(row[2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(pos_x, pos_y, c='red', s=20, alpha=0.5, marker='s')
    ax.scatter(neg_x, neg_y, c='green', s=20, alpha=0.5)
    # Decision line: slope and intercept in the (x1, x2) plane.
    slope = -weights[1] / weights[2]
    intercept = -weights[0] / weights[2]
    x = np.arange(-3, 3, 0.1)
    ax.plot(x, slope * x + intercept)
    plt.show()


if __name__ == '__main__':
    dataMat, labelMat = loadData('testSet.txt')
    # plot(dataMat, labelMat)
    weights, weights_array1 = stogradAscent(dataMat, labelMat)
    plotBestFit(weights, dataMat, labelMat)
    print(weights)
    weights2, weights_array2 = gradAscent(dataMat, labelMat)
    # Batch trajectories on the left, stochastic on the right.
    plotWeights(weights_array2, weights_array1)
非学无以广才,非志无以成学! 【Magic_chao】
posted on 2018-12-22 23:01 Magic_chao 阅读(355) 评论(0) 编辑 收藏 举报