logistic回归
logistic回归的基本思想
logistic回归是一种分类方法,用于两分类问题。其基本思想为:
a. 寻找合适的假设函数,即分类函数,用以预测输入数据的判断结果;
b. 构造代价函数,即损失函数,用以表示预测的输出结果与训练数据的实际类别之间的偏差;
c. 最小化代价函数,从而获取最优的模型参数。
"""Logistic regression classifiers: batch and stochastic gradient ascent."""
import numpy
from numpy import *
import random


def loadDataSet(filename):
    """Load a whitespace-separated data file into features and labels.

    Each line is ``x1 x2 label``. A constant 1.0 is prepended to every
    feature vector so that weights[0] acts as the intercept term.

    Returns:
        (dataMat, labelMat): list of [1.0, x1, x2] rows and list of int labels.
    """
    dataMat = []
    labelMat = []
    # 'with' guarantees the file handle is closed (original leaked it)
    with open(filename) as fr:
        for line in fr:
            lineArr = line.strip().split()
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
    return dataMat, labelMat


# Sigmoid "step" function
def sigmoid(inX):
    """Logistic function 1/(1+e^-x); works element-wise on numpy arrays."""
    return 1.0 / (1 + numpy.exp(-inX))


# Logistic-regression classifier based on batch gradient ascent
def gradAscent(dataMatIn, classLabels):
    """Fit weights by full-batch gradient ascent on the log-likelihood.

    Args:
        dataMatIn: m x n list/array of feature rows (first column = 1.0 bias).
        classLabels: length-m sequence of 0/1 labels.

    Returns:
        (n, 1) numpy matrix of fitted weights.
    """
    dataMatrix = mat(dataMatIn)
    labelMatrix = mat(classLabels).transpose()
    m, n = shape(dataMatrix)
    alpha = 0.001  # step size
    maxCycles = 500
    weights = ones((n, 1))
    # maxCycles full-batch gradient-ascent updates of the weights
    for _ in range(maxCycles):
        h = sigmoid(dataMatrix * weights)
        error = labelMatrix - h  # gradient direction: (y - h)
        weights = weights + alpha * dataMatrix.transpose() * error
    return weights


# Analyze the data: plot the decision boundary
def plotBestFit(weights):
    """Scatter-plot test.txt by class and draw the fitted decision boundary."""
    # Local import: only this function needs a display/plotting stack.
    import matplotlib.pyplot as plt
    dataMat, labelMat = loadDataSet('test.txt')
    dataArr = array(dataMat)
    n = shape(dataArr)[0]
    xcord1 = []; ycord1 = []
    xcord2 = []; ycord2 = []
    for i in range(n):
        if int(labelMat[i]) == 1:
            xcord1.append(dataArr[i, 1])
            ycord1.append(dataArr[i, 2])
        else:
            xcord2.append(dataArr[i, 1])
            ycord2.append(dataArr[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')

    # Best-fit line. Boundary: w0 + w1*x1 + w2*x2 = 0  ->  x2 = (-w0 - w1*x1)/w2
    x = arange(-3.0, 3.0, 0.1)
    # Flatten so both the (n,1) matrix from gradAscent and the 1-D array
    # from the stochastic variants are accepted (original crashed on the matrix).
    w = asarray(weights).flatten()
    y = (-w[0] - w[1] * x) / w[2]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()


# Stochastic gradient ascent
def stocGradAscent0(dataMatrix, classLabels):
    """One pass of stochastic gradient ascent; returns a 1-D weight array."""
    m, n = numpy.shape(dataMatrix)
    alpha = 0.01  # step size
    weights = numpy.ones((n))
    for i in range(m):
        h = sigmoid(sum(dataMatrix[i] * weights))
        error = classLabels[i] - h
        weights = weights + alpha * error * dataMatrix[i]
    return weights


# Improved stochastic gradient ascent
def stocGradAscent1(dataMatrix, classLabels, numIter=150):
    """Stochastic gradient ascent with decaying alpha and random sampling.

    Each of the numIter passes visits every training example exactly once,
    in random order without replacement; alpha shrinks every update.

    Returns:
        1-D numpy array of fitted weights.
    """
    m, n = shape(dataMatrix)
    weights = ones(n)
    for j in range(numIter):
        # Bug fix: refill the index pool every pass. The original never
        # refilled it, emptied it, and relied on an early
        # 'if randIndex==0: return' hack that aborted training prematurely.
        dataIndex = list(range(m))
        for i in range(m):
            alpha = 4 / (1.0 + j + i) + 0.1  # alpha decays each update
            randIndex = int(random.uniform(0, len(dataIndex)))
            # Bug fix: index rows through dataIndex so deletion actually
            # gives sampling without replacement (original reused randIndex
            # directly on dataMatrix, sampling wrong rows with replacement).
            row = dataIndex[randIndex]
            h = sigmoid(sum(dataMatrix[row] * weights))
            error = classLabels[row] - h
            weights = weights + alpha * error * dataMatrix[row]
            del dataIndex[randIndex]
    return weights


if __name__ == '__main__':
    dataArr, labelMat = loadDataSet('test.txt')
    weights = stocGradAscent1(array(dataArr), labelMat)
    # weights = gradAscent(dataArr, labelMat)
    plotBestFit(weights)
应用:从疝气病预测病马的死亡率
"""Application: predict horse mortality from colic data via logistic regression."""
import numpy
from numpy import *
import random


# Sigmoid "step" function
def sigmoid(inX):
    """Logistic function 1/(1+e^-x); works element-wise on numpy arrays."""
    return 1.0 / (1 + numpy.exp(-inX))


# Classification function
def classifyVector(inX, weights):
    """Classify one feature vector: 1.0 if sigmoid(w.x) > 0.5, else 0.0."""
    prob = sigmoid(sum(inX * weights))
    if prob > 0.5:
        return 1.0
    else:
        return 0.0


# Improved stochastic gradient ascent
def stocGradAscent1(dataMatrix, classLabels, numIter=150):
    """Stochastic gradient ascent with decaying alpha and random sampling.

    Each of the numIter passes visits every training example exactly once,
    in random order without replacement; alpha shrinks every update.

    Returns:
        1-D numpy array of fitted weights.
    """
    m, n = shape(dataMatrix)
    weights = ones(n)
    for j in range(numIter):
        # Bug fix: refill the index pool every pass; the original emptied it
        # once and aborted early via 'if randIndex == 0: return weights'.
        dataIndex = list(range(m))
        for i in range(m):
            alpha = 4 / (1.0 + j + i) + 0.1  # alpha decays each update
            randIndex = int(random.uniform(0, len(dataIndex)))
            # Bug fix: map through dataIndex so deletion gives true
            # sampling without replacement.
            row = dataIndex[randIndex]
            h = sigmoid(sum(dataMatrix[row] * weights))
            error = classLabels[row] - h
            weights = weights + alpha * error * dataMatrix[row]
            del dataIndex[randIndex]
    return weights


# Train/test on the horse-colic data; returns the error rate
def colicTest():
    """Train on horseColicTraining.txt, evaluate on horseColicTest.txt.

    Each line holds 21 tab-separated features followed by the label.

    Returns:
        float error rate on the test set.
    """
    trainingSet = []
    trainingLabels = []
    # 'with' guarantees file handles are closed (original leaked both)
    with open('horseColicTraining.txt') as frTrain:
        for line in frTrain:
            curLine = line.strip().split('\t')
            lineArr = [float(curLine[i]) for i in range(21)]
            trainingSet.append(lineArr)
            trainingLabels.append(float(curLine[21]))
    trainWeights = stocGradAscent1(array(trainingSet), trainingLabels, 500)
    errorCount = 0
    numTestVec = 0
    with open('horseColicTest.txt') as frTest:
        for line in frTest:
            numTestVec += 1.0
            curLine = line.strip().split('\t')
            lineArr = [float(curLine[i]) for i in range(21)]
            if int(classifyVector(array(lineArr), trainWeights)) != int(curLine[21]):
                errorCount += 1
    errorRate = float(errorCount) / numTestVec
    print("错误率", errorRate)
    return errorRate


def multiTest():
    """Run colicTest 10 times and report the average error rate."""
    numTests = 10
    errorSum = 0.0
    for i in range(numTests):
        errorSum += colicTest()
    print("%d 次迭代之后,平均错误率为%f" % (numTests, errorSum / float(numTests)))


# Guarded so importing this module does not immediately train/test
# (original called multiTest() at module level).
if __name__ == '__main__':
    multiTest()
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· go语言实现终端里的倒计时
· 如何编写易于单元测试的代码
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· 分享一个免费、快速、无限量使用的满血 DeepSeek R1 模型,支持深度思考和联网搜索!
· 使用C#创建一个MCP客户端
· ollama系列1:轻松3步本地部署deepseek,普通电脑可用
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· 按钮权限的设计及实现