目录 trainingDigits 中包含了大约 2000 个例子,每个例子内容如下图所示,每个数字大约有 200 个样本;目录 testDigits 中包含了大约 900 个测试数据。
# Convert a text-encoded digit image into a feature vector.
def img2vector(filename):
    """Flatten a 32x32 text image into a 1x1024 NumPy row vector.

    Only the first 32 lines of the file are read, and only the first 32
    characters of each line are used. Every character is converted with
    ``int()`` and stored at column ``32 * row + col``.

    Args:
        filename: Path of the text file to read.

    Returns:
        An array of shape (1, 1024) holding the parsed values.

    Raises:
        IndexError: If one of the first 32 lines has fewer than 32 characters.
        ValueError: If a character among those read is not a valid integer.
    """
    returnVect = zeros((1, 1024))
    # The context manager closes the file even when parsing fails; the
    # original left the handle open.
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            for j in range(32):
                returnVect[0, 32 * i + j] = int(lineStr[j])
    return returnVect
# Evaluate the classifier on the handwritten-digit data.
def handwritingClassTest():
    """Run the kNN classifier over ``testDigits`` and print the error rate.

    Every file in ``trainingDigits`` is loaded with ``img2vector`` into one
    row of the training matrix. Each file in ``testDigits`` is then
    classified with ``classify0`` (k = 3) and compared with its true label.
    The label of a file is the integer that precedes the first underscore
    of its name (after dropping everything from the first dot onwards).

    One line is printed per test file, followed by the total number of
    errors and the error rate.

    Raises:
        ValueError: If a file name does not start with an integer label.
        ZeroDivisionError: If ``testDigits`` contains no files.
    """
    hwLabels = []
    trainingFileList = listdir('trainingDigits')  # load the training set
    m = len(trainingFileList)
    trainingMat = zeros((m, 1024))
    for i in range(m):
        fileNameStr = trainingFileList[i]
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        hwLabels.append(classNumStr)
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr)

    testFileList = listdir('testDigits')  # iterate over the test set
    errorCount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        # A single parenthesised argument prints identically on Python 2 and 3.
        print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0
    print("\nthe total number of errors is: %d" % errorCount)
    print("\nthe total error rate is: %f" % (errorCount / float(mTest)))
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest training rows.

    Distance is Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: The sample to classify; it is tiled to one copy per training row.
        dataSet: 2-D array with one training sample per row.
        labels: Sequence of labels, where ``labels[i]`` belongs to row ``i``
            of ``dataSet``.
        k: Number of nearest neighbours that vote. Values larger than the
            number of training rows are clamped to that number.

    Returns:
        The label with the most votes among the nearest neighbours.

    Raises:
        IndexError: If no neighbour votes (empty ``dataSet`` or ``k`` < 1).
    """
    dataSetSize = dataSet.shape[0]
    # Repeat inX dataSetSize times along the rows (once along the columns)
    # so the subtraction is done row by row against the whole training set.
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet
    sqDiffMat = diffMat ** 2
    sqDistances = sqDiffMat.sum(axis=1)  # axis=1: sum across each row
    distances = sqDistances ** 0.5
    sortedDistIndicies = distances.argsort()  # indices in ascending distance order
    # Clamp with a plain comparison: the builtin min() may be shadowed when
    # the module is loaded with `from numpy import *`.
    if k > dataSetSize:
        k = dataSetSize
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        # Increment the vote count stored under this label.
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # items() exists on Python 2 and 3, unlike iteritems().
    sortedClassCount = sorted(classCount.items(), key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]
from numpy import *
import operator
from os import listdir


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest training rows.

    Distance is Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: The sample to classify; it is tiled to one copy per training row.
        dataSet: 2-D array with one training sample per row.
        labels: Sequence of labels, where ``labels[i]`` belongs to row ``i``
            of ``dataSet``.
        k: Number of nearest neighbours that vote. Values larger than the
            number of training rows are clamped to that number.

    Returns:
        The label with the most votes among the nearest neighbours.

    Raises:
        IndexError: If no neighbour votes (empty ``dataSet`` or ``k`` < 1).
    """
    dataSetSize = dataSet.shape[0]
    # Repeat inX dataSetSize times along the rows (once along the columns)
    # so the subtraction is done row by row against the whole training set.
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet
    sqDiffMat = diffMat ** 2
    sqDistances = sqDiffMat.sum(axis=1)  # axis=1: sum across each row
    distances = sqDistances ** 0.5
    sortedDistIndicies = distances.argsort()  # indices in ascending distance order
    # Clamp with a plain comparison: the star import above may shadow the
    # builtin min() on some NumPy versions.
    if k > dataSetSize:
        k = dataSetSize
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        # Increment the vote count stored under this label.
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # items() exists on Python 2 and 3, unlike iteritems().
    sortedClassCount = sorted(classCount.items(), key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]


# Convert a text-encoded digit image into a feature vector.
def img2vector(filename):
    """Flatten a 32x32 text image into a 1x1024 NumPy row vector.

    Only the first 32 lines of the file are read, and only the first 32
    characters of each line are used. Every character is converted with
    ``int()`` and stored at column ``32 * row + col``.

    Args:
        filename: Path of the text file to read.

    Returns:
        An array of shape (1, 1024) holding the parsed values.

    Raises:
        IndexError: If one of the first 32 lines has fewer than 32 characters.
        ValueError: If a character among those read is not a valid integer.
    """
    returnVect = zeros((1, 1024))
    # The context manager closes the file even when parsing fails; the
    # original left the handle open.
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            for j in range(32):
                returnVect[0, 32 * i + j] = int(lineStr[j])
    return returnVect


# Evaluate the classifier on the handwritten-digit data.
def handwritingClassTest():
    """Run the kNN classifier over ``testDigits`` and print the error rate.

    Every file in ``trainingDigits`` is loaded with ``img2vector`` into one
    row of the training matrix. Each file in ``testDigits`` is then
    classified with ``classify0`` (k = 3) and compared with its true label.
    The label of a file is the integer that precedes the first underscore
    of its name (after dropping everything from the first dot onwards).

    One line is printed per test file, followed by the total number of
    errors and the error rate.

    Raises:
        ValueError: If a file name does not start with an integer label.
        ZeroDivisionError: If ``testDigits`` contains no files.
    """
    hwLabels = []
    trainingFileList = listdir('trainingDigits')  # load the training set
    m = len(trainingFileList)
    trainingMat = zeros((m, 1024))
    for i in range(m):
        fileNameStr = trainingFileList[i]
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        hwLabels.append(classNumStr)
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr)

    testFileList = listdir('testDigits')  # iterate over the test set
    errorCount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        # A single parenthesised argument prints identically on Python 2 and 3.
        print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0
    print("\nthe total number of errors is: %d" % errorCount)
    print("\nthe total error rate is: %f" % (errorCount / float(mTest)))
>>> import kNN
>>> kNN.handwritingClassTest()
the classifier came back with: 4, the real answer is: 4
the classifier came back with: 4, the real answer is: 4
.
.
.
the classifier came back with: 3, the real answer is: 3

the total number of errors is: 11

the total error rate is: 0.011628
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 智能桌面机器人:用.NET IoT库控制舵机并多方法播放表情
· Linux glibc自带哈希表的用例及性能测试
· 深入理解 Mybatis 分库分表执行原理
· 如何打造一个高并发系统?
· .NET Core GC压缩(compact_phase)底层原理浅谈
· DeepSeek火爆全网,官网宕机?本地部署一个随便玩「LLM探索」
· 开发者新选择:用DeepSeek实现Cursor级智能编程的免费方案
· Tinyfox 发生重大改版
· 独立开发经验谈:如何通过 Docker 让潜在客户快速体验你的系统
· 小米CR6606,CR6608,CR6609 启用SSH和刷入OpenWRT 23.05.5