李航《统计学习方法》贝叶斯估计分类器实现习题
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Naive Bayes classifier with Bayesian estimation (Laplace smoothing).

Worked example with two discrete features and the labels {1, -1}.

author: jianbin
time:2022/10/26
"""
import numpy as np

# Number of classes and their labels.  These are module-level so that
# Predict() works even when Train() has not been called in this process.
class_num = 2
label = [1, -1]

# Default feature-value table: row j holds the j-th possible value of the
# first feature (column 0) and of the second feature (column 1).
_DEFAULT_FEATURE = [[1, 'S'], [2, 'M'], [3, 'L']]


def _value_row(feature, column, value):
    """Return the row index of ``value`` in ``column`` of the feature table.

    Raises:
        ValueError: if ``value`` is not listed in that column.
    """
    for row, values in enumerate(feature):
        if values[column] == value:
            return row
    raise ValueError(
        "value %r of feature %d is not present in the feature table"
        % (value, column)
    )


def Train(X_train, Y_train, feature):
    """Estimate the prior and conditional probabilities of the NB classifier.

    Args:
        X_train: sequence of samples, each ``[first_feature, second_feature]``.
        Y_train: sequence of labels, one per sample.  A label equal to 1 is
            counted as positive; every other label is counted as negative.
        feature: table of possible feature values, one row per value
            (``feature[j][0]`` for the first feature, ``feature[j][1]`` for
            the second).  ``None`` selects the built-in 3x2 table.

    Returns:
        ``(prior_probability, conditional_probability)`` where
        ``prior_probability`` has shape ``(class_num,)`` and
        ``conditional_probability`` has shape
        ``(class_num, len(feature), 2)``; entry ``[i][j][f]`` is the smoothed
        probability that feature ``f`` takes its j-th value given class
        ``label[i]``.

    Raises:
        ValueError: if ``X_train`` and ``Y_train`` differ in length.
    """
    if len(X_train) != len(Y_train):
        raise ValueError("X_train and Y_train must have the same length")
    if feature is None:
        feature = _DEFAULT_FEATURE

    alpha = 1  # Laplace smoothing constant (Bayesian estimation)
    feature_len = len(feature)  # number of possible values per feature
    sample_num = len(Y_train)

    # Class counts: anything that is not the positive label is negative.
    positive_count = sum(1 for y in Y_train if y == 1)
    negative_count = sample_num - positive_count

    # Smoothed priors: (N_c + alpha) / (N + K * alpha).
    prior_probability = np.zeros(class_num)
    prior_probability[0] = (positive_count + alpha) / (sample_num + class_num * alpha)
    prior_probability[1] = (negative_count + alpha) / (sample_num + class_num * alpha)
    # For the textbook data set this prints 10/17 and 7/17.
    print("正负先验概率:", prior_probability[0], prior_probability[1])

    # Count, per class, how often each feature takes each listed value.
    conditional_probability = np.zeros((class_num, feature_len, 2))
    for i, cls in enumerate(label):
        for sample, y in zip(X_train, Y_train):
            if y != cls:
                continue
            for j, values in enumerate(feature):
                if sample[0] == values[0]:
                    conditional_probability[i][j][0] += 1
                if sample[1] == values[1]:
                    conditional_probability[i][j][1] += 1

    class_label_num = [positive_count, negative_count]  # e.g. [9, 6]
    print(class_label_num)

    # Smoothed conditionals: (count + alpha) / (N_c + S * alpha), where S is
    # the number of possible values of the feature.  Reshaping the class
    # counts to (class_num, 1, 1) lets them broadcast over rows and columns.
    class_counts = np.asarray(class_label_num, dtype=float).reshape(class_num, 1, 1)
    conditional_probability = (conditional_probability + alpha) / (
        class_counts + feature_len * alpha
    )

    return prior_probability, conditional_probability


def Predict(testset, prior_probability, conditional_probability, feature):
    """Score one sample against every class.

    Args:
        testset: the sample to classify, ``[first_feature, second_feature]``.
        prior_probability: class priors as returned by ``Train``.
        conditional_probability: conditional probabilities as returned by
            ``Train``.
        feature: the feature-value table used for training (``None`` selects
            the built-in table).

    Returns:
        A 2 x class_num array: row 0 holds the unnormalised posterior
        ``P(Y=c) * P(X1|Y=c) * P(X2|Y=c)`` for each class, row 1 holds the
        corresponding class labels.

    Raises:
        ValueError: if a value in ``testset`` is not in the feature table.
    """
    if feature is None:
        feature = _DEFAULT_FEATURE

    # Locate each test value once; an unknown value is reported explicitly
    # instead of surfacing later as an unbound local variable.
    first_row = _value_row(feature, 0, testset[0])
    second_row = _value_row(feature, 1, testset[1])

    result = np.zeros(len(label))
    for i in range(class_num):
        result[i] = (
            conditional_probability[i][first_row][0]
            * conditional_probability[i][second_row][1]
            * prior_probability[i]
        )

    # Stack the scores on top of their labels so callers can read both.
    return np.vstack([result, label])


def main():
    """Run the textbook example and print the per-class scores."""
    X_train = [[1, 'S'], [1, 'M'], [1, 'M'], [1, 'S'], [1, 'S'],
               [2, 'S'], [2, 'M'], [2, 'M'], [2, 'L'], [2, 'L'],
               [3, 'L'], [3, 'M'], [3, 'M'], [3, 'L'], [3, 'L']]
    Y_train = [-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1]

    # 3x2 table of the possible values of the two features.
    feature = [[1, 'S'], [2, 'M'], [3, 'L']]

    testset = [2, 'S']

    prior_probability, conditional_probability = Train(X_train, Y_train, feature)

    result = Predict(testset, prior_probability, conditional_probability, feature)
    print(result)


if __name__ == '__main__':
    main()
参考博客:https://cloud.tencent.com/developer/article/1505695
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek “源神”启动!「GitHub 热点速览」
· 我与微信审核的“相爱相杀”看个人小程序副业
· 微软正式发布.NET 10 Preview 1:开启下一代开发框架新篇章
· C# 集成 DeepSeek 模型实现 AI 私有化(本地部署与 API 调用教程)
· spring官宣接入deepseek,真的太香了~