李航《统计学习方法》贝叶斯估计分类器实现习题

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
author: jianbin
time:2022/10/26
"""
 
import numpy as np
 
# 构造NB分类器
def Train(X_train, Y_train, feature):
    """Fit a two-class naive Bayes classifier with Laplace smoothing.

    Args:
        X_train: Sequence of samples; each sample is indexable as
            ``[x1, x2]`` (two categorical features).
        Y_train: Sequence of class labels, one per sample. Entries equal to
            1 are counted as the positive class for the prior; every other
            entry is counted as the negative class.
        feature: Table of candidate feature values. Row ``j`` is
            ``[a_j, b_j]``: column 0 holds a value of the first feature and
            column 1 a value of the second feature. The number of rows is
            used as the number of distinct values per feature in the
            smoothing denominator.

    Returns:
        A tuple ``(prior_probability, conditional_probability)``.
        ``prior_probability`` is a length-2 array ordered like ``label``
        (``[1, -1]``). ``conditional_probability[i][j][f]`` is the smoothed
        estimate of ``P(X_f == feature[j][f] | Y == label[i])``.

    Side effects:
        Sets the module-level globals ``class_num`` and ``label`` (read by
        ``Predict``) and prints the priors and the per-class sample counts.
    """
    global class_num, label
    alpha = 1          # Laplace smoothing constant (Bayesian estimate)
    class_num = 2      # number of classes
    label = [1, -1]    # class labels, positive first

    # Use the caller's table; it used to be overwritten by a hard-coded
    # 3x2 table, which silently ignored the argument.
    feature_len = len(feature)

    sample_count = len(Y_train)
    positive_count = sum(1 for y in Y_train if y == 1)
    negative_count = sample_count - positive_count

    # Smoothed class priors: (N_c + alpha) / (N + K * alpha).
    prior_probability = np.zeros(class_num)
    prior_denominator = sample_count + class_num * alpha
    prior_probability[0] = (positive_count + alpha) / prior_denominator
    prior_probability[1] = (negative_count + alpha) / prior_denominator
    # For the 15-sample demo data this prints 10/17 and 7/17.
    print("正负先验概率:", prior_probability[0], prior_probability[1])

    # conditional_probability[i][j][f] first holds the raw count of samples
    # of class label[i] whose f-th feature equals feature[j][f].
    conditional_probability = np.zeros((class_num, feature_len, 2))
    for i, class_label in enumerate(label):
        for sample, target in zip(X_train, Y_train):
            if target != class_label:
                continue
            for j, row in enumerate(feature):
                if sample[0] == row[0]:
                    conditional_probability[i][j][0] += 1
                if sample[1] == row[1]:
                    conditional_probability[i][j][1] += 1

    class_label_num = [positive_count, negative_count]  # samples per class
    print(class_label_num)

    # Turn counts into smoothed conditional probabilities:
    # (count + alpha) / (N_c + S * alpha), with S = number of feature values.
    for i in range(class_num):
        conditional_probability[i] = (
            (conditional_probability[i] + alpha)
            / (class_label_num[i] + feature_len * alpha)
        )

    return prior_probability, conditional_probability
 
 
# 给定数据进行分类
def Predict(testset, prior_probability, conditional_probability, feature,
            labels=None):
    """Score one sample against every class with the trained probabilities.

    Args:
        testset: The sample to classify, indexable as ``[x1, x2]``.
        prior_probability: Per-class priors, indexed by class position.
        conditional_probability: Indexed as ``[class][row][f]``; the
            conditional probability that feature ``f`` takes the value
            ``feature[row][f]`` given the class.
        feature: Table of candidate feature values; row ``j`` is
            ``[a_j, b_j]`` (column 0: first feature, column 1: second).
        labels: Optional class labels, ordered like ``prior_probability``.
            When omitted, the module-level ``label`` set by ``Train`` is
            used, so ``Train`` must have been called first.

    Returns:
        A 2 x n_classes array: row 0 holds the unnormalised posterior score
        ``P(x1 | c) * P(x2 | c) * P(c)`` of each class, row 1 the matching
        class labels.

    Raises:
        ValueError: If a value of ``testset`` does not occur in the
            corresponding column of ``feature``.
    """
    if labels is None:
        # Fall back to the labels published by Train() as a module global.
        labels = label

    # The matching rows do not depend on the class, so find them once.
    # As in a plain scan, the last matching row wins if a value repeats.
    first_row = None
    second_row = None
    for j, row in enumerate(feature):
        if row[0] == testset[0]:
            first_row = j
        if row[1] == testset[1]:
            second_row = j

    # Previously an unseen value left the per-feature probability unbound
    # and surfaced as an UnboundLocalError; fail with a clear message.
    if first_row is None or second_row is None:
        raise ValueError(
            "testset %r contains a value missing from the feature table"
            % (testset,)
        )

    result = np.zeros(len(labels))
    for i in range(len(labels)):
        conditionalA = conditional_probability[i][first_row][0]
        conditionalB = conditional_probability[i][second_row][1]
        result[i] = conditionalA * conditionalB * prior_probability[i]

    # Stack the scores on top of the labels so each column is (score, label).
    return np.vstack([result, labels])
 
 
def main():
    """Train on the 15-sample demo table and print the scores for [2, 'S']."""
    # First feature: five samples each with value 1, 2 and 3.
    first_values = [1] * 5 + [2] * 5 + [3] * 5
    # Second feature, listed in the same sample order.
    second_values = list("SMMSS" "SMMLL" "LMMLL")
    X_train = [[num, size] for num, size in zip(first_values, second_values)]
    Y_train = [-1, -1, 1, 1, -1,
               -1, -1, 1, 1, 1,
               1, 1, 1, 1, -1]

    # 3x2 table: row j pairs a value of feature 1 with a value of feature 2.
    feature = [[num, size] for num, size in zip((1, 2, 3), "SML")]

    testset = [2, 'S']

    priors, conditionals = Train(X_train, Y_train, feature)
    print(Predict(testset, priors, conditionals, feature))
 
 
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

  

参考博客:https://cloud.tencent.com/developer/article/1505695

posted @   路人加  阅读(29)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· DeepSeek “源神”启动!「GitHub 热点速览」
· 我与微信审核的“相爱相杀”看个人小程序副业
· 微软正式发布.NET 10 Preview 1:开启下一代开发框架新篇章
· C# 集成 DeepSeek 模型实现 AI 私有化(本地部署与 API 调用教程)
· spring官宣接入deepseek,真的太香了~
点击右上角即可分享
微信分享提示