MLP实现MNIST数据集分类任务

1. 数据集

MNIST手写体数字数据集

2. 代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
'''
Description: NumPy implementation of a two-hidden-layer MLP classifier, trained and evaluated on MNIST
Author: zhangyh
Date: 2024-05-04 15:21:49
LastEditTime: 2024-05-04 22:36:26
LastEditors: zhangyh
'''
 
import numpy as np
 
class MlpClassifier:
    """Fully connected classifier with two ReLU hidden layers and a softmax output.

    The network is trained with mini-batch gradient descent on the
    cross-entropy loss. Inputs are 2-D arrays with one sample per row, and
    labels are one-hot encoded rows with one column per class.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, learning_rate=0.01):
        """Create the three weight matrices and bias rows.

        Args:
            input_size: Number of features per sample.
            hidden_size1: Width of the first hidden layer.
            hidden_size2: Width of the second hidden layer.
            output_size: Number of classes.
            learning_rate: Step size used by ``backward`` for every update.
        """
        self.input_size = input_size
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2
        self.output_size = output_size
        self.learning_rate = learning_rate

        # Small random weights break symmetry between units; biases start at zero.
        self.W1 = np.random.randn(input_size, hidden_size1) * 0.01
        self.b1 = np.zeros((1, hidden_size1))
        self.W2 = np.random.randn(hidden_size1, hidden_size2) * 0.01
        self.b2 = np.zeros((1, hidden_size2))
        self.W3 = np.random.randn(hidden_size2, output_size) * 0.01
        self.b3 = np.zeros((1, output_size))

    def softmax(self, x):
        """Return the row-wise softmax of ``x``."""
        # Subtracting the row maximum keeps np.exp from overflowing and does
        # not change the result.
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exps / np.sum(exps, axis=1, keepdims=True)

    def relu(self, x):
        """Return ``max(x, 0)`` element-wise."""
        return np.maximum(x, 0)

    def relu_derivative(self, x):
        """Return 1 where ``x > 0`` and 0 elsewhere."""
        return np.where(x > 0, 1, 0)

    def cross_entropy_loss(self, y_true, y_pred):
        """Return the mean cross-entropy between one-hot labels and predictions.

        Args:
            y_true: One-hot labels, one row per sample.
            y_pred: Predicted class probabilities with the same shape.
        """
        m = y_true.shape[0]
        # The 1e-8 offset avoids log(0) when a predicted probability is zero.
        return -np.sum(y_true * np.log(y_pred + 1e-8)) / m

    def forward(self, X):
        """Run a forward pass and return the class probabilities.

        The pre-activations (``Z1``..``Z3``) and activations (``A1``..``A3``)
        are stored on the instance because ``backward`` reads them.
        """
        self.Z1 = np.dot(X, self.W1) + self.b1
        self.A1 = self.relu(self.Z1)
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = self.relu(self.Z2)
        self.Z3 = np.dot(self.A2, self.W3) + self.b3
        self.A3 = self.softmax(self.Z3)
        return self.A3

    def backward(self, X, y):
        """Back-propagate through the cached forward pass and update parameters.

        Must be called right after ``forward(X)`` on the same batch, because it
        uses the activations cached by that call.

        Args:
            X: The batch that was passed to ``forward``.
            y: One-hot labels for that batch.
        """
        m = X.shape[0]
        # Gradient of softmax + cross-entropy with respect to the logits.
        dZ3 = self.A3 - y
        dW3 = np.dot(self.A2.T, dZ3) / m
        db3 = np.sum(dZ3, axis=0, keepdims=True) / m
        dA2 = np.dot(dZ3, self.W3.T)
        dZ2 = dA2 * self.relu_derivative(self.Z2)
        dW2 = np.dot(self.A1.T, dZ2) / m
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m
        dA1 = np.dot(dZ2, self.W2.T)
        dZ1 = dA1 * self.relu_derivative(self.Z1)
        dW1 = np.dot(X.T, dZ1) / m
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m

        # All gradients are computed before any parameter changes, so every
        # update uses the weights that produced the forward pass.
        self.W3 -= self.learning_rate * dW3
        self.b3 -= self.learning_rate * db3
        self.W2 -= self.learning_rate * dW2
        self.b2 -= self.learning_rate * db2
        self.W1 -= self.learning_rate * dW1
        self.b1 -= self.learning_rate * db1

    def accuracy(self, y_pred, y):
        """Return the fraction of rows where the predicted class matches the label.

        Args:
            y_pred: Predicted class scores, one row per sample.
            y: One-hot labels with the same shape.
        """
        predictions = np.argmax(y_pred, axis=1)
        correct_predictions = np.sum(predictions == np.argmax(y, axis=1))
        return correct_predictions / y.shape[0]

    def train(self, X, y, epochs=100, batch_size=64):
        """Train with mini-batch gradient descent, reporting every 10 epochs.

        Batches are consecutive slices of ``X``/``y`` in their given order
        (no shuffling). The loss and accuracy printed every tenth epoch are
        both computed from one forward pass over the full training set.

        Args:
            X: Training samples, one row per sample.
            y: One-hot training labels.
            epochs: Number of passes over the data.
            batch_size: Number of samples per gradient step.
        """
        print('Training...')
        m = X.shape[0]
        for epoch in range(epochs):
            for i in range(0, m, batch_size):
                X_batch = X[i:i+batch_size]
                y_batch = y[i:i+batch_size]

                # Forward pass caches the activations that backward consumes.
                self.forward(X_batch)
                self.backward(X_batch, y_batch)

            if (epoch+1) % 10 == 0:
                # Evaluate on the whole training set with the current weights.
                # Using the last mini-batch's stale predictions would report
                # the accuracy of a handful of samples, not of the training set.
                train_pred = self.forward(X)
                loss = self.cross_entropy_loss(y, train_pred)
                acc = self.accuracy(train_pred, y)
                print(f'Epoch {epoch+1}/{epochs}, Loss: {loss}, Training-Accuracy: {acc}')

    def test(self, X, y):
        """Return the classification accuracy on ``X`` against one-hot labels ``y``."""
        print('Testing...')
        y_pred = self.forward(X)
        acc = self.accuracy(y_pred, y)
        return acc
 
 
if __name__ == '__main__':
    # Script entry point: train the MLP on MNIST and report test accuracy.
    # TensorFlow is imported here because it is only used to load the dataset
    # and one-hot encode the labels.
    import tensorflow as tf

    # Load the MNIST dataset.
    (X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

    # Flatten each image into a vector and scale pixel values by 1/255.
    X_train = X_train.reshape(X_train.shape[0], -1) / 255.0
    X_test = X_test.reshape(X_test.shape[0], -1) / 255.0
    # One-hot encode the labels.
    num_classes = 10
    y_train = tf.keras.utils.to_categorical(y_train, num_classes)
    y_test = tf.keras.utils.to_categorical(y_test, num_classes)

    # Shapes after the conversion above:
    # training set: (60000, 784) (60000, 10)
    # test set:     (10000, 784) (10000, 10)
    model = MlpClassifier(784, 128, 128, 10)

    model.train(X_train, y_train)

    test_acc = model.test(X_test, y_test)
    print(f'Test-Accuracy: {test_acc}')
  

  

3. 运行结果

1
2
3
4
5
6
7
8
9
10
11
12
13
Training...
Epoch 10/100, Loss: 0.3617846299623725, Training-Accuracy: 0.9375
Epoch 20/100, Loss: 0.1946690996652946, Training-Accuracy: 1.0
Epoch 30/100, Loss: 0.13053815227522408, Training-Accuracy: 1.0
Epoch 40/100, Loss: 0.09467908427578901, Training-Accuracy: 1.0
Epoch 50/100, Loss: 0.07120217251250453, Training-Accuracy: 1.0
Epoch 60/100, Loss: 0.055233734086591456, Training-Accuracy: 1.0
Epoch 70/100, Loss: 0.04369171830999816, Training-Accuracy: 1.0
Epoch 80/100, Loss: 0.03469674775956587, Training-Accuracy: 1.0
Epoch 90/100, Loss: 0.027861857647949812, Training-Accuracy: 1.0
Epoch 100/100, Loss: 0.0225212692988995, Training-Accuracy: 1.0
Testing...
Test-Accuracy: 0.9775

  

posted @   映辉  阅读(44)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 分享一个免费、快速、无限量使用的满血 DeepSeek R1 模型,支持深度思考和联网搜索!
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· ollama系列01:轻松3步本地部署deepseek,普通电脑可用
· 25岁的心里话
· 按钮权限的设计及实现
历史上的今天:
2023-05-06 Linux 系统用户登录时很慢怎么办
点击右上角即可分享
微信分享提示