Getting Started with PyTorch: the XOR Problem

Experiment environment

  • CUDA version: nvcc --version
    nvcc: NVIDIA (R) Cuda compiler driver
    Copyright (c) 2005-2021 NVIDIA Corporation
    Built on Sun_Feb_14_21:12:58_PST_2021
    Cuda compilation tools, release 11.2, V11.2.152
    Build cuda_11.2.r11.2/compiler.29618528_0
    
  • Python version: python --version
    Python 3.9.12
    
  • System version: cat /proc/version
    Linux version 5.4.0-109-generic (buildd@ubuntu) (gcc version 9.4.0 (Ubuntu 9.4.0-1ubuntu1~20.04.1)) #123-Ubuntu SMP Fri Apr 8 09:10:54 UTC 2022
    
  • Install dependencies: pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
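
Before running anything, it is worth a quick sanity check that the installed wheel can see the GPU. A minimal sketch (assuming the install above succeeded; the exact version string will vary):

import torch
print(torch.__version__)          # e.g. "1.11.0+cu113" for a cu113 wheel
print(torch.cuda.is_available())  # True if a usable GPU and driver are found
print(torch.version.cuda)         # the CUDA runtime the wheel was built with

Note that the cu113 wheel ships its own CUDA 11.3 runtime, so it can coexist with the system's CUDA 11.2 toolkit as long as the NVIDIA driver is recent enough.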

Code:

import math
import os

import torch
import torch.utils.data as data
from torch import Tensor, cuda, device, nn
from torch import optim as optim

os.environ['CUDA_LAUNCH_BLOCKING'] = '1'  # make CUDA calls synchronous so errors surface at the failing line
DEVICE = device("cuda" if cuda.is_available() else "cpu")  # use the GPU when available


def print_bar(
    epoch: int,
    epochs: int,
    step: int,
    batch_size: int,
    etc=None,
    bar_size: int = 50
) -> None:
    """打印进度条。

    Parameters
    ----------
    epoch : int
        迭代轮次
    epochs : int
        迭代轮数
    step : int
        当前批次步数
    batch_size : int
        批大小
    etc : _type_, optional
        后缀信息, by default None
    bar_size : int, optional
        打印长度, by default 50
    """
    process = math.ceil(bar_size*step/batch_size)  # number of filled bar cells
    strs = [
        # At step 0, "\033[A" moves the cursor up one line so the new bar
        # overwrites the previously printed line.
        ('' if step else '\033[A')+f"Epoch {epoch+1}/{epochs}",
        f"|\033[1;30;47m{' ' * process}\033[0m{' ' * (bar_size-process)}|",
    ]
    if etc is not None:
        strs.append(str(etc))
    print(" ".join(strs))


class Model(nn.Module):
    def __init__(self, layers: 'list[tuple[nn.Module, callable]]'):
        super(Model, self).__init__()
        self.layers = nn.ModuleList([layer for layer, _ in layers]).to(
            DEVICE)  # a plain Python list is not registered by nn.Module, so
                     # nn.ModuleList is needed for .to(DEVICE) to move the layers
        self.activations = [f for _, f in layers]  # store the activation functions
        self.deep = len(layers)  # network depth (number of layers)

    def forward(self, x: Tensor) -> Tensor:  # both model(x) and model.forward(x) end up calling this method
        a = x.to(DEVICE)  # move the input batch to the compute device
        for i in range(self.deep):  # apply each layer and its activation in turn
            a = self.layers[i](a)
            activation = self.activations[i]
            if activation:
                a = activation(a)
        return a


def train_step(
    model: nn.Module,
    x: Tensor,
    y: Tensor,
    optimizer: optim.Optimizer,
    loss_func: nn.modules.loss._Loss
):
    """单步训练。

    Parameters
    ----------
    model : nn.Module
        模型对象
    x : Tensor
        输入训练数据
    y : Tensor
        数据标签
    optimizer : optim.Optimizer
        优化器
    loss_func : nn.modules.loss._Loss
        损失函数

    Returns
    -------
    Any
        损失函数输出。
    """
    x = x.to(DEVICE)
    y = y.to(DEVICE)
    o = model(x)  # forward pass
    # For MSELoss the target must match the output's shape; squeeze(dim=0)
    # only removes a leading singleton dimension, and is a no-op here.
    loss = loss_func(o, y.squeeze(dim=0))
    optimizer.zero_grad()  # clear the accumulated gradients
    loss.backward()  # backpropagate; retain_graph is unnecessary for a single backward pass
    optimizer.step()  # update the parameters the optimizer was constructed with
    return loss.item()  # return the scalar loss


def train(
    model: Model,
    dataset: data.Dataset,
    batch_size: int,
    epochs: int,
    optimizer: optim.Optimizer,
    loss_func: nn.modules.loss._Loss
) -> None:
    """训练。

    Parameters
    ----------
    model : Model
        模型对象
    dataset : data.Dataset
        训练集
    batch_size : int
        每批数据量
    epochs : int
        迭代轮数
    optimizer : optim.Optimizer
        优化器
    loss_func : nn.modules.loss._Loss
        损失函数
    """
    model.train(mode=True)  # put the module in training mode
    loader = data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=True
    )  # combines the dataset with a sampler and provides an iterable over the given dataset
    loss = None
    for epoch in range(epochs):
        for step, (x, y) in enumerate(loader):  # x and y are both Tensors
            print_bar(epoch, epochs, step, batch_size, loss)  # show the loss from the previous step
            loss = train_step(
                model=model,
                x=x,
                y=y,
                optimizer=optimizer,
                loss_func=loss_func,
            )


X = [[1., 0.], [1., 1.], [0., 1.], [0., 0.]]  # the four XOR inputs
Y = [[1, ], [0, ], [1, ], [0, ]]  # XOR labels: 1 iff the two inputs differ
DATA = data.TensorDataset(torch.FloatTensor(X), torch.FloatTensor(Y))  # pairs each input row with its label
if __name__ == "__main__":
    print(DEVICE)  # printing "cuda" confirms the GPU is in use
    torch.backends.cudnn.enabled = True  # enable cuDNN (the attribute is "enabled"; "enable" would be a silent no-op)
    layers = [
        (nn.Linear(in_features=2, out_features=2), torch.sigmoid),
        (nn.Linear(in_features=2, out_features=1), torch.sigmoid),
    ]
    model = Model(layers)
    opt = optim.Adam(model.parameters(), lr=0.1)  # the optimizer is told at construction which parameters to update
    loss_func = nn.MSELoss()
    print('Weights before training:')
    for layer, _ in layers:
        print(layer.weight.tolist())
    train(model, DATA, 12, 1000, opt, loss_func)
    for x in X:
        y = model.forward(torch.FloatTensor(x))  # test the trained network
        print(x, y.tolist())
    print('Weights after training:')
    for layer, _ in layers:
        print(layer.weight.tolist())
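
For comparison, the same two-layer network can be written with nn.Sequential, which registers the layers and activations in a single container so no custom forward loop is needed. A minimal sketch (not the original code; it reuses the DEVICE defined in the script above):

from torch import nn

xor_net = nn.Sequential(
    nn.Linear(in_features=2, out_features=2),
    nn.Sigmoid(),  # activation as a module instead of torch.sigmoid
    nn.Linear(in_features=2, out_features=1),
    nn.Sigmoid(),
).to(DEVICE)
# xor_net works anywhere the script expects an nn.Module,
# e.g. optim.Adam(xor_net.parameters(), lr=0.1).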

Console output

cuda
Weights before training:
[[-0.2903967499732971, -0.5596481561660767], [-0.3761187195777893, -0.3520408272743225]]
Epoch 1/1000 |                                                  |
Epoch 2/1000 |                                                  | 0.2529093623161316
Epoch 3/1000 |                                                  | 0.25983208417892456
......
Epoch 1000/1000 |                                                  | 0.013415724039077759
[1.0, 0.0] [0.8732957243919373]
[1.0, 1.0] [0.16902561485767365]
[0.0, 1.0] [0.8731175065040588]
[0.0, 0.0] [0.1027318686246872]
Weights after training:
[[-5.067379951477051, -5.065510272979736], [-3.1121909618377686, -3.113528251647949]]
[[-6.406204700469971, 5.747752666473389]]
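
Thresholding the sigmoid outputs at 0.5 recovers the XOR truth table (0.87 → 1, 0.17 → 0, 0.87 → 1, 0.10 → 0). A minimal check, reusing model and X from the script (sketch only):

with torch.no_grad():  # no gradients needed for evaluation
    for x in X:
        p = model(torch.FloatTensor(x)).item()
        print(x, int(p > 0.5))  # 1, 0, 1, 0 for the four XOR inputs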
