pytorch测试cuda是否可用的简单代码
#!/usr/bin/env python # -*- coding:utf-8-*- # file: test_mnist_centerloss.py # @author: jory.d # @contact: dangxusheng163@163.com # @time: 2019/11/29 11:11 # @desc: """ conda install pytorch=1.11 torchvision=0.12 -c conda-forge python 3.8 pytorch=1.11.0=cuda112py38habe9d5a_202 torchvision=0.12.0=cuda112py38h46b2766_1 cudatoolkit=11.8 cudnn=8.9.7 """ import os import os.path as osp os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" os.environ["CUDA_VISIBLE_DEVICES"] = str(0) import time import torch import torch.nn as nn import torch.nn.functional as F import torchvision.transforms as TF import torchvision.datasets as datasets import torch.optim.lr_scheduler as lr_scheduler from torch.utils.data import DataLoader import cv2 batch_size = 128 base_lr = 0.005 test_batch = 256 weight_decay = 0.0001 epochs = 10 gradient_clip = 5 num_classes = 10 input_size = 56 input_ch = 1 last_epoch = -1 eval_freq = 2 dataset_root = '/home/dangxs/projects/ai_dataset' save_model_path = './mnist_test/mnist_classifer.pth' onnx_model_path = './mnist_test/mnist_classifer.onnx' os.makedirs(osp.dirname(save_model_path), exist_ok=True) def get_mnist_loader(transform, is_train=True): """ mnist 数据集加载 目录结构如下: -- a --MNIST_Data --MNIST --processed --training.pt --test.pt --raw --t10k-images-idx3-ubyte --t10k-labels-idx1-ubyte --train-images-idx3-ubyte --train-labels-idx1-ubyte """ train_loader = DataLoader( datasets.MNIST( # ../MNIST_Data/mnist/MNIST/processed/training.pt # ../MNIST_Data/mnist/MNIST/processed/raw/t10k-images-idx3-ubyte dataset_root + "/mnist/MNIST_Data", # "../MNIST_Data/mnist", train=is_train, download=False, transform=transform, ), batch_size=batch_size if is_train else test_batch, shuffle=True, num_workers=4, pin_memory=True ) return train_loader def get_cifar10_loader(transform, is_train=True): train_loader = DataLoader( datasets.CIFAR10( # ../CIFAR10_Data/cifar10/cifar-10-batches-py/data_batch_1 # ../CIFAR10_Data/cifar10/cifar-10-batches-py/data_batch_2 dataset_root + "/CIFAR10_Data/cifar10", train=is_train, download=False, transform=transform, ), batch_size=batch_size if is_train else test_batch, shuffle=True, num_workers=4, pin_memory=True ) return train_loader class Classifier(nn.Module): def __init__(self, in_c, in_size, n_classes): super(Classifier, self).__init__() self.layers = nn.Sequential( nn.Conv2d(in_c, 16, 3, 1, 1), nn.ReLU(True), nn.MaxPool2d(2, 2), # 1/2 nn.Conv2d(16, 32, 3, 1, 1), nn.ReLU(True), nn.MaxPool2d(2, 2), # 1/2 nn.Conv2d(32, 64, 3, 1, 1), nn.ReLU(True), nn.MaxPool2d(2, 2), # 1/2 ) scale = 2 ** 3 out_conv_size = in_size // scale self.fc = nn.Linear(64 * out_conv_size * out_conv_size, n_classes) def forward(self, x): x = self.layers(x) # print("x.shape: ", x.shape) x = x.view(x.size(0), -1) x = self.fc(x) return x def get_model(): # net = minst_model.Classifier(num_classes) # net = minst_model_bak.Classifier(num_classes) # net = reneXt_Net2_mnist.Classifier(input_ch, input_size, num_classes) # net = resneXt_Net2.Classifier(input_ch, input_size, num_classes) # net = reneXt_Net2.Classifier(input_ch, input_size, num_classes) net = Classifier(1, input_size, num_classes) return net def get_current_lr(optimizer): for param_group in optimizer.param_groups: return param_group['lr'] def get_model_parameters(model): """ 获取模型的总参数, 可训练参数, 模型大小MB :param model: :return: """ total_params = sum(param.numel() for param in model.parameters()) train_params = sum(param.numel() for param in model.parameters() if param.requires_grad) model_size = total_params * 4 / 1024 / 1024 return total_params, train_params, model_size def get_scheduler(optimizer): lr_policy = 'linear' if lr_policy == 'linear': start_epoch = 5 # 从 5 epoch 开始衰减 niter_decay = 2 # 每隔 2 epoch 衰减一次 def lambda_rule(epoch): lr_l = 1.0 - max(0, epoch - start_epoch) / float(niter_decay + 1) lr_l = max(0.000001, lr_l) return lr_l scheduler = lr_scheduler.LambdaLR(optimizer, lambda_rule) elif lr_policy == 'step': decay_step = 30 # 每过 30 epoch 衰减一次 decay_factor = 0.1 # 衰减因子, # 当epoch每过stop_size时, 学习率都变为初始学习率的gamma倍 # Assuming optimizer uses lr = 0.05 for all groups # lr = 0.05 if epoch < 30 # lr = 0.005 if 30 <= epoch < 60 # lr = 0.0005 if 60 <= epoch < 90 scheduler = lr_scheduler.StepLR(optimizer, step_size=decay_step, gamma=decay_factor) elif lr_policy == 'exp': decay_factor = 0.1 # 衰减因子, scheduler = lr_scheduler.ExponentialLR(optimizer, decay_factor, last_epoch=-1) elif lr_policy == 'plateau': # 当网络的评价指标不在提升的时候,可以通过降低网络的学习率来提高网络性能 # patience : 容忍网路的性能不提升的次数,高于这个次数就降低学习率 # factor : 学习率每次降低多少,new_lr = old_lr * factor # threshold : 测量新最佳值的阈值,仅关注重大变化。 # min_lr,学习率的下限 scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, min_lr=0.000001, mode='min', factor=0.2, threshold=0.01, patience=5) elif lr_policy == 'cosine': # 余弦退火学习率策略 """ 以余弦函数为周期,并在每个周期最大值时重新设置学习率。以初始学习率为最大学习率, 以 2∗Tmax2∗Tmax 为周期,在一个周期内先下降,后上升。 T_max(int) : 一次学习率周期的迭代次数,即 T_max 个 epoch 之后重新设置学习率。 eta_min : 最小学习率,即在一个周期中,学习率最小会下降到 eta_min,默认值为 0。 """ scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0.000001) else: return NotImplementedError('learning rate policy [%s] is not implemented', lr_policy) return scheduler def main(): global last_epoch # define netowrk net = get_model().cuda() total_params, train_params, model_size = get_model_parameters(net) print(" == total parameters: " + str(total_params)) print(" == train parameters: " + str(train_params)) print(" == total model size: %s MB" % model_size) criterion = nn.CrossEntropyLoss().cuda() # softmax loss optimizer = torch.optim.SGD(net.parameters(), lr=base_lr, momentum=0.9, weight_decay=weight_decay, nesterov=True) # define lr policy lr_adjust_scheduler = get_scheduler(optimizer) # load training data, do data augmentation and get data loader transform_train = TF.Compose([ TF.Resize((input_size,input_size)), TF.Grayscale(1), # # ================================================================================================ # TF.TenCrop(28), # this is a list of PIL Images # TF.Lambda(lambda crops: torch.stack([TF.ToTensor()(crop) for crop in crops])), # returns a 4D tensor # TF.Lambda(lambda crops: torch.stack([TF.Normalize([0.5], [0.5])(crop) for crop in crops])) # # ================================================================================================ TF.ToTensor(), TF.Normalize([0.5], [0.5]) ]) transform_test = TF.Compose([ TF.Resize((input_size, input_size)), TF.Grayscale(1), TF.ToTensor(), TF.Normalize([0.5], [0.5]) ]) train_loader = get_mnist_loader(transform_train, True) test_loader = get_mnist_loader(transform_test, False) # train_loader = get_cifar10_loader(transform_train, True) # test_loader = get_cifar10_loader(transform_test, False) print(" ======= Training =======\n") for epoch in range(last_epoch + 1, epochs): train(train_loader, net, criterion, optimizer, epoch) if epoch == 0 or (epoch + 1) % eval_freq == 0 or epoch == epochs - 1: validate(test_loader, net, criterion, optimizer, epoch) lr_adjust_scheduler.step() torch.cuda.empty_cache() def train(train_loader, net, criterion, optimizer, epoch): start = time.time() net.train() train_loss, correct, total = 0, 0, 0 print(" === Epoch: [{}/{}] === ".format(epoch + 1, epochs)) for batch_index, (inputs, targets) in enumerate(train_loader): # move tensor to GPU inputs, targets = inputs.cuda(), targets.cuda() # # ============================================================== # # batchs_size 变成原来的10倍。label也要做处理 # this_batch_size = targets.size(0) # bs, ncrops, c, h, w = inputs.size() # input is a 5d tensor, target is 2d # inputs = inputs.view(-1, c, h, w) # targets1 = torch.zeros([inputs.size(0)]).long().cuda() # for i in range(this_batch_size): # targets1[i * 10:(i + 1) * 10] = targets[i] # # # shuffle x & y # shuffle_idx = torch.randperm(targets1.size(0)) # inputs = inputs[shuffle_idx] # targets = targets1[shuffle_idx] # # ============================================================== outputs = net(inputs) loss = criterion(outputs, targets) # zero the gradient buffers optimizer.zero_grad() # backward loss.backward() # clip gradient nn.utils.clip_grad_norm_(net.parameters(), gradient_clip) # update weight optimizer.step() # count the loss and acc train_loss += loss.item() predicted = torch.argmax(outputs, dim=-1) total += targets.size(0) correct += predicted.eq(targets).sum().item() if (batch_index + 1) % 50 == 0: print(" == step: [{:3}/{}], train loss: {:.3f} | train acc: {:6.3f}% | lr: {:.6f}".format( batch_index + 1, len(train_loader), train_loss / (batch_index + 1), 100.0 * correct / total, get_current_lr(optimizer))) end = time.time() print(" == cost time: {:.4f}s".format(end - start)) train_loss = train_loss / (batch_index + 1) train_acc = correct / total return train_loss, train_acc def validate(test_loader, net, criterion, optimizer, epoch): global best_prec net.eval() test_loss, correct, total = 0, 0, 0 print(" === Validate ===".format(epoch + 1, epochs)) with torch.no_grad(): for batch_index, (inputs, targets) in enumerate(test_loader): inputs, targets = inputs.cuda(), targets.cuda() outputs = net(inputs) loss = criterion(outputs, targets) probs = F.softmax(outputs, dim=-1) test_loss += loss.item() predicted = torch.argmax(probs, dim=-1) total += targets.size(0) correct += predicted.eq(targets).sum().item() # save checkpoint state = { 'state_dict': net.state_dict(), 'last_epoch': epoch, 'optimizer': optimizer.state_dict(), } torch.save(state, save_model_path) print(" == test loss: {:.3f} | test acc: {:6.3f}%".format( test_loss / (batch_index + 1), 100.0 * correct / total)) test_loss = test_loss / (batch_index + 1) test_acc = correct / total return test_loss, test_acc def export_onnx(): model = get_model().cuda() assert osp.exists(save_model_path) checkpoint = torch.load(save_model_path) model.load_state_dict(checkpoint['state_dict'], strict=True) model.eval() dummy_input = torch.randn(1, input_ch, input_size, input_size).cuda() torch.onnx.export(model, dummy_input, onnx_model_path) from PIL import Image def print_mid_layers(): model = get_model().cuda() assert osp.exists(save_model_path) checkpoint = torch.load(save_model_path) model.load_state_dict(checkpoint['state_dict'], strict=True) model.eval() print('loaded is done.') root = '/home/lpadas1/share/HDD/jory.d/dataset' image_path = root + '/face_expression_dataset/others/download_faces_nolabels/17-0_1.bmp' assert osp.exists(image_path) transform_test = TF.Compose([ TF.Resize((input_size, input_size)), TF.Grayscale(1), TF.ToTensor(), TF.Normalize([0.5], [0.5]), ]) image_tensor = transform_test(Image.open(image_path).convert('RGB')) # # print(image_tensor.size()) image_tensor = torch.unsqueeze(image_tensor, 0).cuda().float() def hook(module, input, output): md_output = output.clone() md_output = torch.flatten(md_output)[:10] print(f'{module}: {md_output}') # 注册hook function for name, md in model.named_modules(): if name == '': continue if len(md._modules) > 0: continue else: md.register_forward_hook(hook) with torch.no_grad(): outputs = model(image_tensor) probs = torch.nn.functional.softmax(outputs, dim=-1) predicted = torch.argmax(probs, dim=-1) print('fc output: ', outputs) print('softmax output: ', probs) print('pred: ', predicted) if __name__ == '__main__': # print_mid_layers() # export_onnx() main() # net = Classifier(1,28,10) # input_shape = (1, 28, 28) # input = torch.randn(1, *input_shape) # out = net(input) # print(out)
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· winform 绘制太阳,地球,月球 运作规律
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 上周热点回顾(3.3-3.9)
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人