Fine-tuning a Hugging Face ViT on the CIFAR10 dataset (swap the dataset to train on your own data)
Here is the code for fine-tuning a ViT model with Hugging Face, in two versions.

Code I wrote myself:
```python
from transformers import ViTImageProcessor, ViTForImageClassification
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor
from tqdm import tqdm

# Set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("mps")  # or torch.device("cpu")

# Load the CIFAR10 dataset
train_dataset = CIFAR10(root="/data/xinyuuliu/datas", train=True, transform=ToTensor(), download=True)
test_dataset = CIFAR10(root="/data/xinyuuliu/datas", train=False, transform=ToTensor())

def collate_fn(batch):
    """
    Collate a batch of (image, label) pairs.
    :param batch: list of __getitem__ results
    :return: (tuple of image tensors, label tensor)
    """
    images, labels = zip(*batch)
    labels = torch.Tensor(labels)
    return images, labels

# Create the data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)

processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')

# Replace the 1000-class ImageNet head with a 10-class head for CIFAR10
model.config.classifier = 'mlp'
model.config.num_labels = 10
model.classifier = nn.Linear(768, 10)
print(model.classifier)

# Freeze everything except the classifier head
# (its weight and bias are the last two parameter tensors)
parameters = list(model.parameters())
for x in parameters[:-2]:
    x.requires_grad = False

model.to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

def train(model, dataloader, optimizer, criterion):
    model.train()
    running_loss = 0.0
    for inputs, labels in tqdm(dataloader, desc="Training"):
        # The processor resizes each image to 224x224 and normalizes it
        inputs = processor(images=inputs, return_tensors="pt")
        inputs['pixel_values'] = inputs['pixel_values'].to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(**inputs)
        logits = outputs.logits
        loss = criterion(logits, labels.long())
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs['pixel_values'].size(0)
    epoch_loss = running_loss / len(dataloader.dataset)
    return epoch_loss

def evaluate(model, dataloader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in tqdm(dataloader, desc="Evaluating"):
            inputs = processor(images=inputs, return_tensors="pt")
            inputs['pixel_values'] = inputs['pixel_values'].to(device)
            labels = labels.to(device)

            outputs = model(**inputs)
            logits = outputs.logits
            predicted = logits.argmax(-1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = correct / total * 100
    return accuracy

# Train and evaluate
num_epochs = 10
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    train_loss = train(model, train_loader, optimizer, criterion)
    print(f"Training Loss: {train_loss:.4f}")
    test_acc = evaluate(model, test_loader)
    print(f"Test Accuracy: {test_acc:.2f}%")
```
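As the title says, training on your own data mainly means swapping the dataset. Here is a minimal sketch, not a tested recipe: it assumes your images sit in one subfolder per class under the hypothetical paths /data/my_dataset/train and /data/my_dataset/val, and uses torchvision's ImageFolder in place of CIFAR10:

```python
# Minimal sketch: swap CIFAR10 for your own data. The paths below are
# hypothetical -- ImageFolder expects one subfolder per class, e.g.
# /data/my_dataset/train/cat/xxx.jpg.
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor
import torch.nn as nn

train_dataset = ImageFolder(root="/data/my_dataset/train", transform=ToTensor())  # hypothetical path
test_dataset = ImageFolder(root="/data/my_dataset/val", transform=ToTensor())     # hypothetical path

# Resize the classifier head to match the number of class subfolders
num_classes = len(train_dataset.classes)
model.config.num_labels = num_classes
model.classifier = nn.Linear(768, num_classes)
```

Because the collate_fn above keeps images as a tuple instead of stacking them into one tensor, the ViT processor resizes each image to 224x224 on its own, so your images do not need to share a single resolution.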
Code generated by ChatGPT:
```python
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from transformers import ViTForImageClassification
from tqdm import tqdm

# Set the random seed
torch.manual_seed(42)

# Hyperparameters
batch_size = 32
num_epochs = 10
learning_rate = 1e-4
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Data preprocessing: resize to the 224x224 input ViT expects and
# normalize with the per-channel mean/std of 0.5 used by
# google/vit-base-patch16-224
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Load the CIFAR-10 dataset
train_dataset = CIFAR10(root='/data/xinyuuliu/datas', train=True, download=True, transform=transform)
test_dataset = CIFAR10(root='/data/xinyuuliu/datas', train=False, download=True, transform=transform)

# Create the data loaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Load the pretrained ViT model
vit_model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224').to(device)

# Replace the classification head
num_classes = 10
vit_model.config.classifier = 'mlp'
vit_model.config.num_labels = num_classes
vit_model.classifier = nn.Linear(vit_model.config.hidden_size, num_classes).to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vit_model.parameters(), lr=learning_rate)

# Fine-tune the ViT model
for epoch in range(num_epochs):
    print("epoch:", epoch)
    vit_model.train()
    train_loss = 0.0
    train_correct = 0

    bar = tqdm(train_loader, total=len(train_loader))
    for images, labels in bar:
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = vit_model(images)
        loss = criterion(outputs.logits, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = torch.max(outputs.logits, 1)
        train_correct += (predicted == labels).sum().item()

    # Accuracy on the training set
    train_accuracy = 100.0 * train_correct / len(train_dataset)

    # Evaluate on the test set
    vit_model.eval()
    test_loss = 0.0
    test_correct = 0
    with torch.no_grad():
        bar = tqdm(test_loader, total=len(test_loader))
        for images, labels in bar:
            images = images.to(device)
            labels = labels.to(device)

            outputs = vit_model(images)
            loss = criterion(outputs.logits, labels)

            test_loss += loss.item()
            _, predicted = torch.max(outputs.logits, 1)
            test_correct += (predicted == labels).sum().item()

    # Accuracy on the test set
    test_accuracy = 100.0 * test_correct / len(test_dataset)

    # Print training loss, training accuracy, and test accuracy per epoch
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, '
          f'Train Accuracy: {train_accuracy:.2f}%, Test Accuracy: {test_accuracy:.2f}%')
```
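After fine-tuning, you will usually want to save the weights and predict on single images. A minimal sketch; the directory name vit_cifar10_finetuned and the image file test.jpg are hypothetical placeholders:

```python
# Minimal sketch: persist the fine-tuned model, then reload it and
# classify one image. "vit_cifar10_finetuned" and "test.jpg" are
# hypothetical names -- substitute your own.
from PIL import Image
from transformers import ViTImageProcessor, ViTForImageClassification

vit_model.save_pretrained("vit_cifar10_finetuned")  # writes config + weights

processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')
model = ViTForImageClassification.from_pretrained("vit_cifar10_finetuned").to(device)
model.eval()

image = Image.open("test.jpg")  # hypothetical input image
inputs = processor(images=image, return_tensors="pt").to(device)
with torch.no_grad():
    logits = model(**inputs).logits
print("Predicted CIFAR10 class index:", logits.argmax(-1).item())
```

Because model.config.num_labels was set to 10 before saving, from_pretrained rebuilds the 10-class head and loads its fine-tuned weights directly.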
Thinking more is itself a form of effort: make sound analyses and choices, because our time and energy are limited, so spend them where they bring the most value.