Hugging Face ViT training code for the CIFAR10 dataset; you can change the dataset to train on your own data

 

Here is the code, fine-tuning a ViT model with Hugging Face.

Code I wrote myself:

from transformers import ViTImageProcessor, ViTForImageClassification
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor
from tqdm import tqdm
   
# Select the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("mps")
# device = torch.device("cpu")

# Load the CIFAR-10 dataset
train_dataset = CIFAR10(root="/data/xinyuuliu/datas", train=True, transform=ToTensor(), download=True)
test_dataset = CIFAR10(root="/data/xinyuuliu/datas", train=False, transform=ToTensor())
  
  
def collate_fn(batch):
    """
    Collate a batch of (image, label) pairs.
    :param batch: list of __getitem__ results
    :return: tuple of (images, labels)
    """
    reviews, labels = zip(*batch)
    # Keep the images as a tuple of tensors; the ViT processor resizes them later
    labels = torch.Tensor(labels)

    return reviews, labels

# Create the data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)
  
# url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
# image = Image.open(requests.get(url, stream=True).raw)
  
processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')
model.config.classifier = 'mlp'
model.config.num_labels = 10
# Replace the 1000-class ImageNet head with a 10-class head for CIFAR-10
model.classifier = nn.Linear(768, 10)
print(model.classifier)

# Freeze everything except the new classifier head
# (its weight and bias are the last two parameter tensors)
parameters = list(model.parameters())
for x in parameters[:-2]:
    x.requires_grad = False

model.to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
  
def train(model, dataloader, optimizer, criterion):
    model.train()
    running_loss = 0.0
    for inputs, labels in tqdm(dataloader, desc="Training"):
        # ToTensor() already scales pixels to [0, 1], so disable the processor's
        # extra 1/255 rescaling to avoid scaling the images twice
        inputs = processor(images=inputs, do_rescale=False, return_tensors="pt")
        inputs['pixel_values'] = inputs['pixel_values'].to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(**inputs)
        logits = outputs.logits

        loss = criterion(logits, labels.long())
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs['pixel_values'].size(0)

    epoch_loss = running_loss / len(dataloader.dataset)
    return epoch_loss
  
def evaluate(model, dataloader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in tqdm(dataloader, desc="Evaluating"):
            # Same as in training: skip the extra 1/255 rescaling
            inputs = processor(images=inputs, do_rescale=False, return_tensors="pt")
            inputs['pixel_values'] = inputs['pixel_values'].to(device)
            labels = labels.to(device)
               
            outputs = model(**inputs)
            logits = outputs.logits
  
            predicted = logits.argmax(-1)
               
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
       
    accuracy = correct / total * 100
    return accuracy
  
  
# Train and evaluate
num_epochs = 10
   
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    train_loss = train(model, train_loader, optimizer, criterion)
    print(f"Training Loss: {train_loss:.4f}")
  
    test_acc = evaluate(model, test_loader)
    print(f"Test Accuracy: {test_acc:.2f}%")

  

Code generated by ChatGPT:

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from transformers import ViTForImageClassification
from tqdm import tqdm
 
# Set the random seed for reproducibility
torch.manual_seed(42)
 
# Hyperparameters
batch_size = 32
num_epochs = 10
learning_rate = 1e-4
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
# Data preprocessing: resize to the 224x224 ViT input size and normalize
# with the same mean/std (0.5) that the pretrained ViT processor uses
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])
 
# Load the CIFAR-10 dataset
train_dataset = CIFAR10(root='/data/xinyuuliu/datas', train=True, download=True, transform=transform)
test_dataset = CIFAR10(root='/data/xinyuuliu/datas', train=False, download=True, transform=transform)
 
# Create the data loaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
 
# Load the pretrained ViT model
vit_model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224').to(device)
 
# Replace the classification head
num_classes = 10
vit_model.config.classifier = 'mlp'
vit_model.config.num_labels = num_classes
vit_model.classifier = nn.Linear(vit_model.config.hidden_size, num_classes).to(device)
 
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vit_model.parameters(), lr=learning_rate)
 
# Fine-tune the ViT model
for epoch in range(num_epochs):
    print("epoch:",epoch)
    vit_model.train()
    train_loss = 0.0
    train_correct = 0
 
    bar = tqdm(train_loader,total=len(train_loader))
    for images, labels in bar:
        images = images.to(device)
        labels = labels.to(device)
 
        # Forward pass
        outputs = vit_model(images)
        loss = criterion(outputs.logits, labels)
 
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
 
        train_loss += loss.item()
        _, predicted = torch.max(outputs.logits, 1)
        train_correct += (predicted == labels).sum().item()
 
    # Training accuracy for this epoch
    train_accuracy = 100.0 * train_correct / len(train_dataset)
 
    # Evaluate on the test set
    vit_model.eval()
    test_loss = 0.0
    test_correct = 0
 
    with torch.no_grad():
        bar = tqdm(test_loader,total=len(test_loader))
        for images, labels in bar:
            images = images.to(device)
            labels = labels.to(device)
 
            outputs = vit_model(images)
            loss = criterion(outputs.logits, labels)
 
            test_loss += loss.item()
            _, predicted = torch.max(outputs.logits, 1)
            test_correct += (predicted == labels).sum().item()
 
    # Test accuracy for this epoch
    test_accuracy = 100.0 * test_correct / len(test_dataset)
 
    # Report the average training loss, training accuracy and test accuracy for this epoch
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss/len(train_loader):.4f}, Train Accuracy: {train_accuracy:.2f}%, Test Accuracy: {test_accuracy:.2f}%')
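Once fine-tuning finishes, it is usually worth saving the weights and checking a single image. A minimal sketch; the save directory and image path below are placeholders, not part of the original post:

from PIL import Image
from transformers import ViTImageProcessor

# Save the fine-tuned model (directory name is a placeholder)
vit_model.save_pretrained("vit_cifar10_finetuned")

# Single-image inference with the matching processor
processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')
image = Image.open("some_test_image.jpg")  # placeholder path
inputs = processor(images=image, return_tensors="pt").to(device)

vit_model.eval()
with torch.no_grad():
    logits = vit_model(**inputs).logits
print("Predicted CIFAR-10 class index:", logits.argmax(-1).item())

The model can later be reloaded with ViTForImageClassification.from_pretrained('vit_cifar10_finetuned'), since the modified num_labels is stored in its config.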

  
