Reproducing the PointNet Classification Network

Training only uses the first chunk of ModelNet40 (ply_data_train0.h5).

The overall idea is the same as for images, except that Conv1d is used instead of Conv2d, and the dataset handling is also slightly different.
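To make the Conv1d point concrete, here is a minimal sketch (illustrative names only, not part of the reproduction code): a Conv1d with kernel_size=1 applies the same small linear map independently to every point of a (batch, channels, num_points) tensor, which is exactly the "shared MLP" of PointNet.

import torch
import torch.nn as nn

# A batch of 2 point clouds, 3 coordinate channels (x, y, z), 1024 points each.
points = torch.randn(2, 3, 1024)

# kernel_size=1 means the same 3 -> 64 weights are applied to every point independently.
shared_mlp = nn.Conv1d(3, 64, kernel_size = 1)
features = shared_mlp(points)
print(features.shape)  # torch.Size([2, 64, 1024])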

 

my_Dataset.py:

import h5py
import torch
from torch.utils import data


#Should the dataset do the transpose and convert to a tensor? (Here it does, in __getitem__.)
class Dataset(data.Dataset):
    def __init__(self, root):
        super(Dataset, self).__init__()
        file = h5py.File(root, 'r')
        self.data = file['data'][:]
        #The labels are stored as (N, 1); flatten them to a 1-D array.
        self.label = file['label'][:].reshape(-1)
        file.close()
    def __getitem__(self, index):
        #A point cloud comes in as n * 3, unlike an image which is n * m * 3; a point cloud has no height dimension, and x, y, z are the three channels.
        #An image becomes 3 * n * m via ToTensor; a point cloud only needs a transpose to 3 * n, then conversion to a tensor.
        return torch.tensor(self.data[index].T), self.label[index]
    def __len__(self):
        return len(self.label)
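As a quick sanity check (a sketch, assuming the ModelNet40 HDF5 files with 2048 points per cloud and the same path used in train.py below), the loader should yield batches of shape (batch, 3, 2048) plus a 1-D label tensor:

import torch.utils.data.dataloader as Dataloader
from my_Dataset import Dataset

dataset = Dataset('H:/DataSet/modelnet40_ply_hdf5_2048/ply_data_train0.h5')
dataloader = Dataloader.DataLoader(dataset, batch_size = 4, shuffle = True)
data, label = next(iter(dataloader))
print(data.shape, label.shape)  # expected: torch.Size([4, 3, 2048]) torch.Size([4])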

 

configuration.py:

import torch


class config():
    batch_size = 4
    num_epochs = 10
    num_classes = 40
    num_workers = 8
    checkpoints_root = 'C:/Users/Dell/PycharmProjects/PointNet/checkpoints'
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    log_dir = 'C:/Users/Dell/PycharmProjects/PointNet/checkpoints/log'
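The scripts below read these settings through con = config(); since every field is a class attribute, they could also be accessed on the class directly. A minimal usage sketch:

from configuration import config

con = config()
print(con.device, con.batch_size, con.num_classes)  # e.g. cuda 4 40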

 

Model.py:

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from tqdm import tqdm
from configuration import config

device = 'cuda' if torch.cuda.is_available() else 'cpu'

con = config()

#T-Net: takes the point cloud, produces a k x k matrix, adds the identity E, and returns it.

class T_Net(nn.Module):
    def __init__(self, k):
        super().__init__()
        self.k = k
        self.conv1 = nn.Conv1d(self.k, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, self.k * self.k)

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)


    def forward(self, x):
        bs = x.size(0)
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        x = F.max_pool1d(x, x.size(-1))
        x = x.view(x.size(0), -1)
        x = F.relu(self.bn4(self.fc1(x)))
        x = F.relu(self.bn5(self.fc2(x)))
        x = self.fc3(x)
        #Build a k x k identity matrix E and tile it to bs copies of shape (bs, k, k).
        #The two 1s in repeat(bs, 1, 1) are the tiling factors for the row and column
        #dimensions, so each k x k block stays k x k and only the batch dimension grows.
        E = torch.eye(self.k).repeat(bs, 1, 1).to(device)
        matrix = x.view(-1, self.k, self.k) + E
        return matrix


class PNet(nn.Module):
    def __init__(self):
        super(PNet, self).__init__()
        self.input_transform = T_Net(k = 3)
        self.feature_transform = T_Net(k = 64)
        self.conv1 = nn.Conv1d(3, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)

        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, con.num_classes)

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)

        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        input_matrix = self.input_transform(x)
        x = torch.bmm(torch.transpose(x, 1, 2), input_matrix).transpose(1, 2)
        x = F.relu(self.bn1(self.conv1(x)))
        feature_matrix = self.feature_transform(x)
        x = torch.bmm(x.transpose(1, 2), feature_matrix).transpose(1, 2)
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.bn3(self.conv3(x))
        #After the stack of convolutions, the 3 * n point cloud has become 1024 * n features.
        #Global max pooling takes the maximum of each of the 1024 channels over all n points, giving 1024 * 1.
        #The second argument of max_pool1d is the pooling window, which here is n; -1 indexes the last entry of size().
        x = F.max_pool1d(x, x.size(-1))

        x = x.view(x.size(0), -1)
        x = F.relu(self.bn4(self.fc1(x)))
        x = F.relu(self.bn5(self.dropout(self.fc2(x))))
        x = self.fc3(x)
        return x, input_matrix, feature_matrix
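As a quick shape check of the whole network (a sketch with made-up input sizes, run on CPU), a random (batch, 3, points) batch should give logits of shape (batch, num_classes) together with the 3 x 3 and 64 x 64 alignment matrices:

import torch
from Model import PNet

model = PNet()
model.eval()  # use BatchNorm running statistics so a tiny batch is fine
with torch.no_grad():
    dummy = torch.randn(2, 3, 1024)  # 2 clouds, 3 channels, 1024 points
    logits, input_matrix, feature_matrix = model(dummy)
print(logits.shape, input_matrix.shape, feature_matrix.shape)
# expected: torch.Size([2, 40]) torch.Size([2, 3, 3]) torch.Size([2, 64, 64])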

test.py:

import torch
import torch.nn
import torch.utils.data.dataloader as Dataloader
from configuration import config
from my_Dataset import Dataset
from Model import PNet
import os


if __name__ == '__main__':
    con = config()
    model = PNet()
    checkpoint = torch.load(os.path.join(con.checkpoints_root, 'checkpoint_10.pkl'))
    model.load_state_dict(checkpoint['model'])
    model.to(con.device)
    model.eval()  # switch BatchNorm/Dropout to inference mode
    dataset = Dataset('H:/DataSet/modelnet40_ply_hdf5_2048/ply_data_test0.h5')
    dataloader = Dataloader.DataLoader(dataset, batch_size=2, shuffle = True)
    cnt = 0
    for data, label in dataloader:
        data = data.to(con.device)  # .to() is not in-place for tensors, so reassign
        with torch.no_grad():
            output = model(data)[0]
        pred = torch.max(output, 1)[1]
        print(pred, label)
        cnt += 1
        if cnt == 20:
            break
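To go beyond eyeballing 20 batches, a small extension of the same script (a sketch reusing the checkpoint and test file above) reports overall accuracy on this test chunk:

import os
import torch
import torch.utils.data.dataloader as Dataloader
from configuration import config
from my_Dataset import Dataset
from Model import PNet

con = config()
model = PNet()
checkpoint = torch.load(os.path.join(con.checkpoints_root, 'checkpoint_10.pkl'))
model.load_state_dict(checkpoint['model'])
model.to(con.device)
model.eval()

dataset = Dataset('H:/DataSet/modelnet40_ply_hdf5_2048/ply_data_test0.h5')
dataloader = Dataloader.DataLoader(dataset, batch_size = 16, shuffle = False)

correct, total = 0, 0
with torch.no_grad():
    for data, label in dataloader:
        data = data.to(con.device)
        label = label.long().to(con.device)
        pred = torch.max(model(data)[0], 1)[1]
        correct += (pred == label).sum().item()
        total += label.size(0)
print('Test accuracy: {:.2%}'.format(correct / total))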

train.py:

import torch
import torch.nn as nn
import torch.utils.data.dataloader as Dataloader
from tqdm import tqdm
from my_Dataset import Dataset
from Model import PNet
from configuration import config
import os
from tensorboardX import SummaryWriter


con = config()

def loss_function(output, label, input_matrix, feature_matrix, alpha = 0.0001):
    loss = nn.CrossEntropyLoss()
    bs = output.size(0)
    #Identity matrices for the orthogonality regularizer, moved to the same device as the predictions.
    E_3 = torch.eye(3).repeat(bs, 1, 1).to(con.device)
    E_64 = torch.eye(64).repeat(bs, 1, 1).to(con.device)
    diff3 = E_3 - torch.bmm(input_matrix, input_matrix.transpose(1, 2))
    diff64 = E_64 - torch.bmm(feature_matrix, feature_matrix.transpose(1, 2))
    #CrossEntropyLoss expects int64 class indices; .long() keeps the label on its current device.
    label = label.long()
    return loss(output, label) + alpha * (torch.norm(diff3) + torch.norm(diff64)) / float(bs)



if __name__ == '__main__':
    data_path = 'H:/DataSet/modelnet40_ply_hdf5_2048/ply_data_train0.h5'
    dataset = Dataset(data_path)
    dataloader = Dataloader.DataLoader(dataset, batch_size = con.batch_size, shuffle = True, num_workers = con.num_workers)
    model = PNet()
    model.to(con.device)
    optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)
    tbwriter = SummaryWriter(logdir = con.log_dir)
    for epoch in range(con.num_epochs):
        total_loss = 0
        total_true = 0
        cnt = 0
        total_img = 0
        for data, label in tqdm(dataloader):
            #.to() is not in-place for tensors, so the results must be reassigned.
            data = data.to(con.device)
            label = label.to(con.device)
            optimizer.zero_grad()
            output, input_matrix, feature_matrix = model(data)
            loss_value = loss_function(output, label, input_matrix, feature_matrix)
            loss_value.backward()
            optimizer.step()
            pred = torch.max(output, 1)[1]
            total_true += torch.sum(pred == label).item()
            #.item() detaches the loss so the computation graph is not kept across iterations.
            total_loss += loss_value.item()
            cnt += 1
            total_img += len(label)
        tbwriter.add_scalar('Loss', total_loss / float(cnt), epoch)
        tbwriter.add_scalar('Accuracy', total_true / float(total_img), epoch)

        print('Loss:{:.4f}, Accuracy:{:.2f}'.format(total_loss / float(cnt), total_true / float(total_img)))
        if (epoch + 1) % 10 == 0:
            state = {
                'model': model.state_dict()
            }
            torch.save(state, os.path.join(con.checkpoints_root, 'checkpoint_{}.pkl'.format(epoch + 1)))


    print('Train Accepted')
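The alpha term in loss_function is PointNet's orthogonality regularizer: it penalizes the Frobenius norm of I - A A^T, which is zero exactly when the predicted transform A is orthogonal. A standalone sanity check of that term (a sketch, independent of the training code):

import torch

def orthogonality_penalty(A):
    # A has shape (batch, k, k); the penalty is the Frobenius norm of I - A @ A^T over the batch tensor.
    bs, k, _ = A.shape
    E = torch.eye(k, device = A.device).repeat(bs, 1, 1)
    return torch.norm(E - torch.bmm(A, A.transpose(1, 2)))

identity_batch = torch.eye(3).repeat(4, 1, 1)  # perfectly orthogonal transforms
random_batch = torch.randn(4, 3, 3)            # generic, non-orthogonal matrices
print(orthogonality_penalty(identity_batch))   # tensor(0.)
print(orthogonality_penalty(random_batch))     # some positive value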

 
