PointNet笔记

1.PointNet笔记2024-07-18

2.PointNet++笔记2024-08-11

可能遇到的问题

在windows上运行pointnet的代码时，可能会遇到一些问题：
1.比如提示OSError: no file with expected extension，这是因为可视化的show3d_balls.py文件运行不了，具体的解决方法可以看这篇文章：https://blog.51cto.com/u_16213693/7738038。
2.由于作者的pointnet所用的pytorch版本比较低，可能会出现 AttributeError: module 'distutils' has no attribute 'version' 的报错，具体的解决方法可以参考https://blog.csdn.net/qq_42076902/article/details/129261266。

代码详解

点云数据集加载部分不在这里过多解释，具体的注释代码请见本文末尾“dataset.py文件中的ShapeNetDataset类的注释”一节。

从网络结构图可以知道，分割网络其实也用到了分类网络的一部分。拿分割部分来说，程序的入口在文件 train_segmentation.py 中，其中seger = PointNetDenseCls(k=num_classes, feature_transform=opt.feature_transform) 这段代码代表了初始化一个分割模型，接下来我们进入到这个模型中去查看具体细节。
按住Ctrl并用鼠标点击PointNetDenseCls，进入到 model.py 中。在看PointNetDenseCls这个类之前，我们先根据这个文件中定义的类把整个网络架构梳理一遍：首先看图中的蓝色部分，也就是分类网络，这时进入到PointNetfeat这个类中查看，代码和注释如下：

# Input spatial transformer (T-Net): predicts one 3x3 transform per point cloud.
class STN3d(nn.Module):
    """Regress a 3x3 transform matrix from a batch of 3-D point clouds.

    The input is expected as ``(B, 3, N)`` (channels first), because the first
    layer is ``Conv1d(3, 64, 1)``. The output has shape ``(B, 3, 3)``.
    """

    def __init__(self):
        super(STN3d, self).__init__()
        # Kernel-size-1 convolutions lift every point independently: 3 -> 64 -> 128 -> 1024.
        self.conv1 = torch.nn.Conv1d(3, 64, 1)
        self.conv2 = torch.nn.Conv1d(64, 128, 1)
        self.conv3 = torch.nn.Conv1d(128, 1024, 1)
        # Fully connected head that maps the pooled 1024-d vector to 9 matrix entries.
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 9)
        self.relu = nn.ReLU()

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)

    def forward(self, x):
        """Return a ``(B, 3, 3)`` transform for an input of shape ``(B, 3, N)``."""
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))  # (B, 1024, N)
        # Max over the point axis collapses the cloud into one vector per sample.
        x = torch.max(x, 2, keepdim=True)[0]  # (B, 1024, 1)
        x = x.view(-1, 1024)  # (B, 1024)

        x = F.relu(self.bn4(self.fc1(x)))
        x = F.relu(self.bn5(self.fc2(x)))
        x = self.fc3(x)  # (B, 9)

        # The network predicts an offset from the identity matrix. Building the
        # identity with the activation's dtype/device keeps this working on any
        # backend (CPU, CUDA, ...) and precision; broadcasting covers the batch.
        iden = torch.eye(3, dtype=x.dtype, device=x.device).view(1, 9)
        x = x + iden
        return x.view(-1, 3, 3)

# Feature-space transformer: same design as STN3d but for k-dimensional features.
class STNkd(nn.Module):
    """Regress a ``k x k`` transform matrix from per-point feature maps.

    Args:
        k: Number of input feature channels; the predicted matrix is ``k x k``.

    The input is expected as ``(B, k, N)`` and the output has shape ``(B, k, k)``.
    """

    def __init__(self, k=64):
        super(STNkd, self).__init__()
        # Per-point MLP implemented with kernel-size-1 convolutions.
        self.conv1 = torch.nn.Conv1d(k, 64, 1)
        self.conv2 = torch.nn.Conv1d(64, 128, 1)
        self.conv3 = torch.nn.Conv1d(128, 1024, 1)
        # Head mapping the pooled 1024-d vector to the k*k matrix entries.
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, k * k)
        self.relu = nn.ReLU()

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)

        self.k = k

    def forward(self, x):
        """Return a ``(B, k, k)`` transform for an input of shape ``(B, k, N)``."""
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        # Max over the point axis gives one 1024-d descriptor per sample.
        x = torch.max(x, 2, keepdim=True)[0]
        x = x.view(-1, 1024)

        x = F.relu(self.bn4(self.fc1(x)))
        x = F.relu(self.bn5(self.fc2(x)))
        x = self.fc3(x)  # (B, k*k)

        # Predict an offset from the identity; create the identity directly on
        # the activation's device and in its dtype, and let broadcasting expand
        # it over the batch.
        iden = torch.eye(self.k, dtype=x.dtype, device=x.device).view(1, self.k * self.k)
        x = x + iden
        return x.view(-1, self.k, self.k)


# Feature-extraction trunk shared by the classification and segmentation heads.
class PointNetfeat(nn.Module):
    """Encode a point cloud into a global (or global + per-point) feature.

    Args:
        global_feat: When true, ``forward`` returns only the pooled 1024-d
            vector; otherwise the pooled vector is tiled over the points and
            concatenated with the 64-channel per-point features.
        feature_transform: When true, a 64x64 ``STNkd`` transform is also
            applied to the 64-channel features.
    """

    def __init__(self, global_feat=True, feature_transform=False):
        super(PointNetfeat, self).__init__()
        # Transform network applied to the raw xyz coordinates.
        self.stn = STN3d()
        # Per-point MLP: 3 -> 64 -> 128 -> 1024 channels.
        self.conv1 = nn.Conv1d(3, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.global_feat = global_feat
        self.feature_transform = feature_transform
        # The feature-space transform only exists when it was requested.
        if self.feature_transform:
            self.fstn = STNkd(k=64)

    def forward(self, x):
        """Return ``(features, trans, trans_feat)`` for ``x`` of shape ``(B, 3, N)``.

        ``trans_feat`` is ``None`` unless the feature transform is enabled.
        """
        num_points = x.size(2)

        # Apply the predicted 3x3 transform: (B, N, 3) @ (B, 3, 3), then back to (B, 3, N).
        trans = self.stn(x)
        x = torch.bmm(x.transpose(2, 1), trans).transpose(2, 1)
        x = F.relu(self.bn1(self.conv1(x)))

        # Optional 64x64 transform of the per-point features, same pattern as above.
        trans_feat = None
        if self.feature_transform:
            trans_feat = self.fstn(x)
            x = torch.bmm(x.transpose(2, 1), trans_feat).transpose(2, 1)

        # Keep the 64-channel per-point features for the concatenated output.
        pointfeat = x
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.bn3(self.conv3(x))
        # Max-pool over the point axis: (B, 1024, N) -> (B, 1024).
        pooled = torch.max(x, 2, keepdim=True)[0].view(-1, 1024)

        if self.global_feat:
            return pooled, trans, trans_feat

        # Tile the global vector over every point and stack it on the local
        # features, giving 1024 + 64 channels per point.
        tiled = pooled.view(-1, 1024, 1).repeat(1, 1, num_points)
        return torch.cat([tiled, pointfeat], 1), trans, trans_feat


# Classification head placed on top of the PointNetfeat trunk.
class PointNetCls(nn.Module):
    """Classify a whole point cloud into one of ``k`` categories.

    Args:
        k: Number of output classes.
        feature_transform: Forwarded to ``PointNetfeat``.
    """

    def __init__(self, k=2, feature_transform=False):
        super(PointNetCls, self).__init__()
        self.feature_transform = feature_transform
        # Trunk configured to return only the pooled global feature.
        self.feat = PointNetfeat(global_feat=True, feature_transform=feature_transform)
        # MLP 1024 -> 512 -> 256 -> k.
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, k)
        self.dropout = nn.Dropout(p=0.3)
        self.bn1 = nn.BatchNorm1d(512)
        self.bn2 = nn.BatchNorm1d(256)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``(log_probs, trans, trans_feat)``; ``log_probs`` is ``(B, k)``."""
        global_vec, trans, trans_feat = self.feat(x)
        hidden = F.relu(self.bn1(self.fc1(global_vec)))
        # Dropout is applied to the second linear layer's output before batch norm.
        hidden = self.dropout(self.fc2(hidden))
        hidden = F.relu(self.bn2(hidden))
        logits = self.fc3(hidden)
        return F.log_softmax(logits, dim=1), trans, trans_feat


# Segmentation network: PointNetfeat trunk plus a per-point classification head.
class PointNetDenseCls(nn.Module):
    """Predict one of ``k`` part labels for every point of a cloud.

    Args:
        k: Number of part classes.
        feature_transform: Forwarded to ``PointNetfeat``.
    """

    def __init__(self, k=2, feature_transform=False):
        super(PointNetDenseCls, self).__init__()
        self.k = k
        self.feature_transform = feature_transform
        # Trunk configured to return per-point features (global + local).
        self.feat = PointNetfeat(global_feat=False, feature_transform=feature_transform)
        # 1088 input channels = 1024 (tiled global feature) + 64 (local feature).
        self.conv1 = nn.Conv1d(1088, 512, 1)
        self.conv2 = nn.Conv1d(512, 256, 1)
        self.conv3 = nn.Conv1d(256, 128, 1)
        self.conv4 = nn.Conv1d(128, self.k, 1)
        self.bn1 = nn.BatchNorm1d(512)
        self.bn2 = nn.BatchNorm1d(256)
        self.bn3 = nn.BatchNorm1d(128)

    def forward(self, x):
        """Return ``(log_probs, trans, trans_feat)``; ``log_probs`` is ``(B, N, k)``."""
        batch, num_points = x.size(0), x.size(2)
        x, trans, trans_feat = self.feat(x)

        # Three conv -> batch norm -> ReLU stages, then a linear projection to k scores.
        stages = ((self.conv1, self.bn1), (self.conv2, self.bn2), (self.conv3, self.bn3))
        for conv, norm in stages:
            x = F.relu(norm(conv(x)))
        x = self.conv4(x)

        # Move the class axis last and flatten so log-softmax runs per point.
        flat_scores = x.transpose(2, 1).contiguous().view(-1, self.k)
        log_probs = F.log_softmax(flat_scores, dim=-1)
        return log_probs.view(batch, num_points, self.k), trans, trans_feat

train_segmentation.py的注释

from __future__ import print_function
import argparse
import sys

sys.path.append("../")
import os
import random
import torch
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from pointnet.dataset import ShapeNetDataset
from pointnet.model import PointNetDenseCls, feature_transform_regularizer
import torch.nn.functional as F
from tqdm import tqdm
import numpy as np


# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


if __name__ == '__main__':
    # ---- Command-line options -------------------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--batchSize', type=int, default=16, help='input batch size')
    parser.add_argument(
        '--workers', type=int, help='number of data loading workers', default=4)
    parser.add_argument(
        '--nepoch', type=int, default=25, help='number of epochs to train for')
    parser.add_argument('--outf', type=str, default='seg', help='output folder')
    parser.add_argument('--model', type=str, default='', help='model path')
    parser.add_argument('--dataset', type=str, required=True, help="dataset path")
    parser.add_argument('--class_choice', type=str, default='Chair', help="class_choice")
    parser.add_argument('--feature_transform', action='store_true', help="use feature transform")

    opt = parser.parse_args()
    print(opt)

    # Draw a random seed in [1, 10000] and use it for both Python's and
    # PyTorch's random number generators.
    opt.manualSeed = random.randint(1, 10000)
    random.seed(opt.manualSeed)
    torch.manual_seed(opt.manualSeed)

    # ---- Data -------------------------------------------------------------
    # Training split (the dataset's default) for the selected category.
    dataset = ShapeNetDataset(
        root=opt.dataset,
        classification=False,
        class_choice=[opt.class_choice])
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=opt.batchSize,
        shuffle=True,
        num_workers=int(opt.workers))

    # Test split, without augmentation.
    test_dataset = ShapeNetDataset(
        root=opt.dataset,
        classification=False,
        class_choice=[opt.class_choice],
        split='test',
        data_augmentation=False)
    testdataloader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=opt.batchSize,
        shuffle=True,
        num_workers=int(opt.workers))

    print(len(dataset), len(test_dataset))
    num_classes = dataset.num_seg_classes
    print('classes', num_classes)

    # Create the output folder. An already existing directory is fine, but any
    # other failure (e.g. permissions) must not be silently ignored.
    try:
        os.makedirs(opt.outf)
    except OSError:
        if not os.path.isdir(opt.outf):
            raise

    # Wrap a string in ANSI escape codes so it prints in blue.
    blue = lambda x: '\033[94m' + x + '\033[0m'

    # ---- Model, optimizer, schedule ---------------------------------------
    seger = PointNetDenseCls(k=num_classes, feature_transform=opt.feature_transform)

    # Optionally resume from a checkpoint; map_location lets a checkpoint saved
    # on a GPU be loaded on a CPU-only machine.
    if opt.model != '':
        seger.load_state_dict(torch.load(opt.model, map_location=device))

    # Move the model to the target device before building the optimizer.
    seger.to(device)

    optimizer = optim.Adam(seger.parameters(), lr=0.001, betas=(0.9, 0.999))
    # Halve the learning rate every 20 epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

    # Exact number of batches per epoch (includes a partial last batch).
    num_batch = len(dataloader)

    # ---- Training loop ----------------------------------------------------
    for epoch in range(opt.nepoch):
        for i, data in enumerate(dataloader, 0):
            points, target = data
            # (B, N, 3) -> (B, 3, N): the network uses channels-first Conv1d.
            points = points.transpose(2, 1)
            points, target = points.to(device), target.to(device)
            # Gradients accumulate by default, so clear them every step.
            optimizer.zero_grad()
            # Training mode (the evaluation step below switches to eval mode).
            seger.train()
            pred, trans, trans_feat = seger(points)
            pred = pred.view(-1, num_classes)
            # Shift the stored labels down by one so they start at 0 for nll_loss.
            target = target.view(-1) - 1
            loss = F.nll_loss(pred, target)
            if opt.feature_transform:
                loss += feature_transform_regularizer(trans_feat) * 0.001
            loss.backward()
            optimizer.step()
            # Index of the highest log-probability is the predicted label.
            pred_choice = pred.data.max(1)[1]
            correct = pred_choice.eq(target.data).cpu().sum()
            # Divide by the real number of points in this batch so that a
            # partial last batch is reported correctly.
            print('[%d: %d/%d] train loss: %f accuracy: %f' % (
                epoch, i, num_batch, loss.item(), correct.item() / float(target.numel())))

            # Every 10 iterations, evaluate on one randomly drawn test batch.
            if i % 10 == 0:
                points, target = next(iter(testdataloader))
                points = points.transpose(2, 1)
                points, target = points.to(device), target.to(device)
                seger.eval()
                # No gradients are needed for evaluation.
                with torch.no_grad():
                    pred, _, _ = seger(points)
                    pred = pred.view(-1, num_classes)
                    target = target.view(-1) - 1
                    loss = F.nll_loss(pred, target)
                pred_choice = pred.data.max(1)[1]
                correct = pred_choice.eq(target.data).cpu().sum()
                print('[%d: %d/%d] %s loss: %f accuracy: %f' % (
                    epoch, i, num_batch, blue('test'), loss.item(), correct.item() / float(target.numel())))

        # Advance the learning-rate schedule once per epoch, after that epoch's
        # optimizer updates.
        scheduler.step()

        torch.save(seger.state_dict(), '%s/seg_model_%s_%d.pth' % (opt.outf, opt.class_choice, epoch))

    # ---- Benchmark mIOU ---------------------------------------------------
    # One mean IoU value per test shape.
    shape_ious = []
    seger.eval()
    for i, data in tqdm(enumerate(testdataloader, 0)):
        points, target = data
        points = points.transpose(2, 1)
        points, target = points.to(device), target.to(device)
        with torch.no_grad():
            pred, _, _ = seger(points)
        # pred is (B, N, k); argmax over the class axis gives (B, N) labels.
        pred_choice = pred.data.max(2)[1]

        pred_np = pred_choice.cpu().data.numpy()
        target_np = target.cpu().data.numpy() - 1

        for shape_idx in range(target_np.shape[0]):
            parts = range(num_classes)  # np.unique(target_np[shape_idx])
            part_ious = []
            for part in parts:
                I = np.sum(np.logical_and(pred_np[shape_idx] == part, target_np[shape_idx] == part))
                U = np.sum(np.logical_or(pred_np[shape_idx] == part, target_np[shape_idx] == part))
                if U == 0:
                    iou = 1  # If the union of groundtruth and prediction points is empty, then count part IoU as 1
                else:
                    iou = I / float(U)
                part_ious.append(iou)
            shape_ious.append(np.mean(part_ious))

    print("mIOU for class {}: {}".format(opt.class_choice, np.mean(shape_ious)))

dataset.py文件中的ShapeNetDataset类的注释

# ShapeNetDataset subclasses data.Dataset and serves sampled, normalised point clouds.
class ShapeNetDataset(data.Dataset):
    """Point clouds with per-point labels read from a dataset folder.

    Files read under ``root``:
      * ``synsetoffset2category.txt`` - one ``<name> <folder id>`` pair per line;
      * ``train_test_split/shuffled_<split>_file_list.json`` - list of
        ``<prefix>/<folder id>/<file token>`` entries;
      * ``<folder id>/points/<token>.pts`` and
        ``<folder id>/points_label/<token>.seg`` for every listed sample.
    The number of segmentation classes per category is read from
    ``../misc/num_seg_classes.txt`` relative to this source file.

    Args:
        root: Dataset root directory.
        npoints: Number of points drawn (with replacement) per sample.
        classification: If true, items are ``(points, class_index)``;
            otherwise ``(points, per_point_labels)``.
        class_choice: Optional collection of category names to keep;
            ``None`` keeps every category.
        split: Name inserted into the split file name (default ``'train'``).
        data_augmentation: If true, apply a random rotation in the x-z plane
            and Gaussian jitter to every sample.
    """

    def __init__(self,
                 root,
                 npoints=2500,
                 classification=False,
                 class_choice=None,
                 split='train',
                 data_augmentation=True):
        self.npoints = npoints
        self.root = root
        self.catfile = os.path.join(self.root, 'synsetoffset2category.txt')
        self.cat = {}
        self.data_augmentation = data_augmentation
        self.classification = classification
        self.seg_classes = {}

        # Category name -> folder id, e.g. "Chair 03001627".
        with open(self.catfile, 'r') as f:
            for line in f:
                ls = line.strip().split()
                if not ls:
                    continue  # tolerate blank lines
                self.cat[ls[0]] = ls[1]

        # Keep only the requested categories (no filtering when class_choice is None).
        if class_choice is not None:
            self.cat = {k: v for k, v in self.cat.items() if k in class_choice}

        # Reverse mapping: folder id -> category name.
        self.id2cat = {v: k for k, v in self.cat.items()}

        self.meta = {}
        splitfile = os.path.join(self.root, 'train_test_split', 'shuffled_{}_file_list.json'.format(split))
        # Use a context manager so the split file is closed after parsing.
        with open(splitfile, 'r') as f:
            filelist = json.load(f)
        for item in self.cat:
            self.meta[item] = []

        for entry in filelist:
            # e.g. "<prefix>/03001627/7fe08cd7a9b76c1dcbde89e0c48a01bf"
            _, category, token = entry.split('/')
            # id2cat is keyed by folder id, so this is a constant-time lookup.
            if category in self.id2cat:
                self.meta[self.id2cat[category]].append((os.path.join(self.root, category, 'points', token + '.pts'),
                                                         os.path.join(self.root, category, 'points_label',
                                                                      token + '.seg')))

        # Flat list of (category name, points path, labels path).
        self.datapath = []
        for item in self.cat:
            for fn in self.meta[item]:
                self.datapath.append((item, fn[0], fn[1]))

        # Category name -> class index in sorted-name order,
        # e.g. ['Chair', 'Lamp', 'Table'] -> {'Chair': 0, 'Lamp': 1, 'Table': 2}.
        self.classes = dict(zip(sorted(self.cat), range(len(self.cat))))
        # print(self.classes)

        with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../misc/num_seg_classes.txt'), 'r') as f:
            for line in f:
                ls = line.strip().split()
                if not ls:
                    continue  # tolerate blank lines
                self.seg_classes[ls[0]] = int(ls[1])
        # The segmentation class count is taken from the first selected category.
        self.num_seg_classes = self.seg_classes[list(self.cat.keys())[0]]
        print(self.seg_classes, self.num_seg_classes)

    def __getitem__(self, index):
        """Load, resample and normalise sample ``index``.

        Returns:
            ``(point_set, cls)`` when ``self.classification`` is true, else
            ``(point_set, seg)``. ``point_set`` is a float32 tensor with
            ``self.npoints`` rows, ``seg`` an int64 tensor with the label of
            each drawn point exactly as stored in the file, and ``cls`` an
            int64 tensor holding the single class index.
        """
        fn = self.datapath[index]
        cls = self.classes[fn[0]]
        point_set = np.loadtxt(fn[1]).astype(np.float32)
        seg = np.loadtxt(fn[2]).astype(np.int64)
        # print(point_set.shape, seg.shape)

        # Draw self.npoints indices with replacement so that every sample has
        # the same size regardless of how many points the file holds.
        choice = np.random.choice(len(seg), self.npoints, replace=True)
        point_set = point_set[choice, :]

        # Centre the cloud: subtract the per-column mean (the centroid),
        # reshaped to (1, D) so it broadcasts over all rows.
        point_set = point_set - np.expand_dims(np.mean(point_set, axis=0), 0)

        # Largest Euclidean distance of any point from the origin.
        dist = np.max(np.sqrt(np.sum(point_set ** 2, axis=1)))

        # Scale so the farthest point lies at distance 1. If every drawn point
        # coincides, dist is 0; skip the division to avoid producing NaNs.
        if dist > 0:
            point_set = point_set / dist

        if self.data_augmentation:
            # Random angle in [0, 2*pi) and the matching 2-D rotation matrix.
            theta = np.random.uniform(0, np.pi * 2)
            rotation_matrix = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])
            # Rotate only columns 0 and 2 (x and z); column 1 is left untouched.
            point_set[:, [0, 2]] = point_set[:, [0, 2]].dot(rotation_matrix)  # random rotation
            # Add zero-mean Gaussian noise with standard deviation 0.02.
            point_set += np.random.normal(0, 0.02, size=point_set.shape)  # random jitter

        # Labels follow the same resampling as the points.
        seg = seg[choice]
        point_set = torch.from_numpy(point_set)
        seg = torch.from_numpy(seg)
        cls = torch.from_numpy(np.array([cls]).astype(np.int64))

        if self.classification:
            return point_set, cls
        else:
            return point_set, seg

    def __len__(self):
        """Return the number of samples in the selected split and categories."""
        return len(self.datapath)