PointNet笔记
可能遇到的问题
在windows上运行pointnet的代码时,可能会遇到一些问题:
1.比如提示OSError: no file with expected extension, 这是因为可视化的show3d_balls.py文件运行不了, 具体的解决方法可以看这篇文章:https://blog.51cto.com/u_16213693/7738038。
2.由于作者的pointnet所用的pytorch版本比较低,可能会出现 AttributeError: module 'distutils' has no attribute 'version' 的报错,具体的解决方法可以参考https://blog.csdn.net/qq_42076902/article/details/129261266。
代码详解
点云数据集加载部分不在这里过多解释,具体的注释代码请见文末 dataset.py 的注释部分。
从网络结构图可以知道,分割网络其实也用到了分类网络的一部分。拿分割部分来说,程序的入口在文件 train_segmentation.py 中,其中seger = PointNetDenseCls(k=num_classes, feature_transform=opt.feature_transform)
这段代码代表了初始化一个分割模型,接下来我们进入到这个模型中去查看具体细节。
按住Ctrl并用鼠标点击PointNetDenseCls,进入到 model.py 中。在看PointNetDenseCls这个类之前,我们先根据这个文件中定义的类把整个网络架构梳理一遍:首先看图中的蓝色部分,也就是分类网络,这时进入到PointNetfeat这个类中查看,代码和注释如下:
# Transform network for 3-channel input: predicts one 3x3 matrix per sample.
class STN3d(nn.Module):
    """Predict a 3x3 transform matrix for every sample of a batch.

    The input is a tensor with 3 channels in dimension 1 and the points in
    dimension 2 (``conv1`` expects 3 input channels). The output has shape
    ``(batch, 3, 3)``.
    """

    def __init__(self):
        super(STN3d, self).__init__()
        # Kernel-size-1 convolutions lift the channel count 3 -> 64 -> 128 -> 1024.
        self.conv1 = torch.nn.Conv1d(3, 64, 1)
        self.conv2 = torch.nn.Conv1d(64, 128, 1)
        self.conv3 = torch.nn.Conv1d(128, 1024, 1)
        # Fully connected head that regresses the 9 matrix entries.
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 9)
        self.relu = nn.ReLU()

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)

    def forward(self, x):
        """Return a ``(batch, 3, 3)`` tensor of predicted transforms.

        Args:
            x: tensor of shape ``(batch, 3, n_points)``.
        """
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))   # (batch, 1024, n_points)
        # Max over the point dimension collapses the cloud to one vector.
        x = torch.max(x, 2, keepdim=True)[0]  # (batch, 1024, 1)
        x = x.view(-1, 1024)                  # (batch, 1024)

        x = F.relu(self.bn4(self.fc1(x)))
        x = F.relu(self.bn5(self.fc2(x)))
        x = self.fc3(x)                       # (batch, 9)

        # Add the flattened identity so that an all-zero regression output
        # corresponds to the identity transform. Creating it with the dtype
        # and device of ``x`` keeps the module usable on CPU, on any GPU
        # index and in non-float32 precision; broadcasting handles the batch.
        iden = torch.eye(3, dtype=x.dtype, device=x.device).view(1, 9)
        x = x + iden
        x = x.view(-1, 3, 3)
        return x
# Transform network for k-channel features: predicts one k x k matrix per sample.
class STNkd(nn.Module):
    """Predict a ``k x k`` transform matrix for every sample of a batch.

    Same layout as the 3-channel variant, except that the first convolution
    takes ``k`` input channels and the last linear layer emits ``k * k``
    values.

    Args:
        k: number of input channels and side length of the predicted matrix.
    """

    def __init__(self, k=64):
        super(STNkd, self).__init__()
        self.conv1 = torch.nn.Conv1d(k, 64, 1)
        self.conv2 = torch.nn.Conv1d(64, 128, 1)
        self.conv3 = torch.nn.Conv1d(128, 1024, 1)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, k * k)
        self.relu = nn.ReLU()

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)

        self.k = k

    def forward(self, x):
        """Return a ``(batch, k, k)`` tensor of predicted transforms.

        Args:
            x: tensor of shape ``(batch, k, n_points)``.
        """
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        # Max over the point dimension collapses the cloud to one vector.
        x = torch.max(x, 2, keepdim=True)[0]
        x = x.view(-1, 1024)

        x = F.relu(self.bn4(self.fc1(x)))
        x = F.relu(self.bn5(self.fc2(x)))
        x = self.fc3(x)                       # (batch, k * k)

        # Add the flattened identity so that an all-zero regression output
        # corresponds to the identity transform. Creating it with the dtype
        # and device of ``x`` keeps the module usable on CPU, on any GPU
        # index and in non-float32 precision; broadcasting handles the batch.
        iden = torch.eye(self.k, dtype=x.dtype, device=x.device).view(1, self.k * self.k)
        x = x + iden
        x = x.view(-1, self.k, self.k)
        return x
# Shared feature extractor used by both the classification and segmentation heads.
class PointNetfeat(nn.Module):
    """Extract a 1024-channel global feature from a batch of point clouds.

    Args:
        global_feat: when true, ``forward`` returns only the global feature;
            otherwise the global feature is tiled over the points and
            concatenated with the 64-channel per-point feature.
        feature_transform: when true, a 64x64 feature transform is predicted
            and applied after the first convolution.
    """

    def __init__(self, global_feat=True, feature_transform=False):
        # Run nn.Module's own initialisation before registering sub-modules.
        super(PointNetfeat, self).__init__()
        # Network predicting the 3x3 transform applied to the input coordinates.
        self.stn = STN3d()
        self.conv1 = torch.nn.Conv1d(3, 64, 1)
        self.conv2 = torch.nn.Conv1d(64, 128, 1)
        self.conv3 = torch.nn.Conv1d(128, 1024, 1)
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.global_feat = global_feat
        self.feature_transform = feature_transform
        # The 64-channel transform network only exists when it was requested.
        if self.feature_transform:
            self.fstn = STNkd(k=64)

    def forward(self, x):
        """Return ``(features, trans, trans_feat)`` for input ``(batch, 3, n_points)``.

        ``trans`` is the predicted input transform. ``trans_feat`` is the
        predicted feature transform, or ``None`` when feature transforms are
        disabled.
        """
        num_points = x.size()[2]

        # Predict the input transform and apply it with a batched matmul.
        # The points are moved to dimension 1 for the product, then back.
        trans = self.stn(x)
        x = torch.bmm(x.transpose(2, 1), trans).transpose(2, 1)
        x = F.relu(self.bn1(self.conv1(x)))

        # Optional transform of the 64-channel features, same scheme as above.
        trans_feat = None
        if self.feature_transform:
            trans_feat = self.fstn(x)
            x = torch.bmm(x.transpose(2, 1), trans_feat).transpose(2, 1)

        # Keep the per-point features for the concatenation below.
        pointfeat = x
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.bn3(self.conv3(x))

        # Max-pool over the points, then flatten to (batch, 1024).
        pooled = torch.max(x, 2, keepdim=True)[0].view(-1, 1024)
        if self.global_feat:
            return pooled, trans, trans_feat

        # Tile the global vector over every point: (batch, 1024, n_points).
        tiled = pooled.view(-1, 1024, 1).repeat(1, 1, num_points)
        return torch.cat([tiled, pointfeat], 1), trans, trans_feat
# Classification head on top of the global feature extractor.
class PointNetCls(nn.Module):
    """Classify a whole point cloud into one of ``k`` classes.

    Args:
        k: number of output classes.
        feature_transform: forwarded to the feature extractor.
    """

    def __init__(self, k=2, feature_transform=False):
        super(PointNetCls, self).__init__()
        self.feature_transform = feature_transform
        # Feature extractor configured to return only the global feature.
        self.feat = PointNetfeat(global_feat=True, feature_transform=feature_transform)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, k)
        self.dropout = nn.Dropout(p=0.3)
        self.bn1 = nn.BatchNorm1d(512)
        self.bn2 = nn.BatchNorm1d(256)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``(log_probs, trans, trans_feat)``.

        ``log_probs`` is the log-softmax over dimension 1 of the ``fc3``
        output; the two transforms are passed through from the feature
        extractor unchanged.
        """
        global_vec, trans, trans_feat = self.feat(x)

        hidden = self.fc1(global_vec)
        hidden = F.relu(self.bn1(hidden))

        # Dropout sits between the second linear layer and its batch norm.
        hidden = self.dropout(self.fc2(hidden))
        hidden = F.relu(self.bn2(hidden))

        logits = self.fc3(hidden)
        return F.log_softmax(logits, dim=1), trans, trans_feat
# Segmentation network: per-point classification on concatenated features.
class PointNetDenseCls(nn.Module):
    """Assign one of ``k`` labels to every point of a point cloud.

    Args:
        k: number of per-point classes.
        feature_transform: forwarded to the feature extractor.
    """

    def __init__(self, k=2, feature_transform=False):
        super(PointNetDenseCls, self).__init__()
        self.k = k  # number of per-point classes
        self.feature_transform = feature_transform
        # Feature extractor configured to return the concatenated features.
        self.feat = PointNetfeat(global_feat=False, feature_transform=feature_transform)
        # 1088 input channels = 1024 + 64.
        self.conv1 = torch.nn.Conv1d(1088, 512, 1)
        self.conv2 = torch.nn.Conv1d(512, 256, 1)
        self.conv3 = torch.nn.Conv1d(256, 128, 1)
        self.conv4 = torch.nn.Conv1d(128, self.k, 1)
        self.bn1 = nn.BatchNorm1d(512)
        self.bn2 = nn.BatchNorm1d(256)
        self.bn3 = nn.BatchNorm1d(128)

    def forward(self, x):
        """Return ``(log_probs, trans, trans_feat)``.

        ``log_probs`` has shape ``(batch, n_points, k)``; the two transforms
        are passed through from the feature extractor unchanged.
        """
        batch_size = x.size()[0]
        num_points = x.size()[2]

        feats, trans, trans_feat = self.feat(x)
        feats = F.relu(self.bn1(self.conv1(feats)))
        feats = F.relu(self.bn2(self.conv2(feats)))
        feats = F.relu(self.bn3(self.conv3(feats)))
        scores = self.conv4(feats)

        # Put the class scores last, then normalise each point's row with a
        # log-softmax over the final dimension.
        scores = scores.transpose(2, 1).contiguous()
        log_probs = F.log_softmax(scores.view(-1, self.k), dim=-1)
        log_probs = log_probs.view(batch_size, num_points, self.k)
        return log_probs, trans, trans_feat
train_segmentation.py的注释
from __future__ import print_function
import argparse
import sys
sys.path.append("../")
import os
import random
import torch
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from pointnet.dataset import ShapeNetDataset
from pointnet.model import PointNetDenseCls, feature_transform_regularizer
import torch.nn.functional as F
from tqdm import tqdm
import numpy as np
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if __name__ == '__main__':
    # ---- command-line options -------------------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--batchSize', type=int, default=16, help='input batch size')
    parser.add_argument(
        '--workers', type=int, help='number of data loading workers', default=4)
    parser.add_argument(
        '--nepoch', type=int, default=25, help='number of epochs to train for')
    parser.add_argument('--outf', type=str, default='seg', help='output folder')
    parser.add_argument('--model', type=str, default='', help='model path')
    parser.add_argument('--dataset', type=str, required=True, help="dataset path")
    parser.add_argument('--class_choice', type=str, default='Chair', help="class_choice")
    parser.add_argument('--feature_transform', action='store_true', help="use feature transform")

    opt = parser.parse_args()
    print(opt)

    # Draw a seed in [1, 10000] and use it for both Python's and PyTorch's
    # random number generators.
    opt.manualSeed = random.randint(1, 10000)
    random.seed(opt.manualSeed)
    torch.manual_seed(opt.manualSeed)

    # ---- data -----------------------------------------------------------
    dataset = ShapeNetDataset(
        root=opt.dataset,
        classification=False,
        class_choice=[opt.class_choice])
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=opt.batchSize,
        shuffle=True,
        num_workers=int(opt.workers))

    test_dataset = ShapeNetDataset(
        root=opt.dataset,
        classification=False,
        class_choice=[opt.class_choice],
        split='test',
        data_augmentation=False)
    # Shuffling is kept on: the periodic check inside the training loop
    # takes the first batch of a fresh iterator each time.
    testdataloader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=opt.batchSize,
        shuffle=True,
        num_workers=int(opt.workers))

    print(len(dataset), len(test_dataset))
    num_classes = dataset.num_seg_classes
    print('classes', num_classes)

    # An existing output folder is fine; any other failure (for example a
    # permission error) is reported instead of being silently ignored.
    os.makedirs(opt.outf, exist_ok=True)

    def blue(text):
        """Wrap ``text`` in the ANSI escape codes for bright blue."""
        return '\033[94m' + text + '\033[0m'

    # ---- model, optimizer, schedule -------------------------------------
    seger = PointNetDenseCls(k=num_classes, feature_transform=opt.feature_transform)

    # Optionally resume from a checkpoint. map_location lets a checkpoint
    # saved on a GPU be loaded on a machine without one.
    if opt.model != '':
        seger.load_state_dict(torch.load(opt.model, map_location=device))

    seger.to(device)

    optimizer = optim.Adam(seger.parameters(), lr=0.001, betas=(0.9, 0.999))
    # Halve the learning rate every 20 epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

    # Number of batches per epoch, including a short final batch.
    num_batch = len(dataloader)

    # ---- training -------------------------------------------------------
    for epoch in range(opt.nepoch):
        for i, data in enumerate(dataloader, 0):
            points, target = data
            # Swap dimensions 1 and 2 before feeding the network.
            points = points.transpose(2, 1)
            points, target = points.to(device), target.to(device)

            # Gradients accumulate by default, so clear them for each batch.
            optimizer.zero_grad()
            seger = seger.train()

            pred, trans, trans_feat = seger(points)
            pred = pred.view(-1, num_classes)
            # Flatten the labels and shift them down by one
            # (presumably the stored part labels are 1-based; confirm against the dataset).
            target = target.view(-1, 1)[:, 0] - 1

            loss = F.nll_loss(pred, target)
            if opt.feature_transform:
                loss += feature_transform_regularizer(trans_feat) * 0.001
            loss.backward()
            optimizer.step()

            pred_choice = pred.data.max(1)[1]
            correct = pred_choice.eq(target.data).cpu().sum()
            # Divide by the real number of labelled points so that a short
            # final batch is not under-reported.
            print('[%d: %d/%d] train loss: %f accuracy: %f' % (
                epoch, i, num_batch, loss.item(), correct.item() / float(target.numel())))

            # Every 10 batches, evaluate on a single test batch.
            if i % 10 == 0:
                j, data = next(enumerate(testdataloader, 0))
                points, target = data
                points = points.transpose(2, 1)
                points, target = points.to(device), target.to(device)
                seger = seger.eval()
                # No gradients are needed for evaluation.
                with torch.no_grad():
                    pred, _, _ = seger(points)
                    pred = pred.view(-1, num_classes)
                    target = target.view(-1, 1)[:, 0] - 1
                    loss = F.nll_loss(pred, target)
                pred_choice = pred.data.max(1)[1]
                correct = pred_choice.eq(target.data).cpu().sum()
                print('[%d: %d/%d] %s loss: %f accuracy: %f' % (
                    epoch, i, num_batch, blue('test'), loss.item(),
                    correct.item() / float(target.numel())))

        # Advance the learning-rate schedule once per epoch, after the
        # optimizer updates of that epoch.
        scheduler.step()

        torch.save(seger.state_dict(), '%s/seg_model_%s_%d.pth' % (opt.outf, opt.class_choice, epoch))

    # ---- benchmark mIOU -------------------------------------------------
    # One IoU value per test shape.
    shape_ious = []
    seger = seger.eval()
    with torch.no_grad():
        for i, data in tqdm(enumerate(testdataloader, 0)):
            points, target = data
            points = points.transpose(2, 1)
            points, target = points.to(device), target.to(device)
            pred, _, _ = seger(points)
            pred_choice = pred.data.max(2)[1]

            pred_np = pred_choice.cpu().data.numpy()
            target_np = target.cpu().data.numpy() - 1

            for shape_idx in range(target_np.shape[0]):
                parts = range(num_classes)  # np.unique(target_np[shape_idx])
                part_ious = []
                for part in parts:
                    I = np.sum(np.logical_and(pred_np[shape_idx] == part, target_np[shape_idx] == part))
                    U = np.sum(np.logical_or(pred_np[shape_idx] == part, target_np[shape_idx] == part))
                    if U == 0:
                        iou = 1  # If the union of groundtruth and prediction points is empty, then count part IoU as 1
                    else:
                        iou = I / float(U)
                    part_ious.append(iou)
                shape_ious.append(np.mean(part_ious))

    print("mIOU for class {}: {}".format(opt.class_choice, np.mean(shape_ious)))
dataset.py文件中的ShapeNetDataset类的注释
# ShapeNetDataset is a torch ``data.Dataset`` over per-shape point and label files.
class ShapeNetDataset(data.Dataset):
    """Dataset of point clouds with per-point labels stored as text files.

    Args:
        root: dataset root folder. It must contain
            ``synsetoffset2category.txt``, the ``train_test_split`` folder
            and one sub-folder per category id.
        npoints: number of points sampled (with replacement) per item.
        classification: when true, items are ``(points, class_index)``;
            otherwise ``(points, per_point_labels)``.
        class_choice: optional collection of category names to keep; ``None``
            keeps every category listed in the category file.
        split: name inserted into ``shuffled_{split}_file_list.json``.
        data_augmentation: when true, each item gets a random rotation and
            random jitter.
    """

    def __init__(self,
                 root,
                 npoints=2500,
                 classification=False,
                 class_choice=None,
                 split='train',
                 data_augmentation=True):
        self.npoints = npoints
        self.root = root
        self.catfile = os.path.join(self.root, 'synsetoffset2category.txt')
        self.cat = {}
        self.data_augmentation = data_augmentation
        self.classification = classification
        self.seg_classes = {}

        # Each line is "<name> <id>", e.g. "Chair 03001627".
        with open(self.catfile, 'r') as f:
            for line in f:
                ls = line.strip().split()
                if not ls:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                self.cat[ls[0]] = ls[1]

        # Keep only the requested categories.
        if class_choice is not None:
            self.cat = {k: v for k, v in self.cat.items() if k in class_choice}

        # Reverse mapping: id -> name, e.g. "03001627" -> "Chair".
        self.id2cat = {v: k for k, v in self.cat.items()}

        self.meta = {}
        splitfile = os.path.join(self.root, 'train_test_split', 'shuffled_{}_file_list.json'.format(split))
        # Close the split file deterministically instead of leaking the handle.
        with open(splitfile, 'r') as f:
            filelist = json.load(f)

        for item in self.cat:
            self.meta[item] = []

        for entry in filelist:
            # e.g. "<prefix>/03001627/7fe08cd7a9b76c1dcbde89e0c48a01bf"
            _, category, uuid = entry.split('/')
            # id2cat is keyed by exactly the ids kept in self.cat.
            if category in self.id2cat:
                self.meta[self.id2cat[category]].append(
                    (os.path.join(self.root, category, 'points', uuid + '.pts'),
                     os.path.join(self.root, category, 'points_label', uuid + '.seg')))

        # Flat list of (category name, points path, labels path).
        self.datapath = []
        for item in self.cat:
            for fn in self.meta[item]:
                self.datapath.append((item, fn[0], fn[1]))

        # Map the sorted category names to consecutive indices,
        # e.g. ['Chair', 'Lamp', 'Table'] -> {'Chair': 0, 'Lamp': 1, 'Table': 2}.
        self.classes = dict(zip(sorted(self.cat), range(len(self.cat))))

        # Each line is "<name> <number of segmentation classes>".
        with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../misc/num_seg_classes.txt'), 'r') as f:
            for line in f:
                ls = line.strip().split()
                if not ls:
                    continue
                self.seg_classes[ls[0]] = int(ls[1])

        # The segmentation class count is taken from the first kept category.
        self.num_seg_classes = self.seg_classes[list(self.cat.keys())[0]]
        print(self.seg_classes, self.num_seg_classes)

    def __getitem__(self, index):
        """Load, resample and normalise one shape.

        Returns:
            ``(point_set, cls)`` when ``self.classification`` is true,
            otherwise ``(point_set, seg)``. ``point_set`` is a float32 tensor
            with ``self.npoints`` rows, ``seg`` an int64 tensor with one
            label per sampled point, ``cls`` an int64 tensor of shape ``(1,)``.
        """
        fn = self.datapath[index]
        cls = self.classes[fn[0]]
        point_set = np.loadtxt(fn[1]).astype(np.float32)
        seg = np.loadtxt(fn[2]).astype(np.int64)

        # Resample to a fixed number of points, with replacement.
        choice = np.random.choice(len(seg), self.npoints, replace=True)
        point_set = point_set[choice, :]

        # Centre the cloud: subtract the per-column mean (axis=0), expanded
        # to shape (1, D) so that it broadcasts over all points.
        point_set = point_set - np.expand_dims(np.mean(point_set, axis=0), 0)
        # Scale by the largest distance from the origin, so the farthest
        # point ends up at distance 1.
        dist = np.max(np.sqrt(np.sum(point_set ** 2, axis=1)))
        point_set = point_set / dist

        if self.data_augmentation:
            # Random angle in [0, 2*pi), applied as a 2-D rotation to
            # columns 0 and 2 only; column 1 is left untouched.
            theta = np.random.uniform(0, np.pi * 2)
            rotation_matrix = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])
            point_set[:, [0, 2]] = point_set[:, [0, 2]].dot(rotation_matrix)  # random rotation
            # Gaussian noise with mean 0 and standard deviation 0.02.
            point_set += np.random.normal(0, 0.02, size=point_set.shape)  # random jitter

        # Labels follow the same sampling as the points.
        seg = seg[choice]
        point_set = torch.from_numpy(point_set)
        seg = torch.from_numpy(seg)
        cls = torch.from_numpy(np.array([cls]).astype(np.int64))

        if self.classification:
            return point_set, cls
        else:
            return point_set, seg

    def __len__(self):
        """Return the number of shapes in the selected split and categories."""
        return len(self.datapath)