Voxel R-CNN Code Walkthrough

1. Overall Network Architecture

Taking voxel_rcnn_car.yaml as an example, the model consists of the following modules (chained in the fixed order sketched after this list):

- VFE (voxel feature encoder): MeanVFE
- BACKBONE_3D: VoxelBackBone8x
- MAP_TO_BEV: HeightCompression, with NUM_BEV_FEATURES = 256
- BACKBONE_2D: BaseBEVBackbone
- DENSE_HEAD: AnchorHeadSingle
- ROI_HEAD: VoxelRCNNHead
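These modules are built and chained by OpenPCDet's detector template. As a rough sketch of the build order (based on pcdet/models/detectors/detector3d_template.py; attribute names may differ slightly across versions):

```python
# Build order used by Detector3DTemplate; modules missing from the config are skipped.
# For voxel_rcnn_car.yaml the active entries are vfe, backbone_3d, map_to_bev_module,
# backbone_2d, dense_head and roi_head.
module_topology = [
    'vfe', 'backbone_3d', 'map_to_bev_module', 'pfe',
    'backbone_2d', 'dense_head', 'point_head', 'roi_head'
]
```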
2. VFE Module

MeanVFE is used here. depth_downsample_factor is None because it is not defined in cfgs/dataset_configs/kitti_dataset.yaml.

MeanVFE simply averages the raw point features inside each voxel, as sketched below.
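A minimal sketch of the MeanVFE forward pass (simplified from OpenPCDet's mean_vfe.py; the real class inherits from VFETemplate rather than nn.Module):

```python
import torch
import torch.nn as nn


class MeanVFE(nn.Module):
    """Average the point features inside each voxel (no learnable parameters)."""

    def __init__(self, model_cfg, num_point_features, **kwargs):
        super().__init__()
        self.num_point_features = num_point_features

    def get_output_feature_dim(self):
        return self.num_point_features

    def forward(self, batch_dict, **kwargs):
        # voxels: (num_voxels, max_points_per_voxel, C), zero-padded
        # voxel_num_points: (num_voxels,) actual number of points in each voxel
        voxel_features, voxel_num_points = batch_dict['voxels'], batch_dict['voxel_num_points']
        points_mean = voxel_features.sum(dim=1, keepdim=False)
        normalizer = torch.clamp_min(voxel_num_points.view(-1, 1), min=1.0).type_as(voxel_features)
        batch_dict['voxel_features'] = (points_mean / normalizer).contiguous()
        return batch_dict
```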
3. BACKBONE_3D Module

VoxelBackBone8x is used here:
```python
class VoxelBackBone8x(nn.Module):
    def __init__(self, model_cfg, input_channels, grid_size, **kwargs):
        super().__init__()
        self.model_cfg = model_cfg
        norm_fn = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01)

        self.sparse_shape = grid_size[::-1] + [1, 0, 0]  # grid_size (1408, 1600, 40), input_channels: 4

        self.conv_input = spconv.SparseSequential(
            spconv.SubMConv3d(input_channels, 16, 3, padding=1, bias=False, indice_key='subm1'),
            norm_fn(16),
            nn.ReLU(),
        )  # a submanifold sparse convolution block
        block = post_act_block

        self.conv1 = spconv.SparseSequential(
            block(16, 16, 3, norm_fn=norm_fn, padding=1, indice_key='subm1'),
        )

        self.conv2 = spconv.SparseSequential(
            # [1600, 1408, 41] <- [800, 704, 21]
            block(16, 32, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv2', conv_type='spconv'),
            block(32, 32, 3, norm_fn=norm_fn, padding=1, indice_key='subm2'),
            block(32, 32, 3, norm_fn=norm_fn, padding=1, indice_key='subm2'),
        )

        self.conv3 = spconv.SparseSequential(
            # [800, 704, 21] <- [400, 352, 11]
            block(32, 64, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv3', conv_type='spconv'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'),
        )

        self.conv4 = spconv.SparseSequential(
            # [400, 352, 11] <- [200, 176, 5]
            block(64, 64, 3, norm_fn=norm_fn, stride=2, padding=(0, 1, 1), indice_key='spconv4', conv_type='spconv'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm4'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm4'),
        )

        last_pad = 0
        last_pad = self.model_cfg.get('last_pad', last_pad)
        self.conv_out = spconv.SparseSequential(
            # [200, 150, 5] -> [200, 150, 2]
            spconv.SparseConv3d(64, 128, (3, 1, 1), stride=(2, 1, 1), padding=last_pad,
                                bias=False, indice_key='spconv_down2'),
            norm_fn(128),
            nn.ReLU(),
        )
        self.num_point_features = 128
        self.backbone_channels = {
            'x_conv1': 16,
            'x_conv2': 32,
            'x_conv3': 64,
            'x_conv4': 64
        }

    def forward(self, batch_dict):
        """
        Args:
            batch_dict:
                batch_size: int
                vfe_features: (num_voxels, C)
                voxel_coords: (num_voxels, 4), [batch_idx, z_idx, y_idx, x_idx]
        Returns:
            batch_dict:
                encoded_spconv_tensor: sparse tensor
        """
        voxel_features, voxel_coords = batch_dict['voxel_features'], batch_dict['voxel_coords']
        batch_size = batch_dict['batch_size']
        input_sp_tensor = spconv.SparseConvTensor(
            features=voxel_features,
            indices=voxel_coords.int(),
            spatial_shape=self.sparse_shape,
            batch_size=batch_size
        )

        x = self.conv_input(input_sp_tensor)

        x_conv1 = self.conv1(x)
        x_conv2 = self.conv2(x_conv1)
        x_conv3 = self.conv3(x_conv2)
        x_conv4 = self.conv4(x_conv3)

        # for detection head
        # [200, 176, 5] -> [200, 176, 2]
        out = self.conv_out(x_conv4)

        batch_dict.update({
            'encoded_spconv_tensor': out,
            'encoded_spconv_tensor_stride': 8
        })
        batch_dict.update({
            'multi_scale_3d_features': {
                'x_conv1': x_conv1,
                'x_conv2': x_conv2,
                'x_conv3': x_conv3,
                'x_conv4': x_conv4,
            }
        })
        batch_dict.update({
            'multi_scale_3d_strides': {
                'x_conv1': 1,
                'x_conv2': 2,
                'x_conv3': 4,
                'x_conv4': 8,
            }
        })

        return batch_dict
```
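The block helper used above (block = post_act_block) wraps a sparse convolution with BatchNorm and ReLU and picks the convolution type from conv_type. Roughly, paraphrased from OpenPCDet's spconv_backbone.py (argument defaults may differ by version):

```python
from functools import partial

import torch.nn as nn
import spconv.pytorch as spconv  # on spconv 1.x this is just `import spconv`


def post_act_block(in_channels, out_channels, kernel_size, indice_key=None, stride=1, padding=0,
                   conv_type='subm', norm_fn=None):
    # 'subm' keeps the set of active voxels unchanged; 'spconv' can dilate/downsample it;
    # 'inverseconv' undoes a previous sparse convolution that shares the same indice_key.
    if conv_type == 'subm':
        conv = spconv.SubMConv3d(in_channels, out_channels, kernel_size, bias=False, indice_key=indice_key)
    elif conv_type == 'spconv':
        conv = spconv.SparseConv3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding,
                                   bias=False, indice_key=indice_key)
    elif conv_type == 'inverseconv':
        conv = spconv.SparseInverseConv3d(in_channels, out_channels, kernel_size, indice_key=indice_key, bias=False)
    else:
        raise NotImplementedError

    return spconv.SparseSequential(
        conv,
        norm_fn(out_channels),
        nn.ReLU(),
    )
```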
For background on 3D sparse convolution, see https://zhuanlan.zhihu.com/p/382365889.
4. MAP_TO_BEV Module

HeightCompression is used with NUM_BEV_FEATURES = 256: the 128-channel sparse output of the 3D backbone (height dimension 2 after the final downsampling) is densified and its channel and height axes are flattened together, giving a 128 × 2 = 256 channel BEV feature map. A sketch of its forward pass follows.
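A minimal sketch of what HeightCompression does (simplified from OpenPCDet's height_compression.py):

```python
import torch.nn as nn


class HeightCompression(nn.Module):
    """Flatten the sparse 3D feature volume along the height axis into a 2D BEV map."""

    def __init__(self, model_cfg, **kwargs):
        super().__init__()
        self.model_cfg = model_cfg
        self.num_bev_features = self.model_cfg.NUM_BEV_FEATURES  # 256 for voxel_rcnn_car.yaml

    def forward(self, batch_dict):
        encoded_spconv_tensor = batch_dict['encoded_spconv_tensor']
        spatial_features = encoded_spconv_tensor.dense()          # (N, C, D, H, W) = (N, 128, 2, 200, 176)
        N, C, D, H, W = spatial_features.shape
        spatial_features = spatial_features.view(N, C * D, H, W)  # (N, 256, 200, 176)
        batch_dict['spatial_features'] = spatial_features
        batch_dict['spatial_features_stride'] = batch_dict['encoded_spconv_tensor_stride']
        return batch_dict
```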
5. BACKBONE_2D Module

BaseBEVBackbone is used. The 256-channel BEV map first passes through two convolution blocks that change the channel count 256 -> 64 -> 128 (with spatial strides 1 and 2). The output of each block then goes through its own transposed-convolution branch (upsample strides 1 and 2 respectively), each producing 128 channels; the two branches are concatenated into a 256-channel feature map, which is the 2D backbone output fed to the dense head. A rough sketch of this two-branch layout follows.
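The sketch below is a hypothetical standalone module that only illustrates the channel flow described above; the actual BaseBEVBackbone builds each branch from LAYER_NUMS / NUM_FILTERS / UPSAMPLE_STRIDES in the config and stacks several convolutions per block:

```python
import torch
import torch.nn as nn


class TinyBEVBackbone(nn.Module):
    """Two downsampling blocks + two upsampling branches, concatenated (illustrative only)."""

    def __init__(self, in_channels=256):
        super().__init__()
        # block 1: stride 1, 256 -> 64; block 2: stride 2, 64 -> 128
        self.block1 = nn.Sequential(nn.Conv2d(in_channels, 64, 3, stride=1, padding=1, bias=False),
                                    nn.BatchNorm2d(64), nn.ReLU())
        self.block2 = nn.Sequential(nn.Conv2d(64, 128, 3, stride=2, padding=1, bias=False),
                                    nn.BatchNorm2d(128), nn.ReLU())
        # deblocks: upsample strides 1 and 2, both producing 128 channels
        self.deblock1 = nn.Sequential(nn.ConvTranspose2d(64, 128, 1, stride=1, bias=False),
                                      nn.BatchNorm2d(128), nn.ReLU())
        self.deblock2 = nn.Sequential(nn.ConvTranspose2d(128, 128, 2, stride=2, bias=False),
                                      nn.BatchNorm2d(128), nn.ReLU())

    def forward(self, spatial_features):
        x1 = self.block1(spatial_features)   # (N, 64, 200, 176)
        x2 = self.block2(x1)                 # (N, 128, 100, 88)
        up1 = self.deblock1(x1)              # (N, 128, 200, 176)
        up2 = self.deblock2(x2)              # (N, 128, 200, 176)
        return torch.cat([up1, up2], dim=1)  # (N, 256, 200, 176), fed to the dense head
```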
6. DENSE_HEAD Module

AnchorHeadTemplate builds the dense head and its losses: first the box_coder is created (ResidualCoder here), then AnchorGenerator produces the candidate anchors, and finally the loss functions are built (classification loss, box regression loss, and direction loss). A sketch of the residual box encoding is given below.
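The ResidualCoder regresses the offsets of a ground-truth box (xg, yg, zg, dxg, dyg, dzg, rg) relative to an anchor (xa, ya, za, dxa, dya, dza, ra). A sketch of the encoding step, paraphrased from OpenPCDet's box_coder_utils.py (the real class also supports extra dimensions and sin/cos angle encoding):

```python
import torch


def encode_residuals(boxes, anchors):
    """boxes, anchors: (N, 7) tensors of [x, y, z, dx, dy, dz, heading]."""
    xa, ya, za, dxa, dya, dza, ra = torch.split(anchors, 1, dim=-1)
    xg, yg, zg, dxg, dyg, dzg, rg = torch.split(boxes, 1, dim=-1)

    diagonal = torch.sqrt(dxa ** 2 + dya ** 2)  # BEV diagonal of the anchor
    xt = (xg - xa) / diagonal                   # center offsets, normalized by anchor scale
    yt = (yg - ya) / diagonal
    zt = (zg - za) / dza
    dxt = torch.log(dxg / dxa)                  # log-scale size residuals
    dyt = torch.log(dyg / dya)
    dzt = torch.log(dzg / dza)
    rt = rg - ra                                # heading residual
    return torch.cat([xt, yt, zt, dxt, dyt, dzt, rt], dim=-1)
```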
6.1 AnchorGenerator
```python
class AnchorGenerator(object):
    def __init__(self, anchor_range, anchor_generator_config):
        super().__init__()
        self.anchor_generator_cfg = anchor_generator_config
        self.anchor_range = anchor_range  # array([ 0. , -40. , -3. , 70.4, 40. , 1. ], dtype=float32)
        self.anchor_sizes = [config['anchor_sizes'] for config in anchor_generator_config]
        self.anchor_rotations = [config['anchor_rotations'] for config in anchor_generator_config]  # anchor rotation angles
        self.anchor_heights = [config['anchor_bottom_heights'] for config in anchor_generator_config]  # anchor bottom heights
        self.align_center = [config.get('align_center', False) for config in anchor_generator_config]  # whether to align anchor centers with grid-cell centers

        assert len(self.anchor_sizes) == len(self.anchor_rotations) == len(self.anchor_heights)
        self.num_of_anchor_sets = len(self.anchor_sizes)

    def generate_anchors(self, grid_sizes):
        assert len(grid_sizes) == self.num_of_anchor_sets  # grid_sizes [array([176, 200])]: the anchor grid; a set of anchors is generated at every cell
        all_anchors = []
        num_anchors_per_location = []
        for grid_size, anchor_size, anchor_rotation, anchor_height, align_center in zip(
                grid_sizes, self.anchor_sizes, self.anchor_rotations, self.anchor_heights, self.align_center):

            num_anchors_per_location.append(len(anchor_rotation) * len(anchor_size) * len(anchor_height))  # number of anchors per location
            if align_center:  # align anchors with grid-cell centers; focuses more on objects near cell centers
                x_stride = (self.anchor_range[3] - self.anchor_range[0]) / grid_size[0]
                y_stride = (self.anchor_range[4] - self.anchor_range[1]) / grid_size[1]
                x_offset, y_offset = x_stride / 2, y_stride / 2
            else:  # divide the range uniformly into grid_size[0] - 1 and grid_size[1] - 1 steps; may cover objects at the boundary better
                x_stride = (self.anchor_range[3] - self.anchor_range[0]) / (grid_size[0] - 1)
                y_stride = (self.anchor_range[4] - self.anchor_range[1]) / (grid_size[1] - 1)
                x_offset, y_offset = 0, 0

            x_shifts = torch.arange(
                self.anchor_range[0] + x_offset, self.anchor_range[3] + 1e-5, step=x_stride, dtype=torch.float32,
            ).cuda()  # anchor x coordinates (in real-world units)
            y_shifts = torch.arange(
                self.anchor_range[1] + y_offset, self.anchor_range[4] + 1e-5, step=y_stride, dtype=torch.float32,
            ).cuda()  # anchor y coordinates
            z_shifts = x_shifts.new_tensor(anchor_height)  # anchor heights are fixed

            num_anchor_size, num_anchor_rotation = anchor_size.__len__(), anchor_rotation.__len__()
            anchor_rotation = x_shifts.new_tensor(anchor_rotation)  # anchor rotation angles
            anchor_size = x_shifts.new_tensor(anchor_size)  # anchor sizes
            x_shifts, y_shifts, z_shifts = torch.meshgrid([
                x_shifts, y_shifts, z_shifts
            ])  # [x_grid, y_grid, z_grid]: the x, y, z coordinates of every anchor

            anchors = torch.stack((x_shifts, y_shifts, z_shifts), dim=-1)  # [x, y, z, 3]
            anchors = anchors[:, :, :, None, :].repeat(1, 1, 1, anchor_size.shape[0], 1)  # [176, 200, 1, 1, 3]
            anchor_size = anchor_size.view(1, 1, 1, -1, 3).repeat([*anchors.shape[0:3], 1, 1])  # [176, 200, 1, 1, 3]
            anchors = torch.cat((anchors, anchor_size), dim=-1)
            anchors = anchors[:, :, :, :, None, :].repeat(1, 1, 1, 1, num_anchor_rotation, 1)  # add the rotation dimension
            anchor_rotation = anchor_rotation.view(1, 1, 1, 1, -1, 1).repeat([*anchors.shape[0:3], num_anchor_size, 1, 1])
            anchors = torch.cat((anchors, anchor_rotation), dim=-1)  # [x, y, z, num_size, num_rot, 7]

            anchors = anchors.permute(2, 1, 0, 3, 4, 5).contiguous()  # [z, y, x, num_size, num_rot, 7], torch.Size([1, 200, 176, 1, 2, 7])
            # anchors = anchors.view(-1, anchors.shape[-1])
            anchors[..., 2] += anchors[..., 5] / 2  # shift to box centers: the 7 values are (x, y, z, dx, dy, dz, rotation); adding dz / 2 moves z from the anchor bottom to the box center
            all_anchors.append(anchors)

        return all_anchors, num_anchors_per_location
```
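As a hypothetical usage example for the car-only setting (anchor size 3.9 × 1.6 × 1.56 m, rotations 0 and 1.57, bottom height -1.78 m, 176 × 200 feature-map grid, mirroring the values in voxel_rcnn_car.yaml), the generator returns a single anchor tensor of shape [1, 200, 176, 1, 2, 7]. The config dict below is written by hand for illustration, and a CUDA device is required because the code above calls .cuda():

```python
import numpy as np
import torch

# Hand-written car-only anchor config for illustration.
anchor_generator_config = [{
    'class_name': 'Car',
    'anchor_sizes': [[3.9, 1.6, 1.56]],
    'anchor_rotations': [0, 1.57],
    'anchor_bottom_heights': [-1.78],
    'align_center': False,
}]
point_cloud_range = np.array([0., -40., -3., 70.4, 40., 1.], dtype=np.float32)

generator = AnchorGenerator(point_cloud_range, anchor_generator_config)
all_anchors, num_anchors_per_location = generator.generate_anchors([np.array([176, 200])])
print(all_anchors[0].shape)      # torch.Size([1, 200, 176, 1, 2, 7])
print(num_anchors_per_location)  # [2]: one size x two rotations per location
```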