CUDA 3D点云的 VoxelFilter
参考 PointPillars 中的实现
使用流程：
先用 cudaMalloc 分配 device 变量
再用 cudaMemset 赋初值（清零）
核函数执行完成后用 cudaMemcpy（DeviceToHost）把结果拷回 host
// Maps one coordinate to its voxel index along a single axis.
// Returns -1 when the coordinate is NaN or falls outside [0, dim).
static __device__ __forceinline__ int voxel_axis_index(float value, float origin,
                                                       float leaf_size, int dim) {
  const float cell = floorf((value - origin) / leaf_size);
  // Range-check in float before converting: NaN fails both comparisons, and
  // anything that does not fit in an int never reaches the cast.
  if (!(cell >= 0.0f && cell < 2147483648.0f)) {
    return -1;
  }
  const int index = static_cast<int>(cell);
  return index < dim ? index : -1;
}

// Scatters every in-range input point into the fixed-capacity bucket of the
// voxel that contains it. One thread handles one point (1D indexing).
//
// points      : input cloud, packed xyz triplets read as points[3 * i + {0,1,2}].
// voxels      : output buckets; voxel v owns MAX_POINTS_PER_V xyz slots starting
//               at voxels[3 * v * MAX_POINTS_PER_V].
// voxel_count : per-voxel counter, atomically incremented for every point that
//               falls in the voxel. The value returned by the increment is used
//               as the slot number, so the counters must be zero at launch. The
//               counter keeps growing past MAX_POINTS_PER_V; surplus points are
//               counted but not stored.
// leaf_size   : edge length of a voxel.
// num_points  : number of points in `points`.
// Dx, Dy, Dz  : fixed grid size along x / y / z, in voxels.
// min_x, min_y, min_z : lower bound of the filtered range on each axis
//               (e.g. 0 for a 0..60 m forward range, -20 for a -20..20 m
//               lateral range).
//
// Points outside the grid, or with a NaN coordinate, are ignored.
__global__ void voxelize_kernel(float* points, float* voxels, int* voxel_count,
                                float leaf_size, int num_points, int Dx, int Dy,
                                int Dz, float min_x, float min_y, float min_z) {
  const int idx = threadIdx.x + blockIdx.x * blockDim.x;
  if (idx >= num_points) {
    return;
  }

  // 64-bit offsets: 3 * idx and the bucket offsets below can exceed INT_MAX.
  const size_t src = 3 * static_cast<size_t>(idx);
  const float px = points[src];
  const float py = points[src + 1];
  const float pz = points[src + 2];

  const int hx = voxel_axis_index(px, min_x, leaf_size, Dx);
  const int hy = voxel_axis_index(py, min_y, leaf_size, Dy);
  const int hz = voxel_axis_index(pz, min_z, leaf_size, Dz);
  if (hx < 0 || hy < 0 || hz < 0) {
    return;
  }

  // Linear voxel index: hx + Dx * hy + Dx * Dy * hz.
  const size_t voxel =
      static_cast<size_t>(hx) +
      static_cast<size_t>(Dx) *
          (static_cast<size_t>(hy) + static_cast<size_t>(Dy) * static_cast<size_t>(hz));

  // Reserve a slot in the voxel's bucket; the old counter value is the slot.
  const int slot = atomicAdd(&voxel_count[voxel], 1);
  if (slot < MAX_POINTS_PER_V) {
    const size_t dst = 3 * (voxel * MAX_POINTS_PER_V + static_cast<size_t>(slot));
    voxels[dst] = px;
    voxels[dst + 1] = py;
    voxels[dst + 2] = pz;
  }
  // No block barrier is needed: the kernel uses no shared memory and threads
  // do not consume each other's results.
}
// Compacts sufficiently populated voxels into a dense list of centroids.
// One thread handles one voxel (1D indexing).
//
// voxels_in    : per-voxel buckets; voxel v owns MAX_POINTS_PER_V xyz slots
//                starting at voxels_in[3 * v * MAX_POINTS_PER_V].
// voxels_out   : dense output; centroid k is written to
//                voxels_out[3 * k + {0,1,2}]. The order of centroids follows
//                the order in which threads win the atomic increment.
// voxel_count  : number of points counted for each voxel (may be larger than
//                MAX_POINTS_PER_V, in which case only MAX_POINTS_PER_V points
//                are stored in the bucket).
// voxel_counts : voxel_counts[0] is the output cursor / number of centroids
//                emitted; it must be zero at launch for output to start at 0.
// n            : number of voxels.
//
// Only voxels holding more than 3 points produce a centroid.
__global__ void avg(float* voxels_in, float* voxels_out,
                    int* voxel_count, int* voxel_counts, int n) {
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx >= n) {
    return;
  }

  const int counted = voxel_count[idx];
  if (counted <= 3) {
    return;
  }

  // Only the first `stored` slots of the bucket were written; summing just
  // those keeps the result independent of what the unused slots contain.
  const int stored = counted < MAX_POINTS_PER_V ? counted : MAX_POINTS_PER_V;

  // 64-bit offset: idx * MAX_POINTS_PER_V * 3 can exceed INT_MAX.
  const float* bucket =
      voxels_in + 3 * static_cast<size_t>(idx) * MAX_POINTS_PER_V;

  // Accumulate in registers and write the centroid once, instead of doing
  // repeated read-modify-writes on the output buffer.
  float sum_x = 0.0f;
  float sum_y = 0.0f;
  float sum_z = 0.0f;
  for (int j = 0; j < stored; ++j) {
    sum_x += bucket[3 * j];
    sum_y += bucket[3 * j + 1];
    sum_z += bucket[3 * j + 2];
  }

  // Claim the next free output position.
  const int out = atomicAdd(&voxel_counts[0], 1);
  float* centroid = voxels_out + 3 * static_cast<size_t>(out);
  centroid[0] = sum_x / stored;
  centroid[1] = sum_y / stored;
  centroid[2] = sum_z / stored;
}