cuda实践3
shared memory 使用
/**
 * Per-pixel triangle lookup followed by a per-panorama occlusion ("shelter")
 * test.
 *
 * Phase 1: every thread owns one pixel (Row, Col), converts it to a normalised
 * point (u = Col / W, v = (H - Row) / H) and looks for the first triangle of
 * `uv_triangulation_0` that pointInTriangle_cuda() accepts. Triangles are
 * streamed through shared memory in tiles of BLOCK_SIZE * BLOCK_SIZE.
 *
 * Phase 2: for pixels that hit a triangle, the point is mapped to 3D via
 * caculateMappingTriangle2dTo3d() and, for each panorama centre closer than
 * `dis_threshold`, every other triangle whose centre lies within
 * 0.45 * distance of the segment midpoint is tested with
 * rayTracingShelterCaculate_cuda2(); a return value of 0 is treated as
 * "occluded". Panoramas that are not occluded are appended to `result`.
 *
 * Launch contract:
 *   - blockDim must be (BLOCK_SIZE, BLOCK_SIZE); the tile indexing relies on it.
 *   - The grid may overhang the image; overhanging threads only help loading
 *     tiles and never touch the output buffers.
 *   - Static shared memory: 6 * BLOCK_SIZE * BLOCK_SIZE floats.
 *
 * Buffer layouts (as indexed by this kernel):
 *   uv_triangulation_0     component-major, element [c * triangule_num_d + i],
 *                          c in 0..5 read as (A.u, A.v, B.u, B.v, C.u, C.v)
 *   uv_triangulation       6 floats per triangle, triangle-major
 *   w_triangulation        9 floats per triangle, triangle-major
 *   w_triangulation_center 3 floats per triangle
 *   position_panorama_vect 3 floats per panorama
 *   inside_ptr_d           2 ints per pixel: {0 if a triangle was found else -1,
 *                          triangle index or -1}
 *   result                 PanoramaNUM ints per pixel; only the leading slots
 *                          for visible panoramas are written (value m + 1), the
 *                          rest are left untouched.
 *                          NOTE(review): presumably the caller pre-fills
 *                          `result` with a sentinel -- confirm.
 *
 * @param triangule_num_d  number of triangles
 * @param panorama_num_d   number of panorama centres
 * @param imgHeight_d      image height in pixels
 * @param imgWidth_d       image width in pixels
 * @param dis_threshold    panoramas farther than this from the 3D point are
 *                         skipped (compared as float, no truncation)
 */
template <int BLOCK_SIZE>
__global__ void caculateShelter_cuda(
    float *uv_triangulation_0,
    float *uv_triangulation,
    float *w_triangulation,
    float *w_triangulation_center,
    float *position_panorama_vect,
    int triangule_num_d,
    int panorama_num_d,
    int imgHeight_d,
    int imgWidth_d,
    float dis_threshold,
    int*inside_ptr_d,
    int* result)
{
    const int times = triangule_num_d;
    const int step = BLOCK_SIZE * BLOCK_SIZE;                 // triangles per tile
    const int lid = BLOCK_SIZE * threadIdx.y + threadIdx.x;   // linear id inside the block

    const int Row = blockIdx.y * BLOCK_SIZE + threadIdx.y;
    const int Col = blockIdx.x * BLOCK_SIZE + threadIdx.x;

    // Threads of a tail block may fall outside the image. Without this guard
    // a thread with Col >= imgWidth_d would alias a pixel of the next row and
    // race with that pixel's own thread, or write past the output buffers.
    const bool in_image = (Row < imgHeight_d) && (Col < imgWidth_d);
    const size_t pixel = static_cast<size_t>(Row) * imgWidth_d + Col;

    // Normalised query point; the vertical axis is flipped.
    float searchPoint[2];
    searchPoint[0] = static_cast<float>(Col) / static_cast<float>(imgWidth_d);
    searchPoint[1] = static_cast<float>(imgHeight_d - Row) / static_cast<float>(imgHeight_d);

    // ---------------------------------------------------------------------
    // Phase 1: find the first triangle containing the pixel.
    // ---------------------------------------------------------------------
    __shared__ float tile_uv[6][BLOCK_SIZE * BLOCK_SIZE];

    int inside = -1;
    int index_temp = -1;
    bool searching = in_image;   // out-of-image threads only help with loading

    const int tile_count = (times + step - 1) / step;
    for (int t = 0; t < tile_count; ++t)
    {
        const int base = t * step;
        const int remaining = times - base;
        const int valid = (remaining < step) ? remaining : step;   // triangles in this tile

        // Cooperative load: one triangle (6 components) per thread. Entries
        // beyond `valid` are never read, so they need no initialisation.
        if (lid < valid)
        {
            for (int c = 0; c < 6; ++c)
            {
                tile_uv[c][lid] = uv_triangulation_0[c * times + base + lid];
            }
        }
        __syncthreads();

        if (searching)
        {
            for (int k = 0; k < valid; ++k)
            {
                float A[2], B[2], C[2];
                A[0] = tile_uv[0][k];
                A[1] = tile_uv[1][k];
                B[0] = tile_uv[2][k];
                B[1] = tile_uv[3][k];
                C[0] = tile_uv[4][k];
                C[1] = tile_uv[5][k];

                if (pointInTriangle_cuda(A, B, C, searchPoint))   // inside / on edge
                {
                    index_temp = base + k;
                    inside = 0;
                    searching = false;
                    break;
                }
            }
        }

        // Barrier before the tile is overwritten; it also tells the block
        // whether anyone is still searching. The result is identical for all
        // threads of the block, so the break below is uniform.
        if (!__syncthreads_or(searching))
        {
            break;
        }
    }

    // No barriers below this point, so early returns are safe.
    if (!in_image)
    {
        return;
    }

    inside_ptr_d[2 * pixel] = inside;
    inside_ptr_d[2 * pixel + 1] = index_temp;

    if (inside != 0)
    {
        return;
    }

    // ---------------------------------------------------------------------
    // Phase 2: map the pixel to 3D and test visibility from each panorama.
    // ---------------------------------------------------------------------
    const int tr_index = index_temp;

    float uv_triangulation_temp[6];
    for (int c = 0; c < 6; ++c)
    {
        uv_triangulation_temp[c] = uv_triangulation[6 * tr_index + c];
    }

    float w_triangulation_temp[9];
    for (int c = 0; c < 9; ++c)
    {
        w_triangulation_temp[c] = w_triangulation[9 * tr_index + c];
    }

    float pt3d[3];
    caculateMappingTriangle2dTo3d(uv_triangulation_temp, searchPoint, w_triangulation_temp, pt3d);

    int not_in_shelter_num = 0;
    for (int m = 0; m < panorama_num_d; m++)
    {
        // The per-pixel output row holds at most PanoramaNUM entries.
        if (not_in_shelter_num >= PanoramaNUM)
        {
            break;
        }

        float optical_center[3];
        optical_center[0] = position_panorama_vect[m * 3];
        optical_center[1] = position_panorama_vect[m * 3 + 1];
        optical_center[2] = position_panorama_vect[m * 3 + 2];

        const float redius_ = distance3d(optical_center, pt3d);
        if (redius_ > dis_threshold)
        {
            continue;   // panorama too far away from the surface point
        }

        float mid_pt[3];
        mid_pt[0] = (optical_center[0] + pt3d[0]) * 0.5f;
        mid_pt[1] = (optical_center[1] + pt3d[1]) * 0.5f;
        mid_pt[2] = (optical_center[2] + pt3d[2]) * 0.5f;

        // Only triangles whose centre is near the segment midpoint are
        // ray-tested; the radius is loop-invariant.
        const float cull_radius = 0.45f * redius_;

        bool occluded = false;
        for (int i = 0; i < times; i++)
        {
            if (i == tr_index)
            {
                continue;   // the triangle the point lies on cannot occlude it
            }

            float pt_temp[3];
            pt_temp[0] = w_triangulation_center[3 * i];
            pt_temp[1] = w_triangulation_center[3 * i + 1];
            pt_temp[2] = w_triangulation_center[3 * i + 2];
            if (distance3d(pt_temp, mid_pt) > cull_radius)
            {
                continue;
            }

            // Load the full triangle only after it survived the cheap cull.
            float triangle[9];
            for (int c = 0; c < 9; ++c)
            {
                triangle[c] = w_triangulation[9 * i + c];
            }

            if (rayTracingShelterCaculate_cuda2(pt3d, optical_center, triangle) == 0)   // in shelter
            {
                occluded = true;
                break;
            }
        }

        if (!occluded)
        {
            result[pixel * PanoramaNUM + not_in_shelter_num] = m + 1;   // 1-based panorama id
            not_in_shelter_num++;
        }
    }
}