cuda实践3

share memory 使用

 

template <int BLOCK_SIZE> __global__ void caculateShelter_cuda(
    float *uv_triangulation_0,
    float *uv_triangulation,
    float *w_triangulation,
    float *w_triangulation_center,
    float *position_panorama_vect, int triangule_num_d, int panorama_num_d, int imgHeight_d, int imgWidth_d, float dis_threshold, int*inside_ptr_d,  int* result)
{
    int distance_threshold = dis_threshold;
    int times = triangule_num_d;
    int num = imgHeight_d * imgWidth_d;

    int x = threadIdx.x;
    int y = threadIdx.y;
    int Row = blockIdx.y * BLOCK_SIZE + threadIdx.y;
    int Col = blockIdx.x * BLOCK_SIZE + threadIdx.x;
    //int blockId = blockIdx.x + blockIdx.y * gridDim.x;
    //int threadId = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x;
    float searchPoint[2];
    searchPoint[0] = Col * 1.0 / imgWidth_d;
    searchPoint[1] = (imgHeight_d - Row) * 1.0 / imgHeight_d;

    int threadId = Row * imgWidth_d + Col;

    int inside = -1;
    int index_temp = -1;
    int step = BLOCK_SIZE * BLOCK_SIZE;
    int grid_size = (times / step) + 1;

    for (int t = 0; t < grid_size; t++)
    {            
        __shared__ float tile_uv[BLOCK_SIZE*BLOCK_SIZE * 6];
        if ((t*step + BLOCK_SIZE * y + x) < times)
        {
            tile_uv[BLOCK_SIZE * y * 6 + x] = uv_triangulation_0[t*step + BLOCK_SIZE * y + x];    //注意越域
            tile_uv[BLOCK_SIZE * y * 6 + 1 * BLOCK_SIZE + x] = uv_triangulation_0[1 * times + t * step + BLOCK_SIZE * y + x];
            tile_uv[BLOCK_SIZE * y * 6 + 2 * BLOCK_SIZE + x] = uv_triangulation_0[2 * times + t * step + BLOCK_SIZE * y + x];
            tile_uv[BLOCK_SIZE * y * 6 + 3 * BLOCK_SIZE + x] = uv_triangulation_0[3 * times + t * step + BLOCK_SIZE * y + x];
            tile_uv[BLOCK_SIZE * y * 6 + 4 * BLOCK_SIZE + x] = uv_triangulation_0[4 * times + t * step + BLOCK_SIZE * y + x];
            tile_uv[BLOCK_SIZE * y * 6 + 5 * BLOCK_SIZE + x] = uv_triangulation_0[5 * times + t * step + BLOCK_SIZE * y + x];
        }
        else
        {
            tile_uv[BLOCK_SIZE * y * 6 + x] = 0.0;    //注意越域
            tile_uv[BLOCK_SIZE * y * 6 + 1 * BLOCK_SIZE + x] = 0.0;
            tile_uv[BLOCK_SIZE * y * 6 + 2 * BLOCK_SIZE + x] = 0.0;
            tile_uv[BLOCK_SIZE * y * 6 + 3 * BLOCK_SIZE + x] = 0.0;
            tile_uv[BLOCK_SIZE * y * 6 + 4 * BLOCK_SIZE + x] = 0.0;
            tile_uv[BLOCK_SIZE * y * 6 + 5 * BLOCK_SIZE + x] = 0.0;
        }
        __syncthreads();

        for (int k = 0; k < step; ++k)
        {
            int indexy = k / BLOCK_SIZE;
            int indexx = k % BLOCK_SIZE;

            float A[2], B[2], C[2];
            A[0] = tile_uv[BLOCK_SIZE * indexy * 6 + indexx];
            A[1] = tile_uv[BLOCK_SIZE * indexy * 6 + 1 * BLOCK_SIZE + indexx];
            B[0] = tile_uv[BLOCK_SIZE * indexy * 6 + 2 * BLOCK_SIZE + indexx];
            B[1] = tile_uv[BLOCK_SIZE * indexy * 6 + 3 * BLOCK_SIZE + indexx];
            C[0] = tile_uv[BLOCK_SIZE * indexy * 6 + 4 * BLOCK_SIZE + indexx];
            C[1] = tile_uv[BLOCK_SIZE * indexy * 6 + 5 * BLOCK_SIZE + indexx];

            if ((t*step+k) >= times)
            {
                break;
            }
            bool inornot = pointInTriangle_cuda(A, B, C, searchPoint);
            if (inornot && inside==-1 && inside==-1)   //inside /on 
            {
                index_temp = t * step + k;
                inside = 0;
                break;
            }
        }    
        __syncthreads();

        //////////////////////////////////////////////
        //float A[2], B[2], C[2];
        //A[0] = uv_triangulation_0[t];
        //A[1] = uv_triangulation_0[1 * times + t];
        //B[0] = uv_triangulation_0[2 * times + t];
        //B[1] = uv_triangulation_0[3 * times + t];
        //C[0] = uv_triangulation_0[4 * times + t];
        //C[1] = uv_triangulation_0[5 * times + t];
        //if (pointInTriangle_cuda(A, B, C, searchPoint))   //inside /on 
        //{
        //    index_temp = t;
        //    inside = 0;
        //    break;
        //}
    }

    inside_ptr_d[2 * threadId] = inside;
    inside_ptr_d[2 * threadId + 1] = index_temp;

    if (inside == 0 && threadId < num)
    {
        int tr_index = index_temp;
        float pt3d[3];
        float uv_triangulation_temp[6];
        uv_triangulation_temp[0] = uv_triangulation[6 * tr_index];
        uv_triangulation_temp[1] = uv_triangulation[6 * tr_index + 1];
        uv_triangulation_temp[2] = uv_triangulation[6 * tr_index + 2];
        uv_triangulation_temp[3] = uv_triangulation[6 * tr_index + 3];
        uv_triangulation_temp[4] = uv_triangulation[6 * tr_index + 4];
        uv_triangulation_temp[5] = uv_triangulation[6 * tr_index + 5];

        float w_triangulation_temp[9];
        w_triangulation_temp[0] = w_triangulation[9 * tr_index];
        w_triangulation_temp[1] = w_triangulation[9 * tr_index + 1];
        w_triangulation_temp[2] = w_triangulation[9 * tr_index + 2];
        w_triangulation_temp[3] = w_triangulation[9 * tr_index + 3];
        w_triangulation_temp[4] = w_triangulation[9 * tr_index + 4];
        w_triangulation_temp[5] = w_triangulation[9 * tr_index + 5];
        w_triangulation_temp[6] = w_triangulation[9 * tr_index + 6];
        w_triangulation_temp[7] = w_triangulation[9 * tr_index + 7];
        w_triangulation_temp[8] = w_triangulation[9 * tr_index + 8];

        caculateMappingTriangle2dTo3d(uv_triangulation_temp, searchPoint, w_triangulation_temp, pt3d); // rewrite in cuda

        int not_in_shelter_num = 0;
        //Shelter check
        for (int m = 0; m < panorama_num_d; m++)
        {
            if (not_in_shelter_num >= PanoramaNUM)
            {
                break;
            }
            // add threshold ditance (pt3d to optical_center)
            //...
            // add threshold  angle of triangle face normal and vector(pt3d to optical_center)
            //...
            float optical_center[3];
            optical_center[0] = position_panorama_vect[m * 3];
            optical_center[1] = position_panorama_vect[m * 3 +1];
            optical_center[2] = position_panorama_vect[m * 3 +2];

            //caculate shelter
            //caculate near triangles of the ray
            int ret0 = 0;
            float redius_ = distance3d(optical_center, pt3d);
            if (redius_ > distance_threshold)
            {
                continue;
            }

            float mid_pt[3];
            mid_pt[0] = (optical_center[0] + pt3d[0])*0.5;
            mid_pt[1] = (optical_center[1] + pt3d[1])*0.5;
            mid_pt[2] = (optical_center[2] + pt3d[2])*0.5;

            for (int i = 0; i < times; i++)
            {
                float triangle[9];
                triangle[0] = w_triangulation[9 * i];
                triangle[1] = w_triangulation[9 * i + 1];
                triangle[2] = w_triangulation[9 * i + 2];
                triangle[3] = w_triangulation[9 * i + 3];
                triangle[4] = w_triangulation[9 * i + 4];
                triangle[5] = w_triangulation[9 * i + 5];
                triangle[6] = w_triangulation[9 * i + 6];
                triangle[7] = w_triangulation[9 * i + 7];
                triangle[8] = w_triangulation[9 * i + 8];
                
                float pt_temp[3];
                pt_temp[0] = w_triangulation_center[3 * i];
                pt_temp[1] = w_triangulation_center[3 * i + 1];
                pt_temp[2] = w_triangulation_center[3 * i + 2];

                float temp1 = distance3d(pt_temp, mid_pt);
                if (temp1 > 0.45*redius_ || i == tr_index)
                {
                    continue;
                }

                int ret1 = rayTracingShelterCaculate_cuda2(pt3d, optical_center, triangle);
                if (ret1 == 0)  // in shelter
                {
                    ret0 = -1;
                    break;
                }
            }
     
            if (ret0 == 0)
            {
                result[threadId*PanoramaNUM + not_in_shelter_num] = m+1;    //not in shelter
                not_in_shelter_num++;
            }
        }
    }

}

 

posted @ 2020-08-04 11:44  玥茹苟  阅读(169)  评论(0编辑  收藏  举报