CUDA Thread Indexing

1D grid of 1D blocks

__device__ int getGlobalIdx_1D_1D()
{
return blockIdx.x *blockDim.x + threadIdx.x;
}



1D grid of 2D blocks

__device__ int getGlobalIdx_1D_2D()
{
return blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
}



1D grid of 3D blocks

__device__ int getGlobalIdx_1D_3D()
{
return blockIdx.x * blockDim.x * blockDim.y * blockDim.z 
+ threadIdx.z * blockDim.y * blockDim.x + threadIdx.y * blockDim.x + threadIdx.x;
}

{
return blockIdx.x * blockDim.x * blockDim.y * blockDim.z 
+ threadIdx.z * blockDim.y * blockDim.x + threadIdx.y * blockDim.x + threadIdx.x;
}



2D grid of 1D blocks

 __device__ int getGlobalIdx_2D_1D()
{
int blockId   = blockIdx.y * gridDim.x + blockIdx.x; 
int threadId = blockId * blockDim.x + threadIdx.x; 
return threadId;
}

{
int blockId   = blockIdx.y * gridDim.x + blockIdx.x; 
int threadId = blockId * blockDim.x + threadIdx.x; 
return threadId;
}



2D grid of 2D blocks  

__device__ int getGlobalIdx_2D_2D()
{
int blockId = blockIdx.x + blockIdx.y * gridDim.x; 
int threadId = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x;
return threadId;
}

2D grid of 3D blocks

__device__ int getGlobalIdx_2D_3D()
{
int blockId = blockIdx.x 
+ blockIdx.y * gridDim.x; 
int threadId = blockId * (blockDim.x * blockDim.y * blockDim.z)
  + (threadIdx.z * (blockDim.x * blockDim.y))
  + (threadIdx.y * blockDim.x)
  + threadIdx.x;
return threadId;
}



3D grid of 1D blocks

__device__ int getGlobalIdx_3D_1D()
{
int blockId = blockIdx.x 
+ blockIdx.y * gridDim.x 
+ gridDim.x * gridDim.y * blockIdx.z; 
int threadId = blockId * blockDim.x + threadIdx.x;
return threadId;
}



3D grid of 2D blocks

__device__ int getGlobalIdx_3D_2D()
{
int blockId = blockIdx.x 
        + blockIdx.y * gridDim.x 
+ gridDim.x * gridDim.y * blockIdx.z; 
int threadId = blockId * (blockDim.x * blockDim.y)
 + (threadIdx.y * blockDim.x)
 + threadIdx.x;
return threadId;
}



3D grid of 3D blocks

__device__ int getGlobalIdx_3D_3D()
{
int blockId = blockIdx.x 
+ blockIdx.y * gridDim.x 
+ gridDim.x * gridDim.y * blockIdx.z; 
int threadId = blockId * (blockDim.x * blockDim.y * blockDim.z)
 + (threadIdx.z * (blockDim.x * blockDim.y))
 + (threadIdx.y * blockDim.x)
 + threadIdx.x;
return threadId;
}

  

posted on 2015-07-11 18:59  袁军峰  阅读(277)  评论(0编辑  收藏  举报

导航