Direct3D互操作性

  Direct3D互操作性支持Direct3D 9,Direct3D 10,和Direct3D 11。

  一个CUDA上下文一次只能与一个Direct3D设备互操作,CUDA上下文和Direct3D设备必须在同一个GPU上创建,并且Direct3D设备必须使用D3DCREATE_HARDWARE_VERTEXPROCESSING标志创建。

  与Direct3D互操作要求:在调用任何其它运行时函数之前,必须使用cudaD3D9SetDirect3DDevice(),cudaD3D10SetDirect3DDevice()或cudaD3D11SetDirect3DDevice()指定Direct3D设备。可用cudaD3D9GetDevice(),cudaD3D10GetDevice()和cudaD3D11GetDevice()检索与某个适配器关联的CUDA设备。

  可以被映射到CUDA地址空间的Direct3D资源有Direct3D缓冲区,纹理和表面。可以使用cudaGraphicsD3D9RegisterResource(), cudaGraphicsD3D10RegisterResource()和cudaGraphicsD3D11RegisterResource()注册这些资源。

  下面的代码使用内核动态地修改存储在顶点缓冲对象中的一个二维width*height顶点网格。

  Direct3D 9版本

// Direct3D 9 interface object; assigned in main() by Direct3DCreate9().
IDirect3D9* D3D;

// Direct3D 9 device; filled in by CreateDevice() in main().
IDirect3DDevice9* device;

// One vertex record as stored in the vertex buffer: a position followed by a
// packed color value.
struct CUSTOMVERTEX {
    FLOAT x;
    FLOAT y;
    FLOAT z;
    DWORD color;
};

// Vertex buffer created on `device`, and the CUDA graphics-resource handle
// under which that same buffer is registered with CUDA.
IDirect3DVertexBuffer9* positionsVB;
struct cudaGraphicsResource* positionsVB_CUDA;

// Sample entry point: initializes Direct3D 9, picks the first adapter that
// maps to a CUDA device, creates the Direct3D device on it, binds that device
// to CUDA, and creates/registers the vertex buffer that Render() updates.
//
// `hWnd`, `params`, `width` and `height` are not declared in this listing;
// they belong to the elided ("...") parts of the sample.
//
// Returns 1 if Direct3D cannot be initialized or no CUDA-enabled adapter
// exists.
int main() {
    // Initialize Direct3D
    D3D = Direct3DCreate9(D3D_SDK_VERSION);
    if (D3D == NULL)
        return 1;

    // Get a CUDA-enabled adapter: stop at the first adapter for which
    // cudaD3D9GetDevice() succeeds.
    const unsigned int adapterCount = D3D->GetAdapterCount();
    unsigned int adapter = 0;
    for (; adapter < adapterCount; adapter++) {
        D3DADAPTER_IDENTIFIER9 adapterId;
        D3D->GetAdapterIdentifier(adapter, 0, &adapterId);
        int dev;
        if (cudaD3D9GetDevice(&dev, adapterId.DeviceName) == cudaSuccess)
            break;
    }
    // The loop ran to completion: `adapter` is not a valid ordinal, so do not
    // hand it to CreateDevice().
    if (adapter == adapterCount)
        return 1;

    // Create device
    ...
    D3D->CreateDevice(adapter, D3DDEVTYPE_HAL, hWnd,
                      D3DCREATE_HARDWARE_VERTEXPROCESSING,
                      &params, &device);

    // Register device with CUDA
    cudaD3D9SetDirect3DDevice(device);

    // Create vertex buffer and register it with CUDA
    unsigned int size = width * height * sizeof(CUSTOMVERTEX);
    device->CreateVertexBuffer(size, 0, D3DFVF_CUSTOMVERTEX,
                               D3DPOOL_DEFAULT, &positionsVB, 0);
    cudaGraphicsD3D9RegisterResource(&positionsVB_CUDA,
                                     positionsVB,
                                     cudaGraphicsRegisterFlagsNone);
    // NOTE(review): write-discard assumes CUDA only overwrites the buffer and
    // never reads its previous contents (Render() maps it "for writing");
    // remove this hint if a kernel needs the old data.
    cudaGraphicsResourceSetMapFlags(positionsVB_CUDA,
                                    cudaGraphicsMapFlagsWriteDiscard);

    // Launch rendering loop
    while (...) {
        ...
        Render();
        ...
    }
    ...
}
// Per-frame update: maps the registered vertex buffer into the CUDA address
// space, launches createVertices on it, then unmaps it before drawing.
//
// `width`, `height` and `time` are not declared in this listing; they come
// from the elided parts of the sample.
void Render() {
    // Map vertex buffer for writing from CUDA
    float4* positions;
    cudaGraphicsMapResources(1, &positionsVB_CUDA, 0);
    size_t num_bytes;
    // The buffer's CUSTOMVERTEX records are accessed through a float4 pointer.
    cudaGraphicsResourceGetMappedPointer((void**)&positions,
                                         &num_bytes,
                                         positionsVB_CUDA);

    // Execute kernel
    dim3 dimBlock(16, 16, 1);
    // Truncating division: the launch covers every vertex only when width and
    // height are multiples of the 16x16 block size.
    dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
    createVertices<<<dimGrid, dimBlock>>>(positions, time, width, height);

    // Unmap vertex buffer
    cudaGraphicsUnmapResources(1, &positionsVB_CUDA, 0);

    // Draw and present
    ...
}

// Tears down the shared vertex buffer: the CUDA registration is dropped
// first, then the Direct3D buffer itself is released.
void releaseVB() {
    cudaGraphicsUnregisterResource(positionsVB_CUDA);
    positionsVB->Release();
}

__global__
void createVertices(float4* positions, float time, unsigned int width, unsigned int height) {
unsigned
int x = blockIdx.x * blockDim.x + threadIdx.x;
unsigned
int y = blockIdx.y * blockDim.y + threadIdx.y;
// Calculate uv coordinates
float u = x / (float)width;
float v = y / (float)height;