爨爨爨好

  博客园  :: 首页  :: 新随笔  :: 联系 :: 订阅 订阅  :: 管理

立方体纹理贴图

▶ 源代码。用纹理方法把元素按原顺序从 CUDA3D 数组中取出来,求个相反数放入全局内存,输出。

  1 #include <stdio.h>
  2 #include "cuda_runtime.h"
  3 #include "device_launch_parameters.h"
  4 #include <helper_functions.h>
  5 #include <helper_cuda.h>
  6 
  7 #define MIN_EPSILON_ERROR 5e-3f
  8 
  9 texture<float, cudaTextureTypeCubemap> tex;
 10 
 11 __global__ void transformKernel(float *g_odata, int width)
 12 {
 13     unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
 14     unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
 15 
 16     float u = ((x + 0.5f) / (float)width) * 2.f - 1.f;// [0, width-1] 间隔 1 的坐标变换为 [-1+1/width,1-1/width] 间隔 1/width 的坐标
 17     float v = ((y + 0.5f) / (float)width) * 2.f - 1.f;
 18 
 19     float cx, cy, cz;
 20 
 21     for (unsigned int face = 0; face < 6; face++)
 22     {
 23         if (face == 0)// x 正层
 24         {
 25             cx = 1;
 26             cy = -v;
 27             cz = -u;
 28         }
 29         else if (face == 1)// x 负层
 30         {
 31             cx = -1;
 32             cy = -v;
 33             cz = u;
 34         }
 35         else if (face == 2)// y 正层
 36         {
 37             cx = u;
 38             cy = 1;
 39             cz = v;
 40         }
 41         else if (face == 3)// y 负层
 42         {
 43             cx = u;
 44             cy = -1;
 45             cz = -v;
 46         }
 47         else if (face == 4)// z 正层
 48         {
 49             cx = u;
 50             cy = -v;
 51             cz = 1;
 52         }
 53         else if (face == 5)// z 负层
 54         {
 55             cx = -u;
 56             cy = -v;
 57             cz = -1;
 58         }
 59         g_odata[face*width*width + y*width + x] = - texCubemap(tex, cx, cy, cz);// 纹理数据读取到全局内存中输出
 60     }
 61 }
 62 
 63 int main(int argc, char** argv)
 64 {
 65     unsigned int width = 64, num_faces = 6, num_layers = 1;
 66     unsigned int cubemap_size = width * width * num_faces;
 67     unsigned int size = cubemap_size * num_layers * sizeof(float);
 68     float *h_data = (float *)malloc(size);
 69     float *h_data_ref = (float *)malloc(size);  // 理论输出
 70     float *d_data = NULL;
 71     cudaMalloc((void **)&d_data, size);
 72 
 73     for (int i = 0; i < (int)(cubemap_size * num_layers); i++)
 74         h_data[i] = (float)i;
 75     for (unsigned int layer = 0; layer < num_layers; layer++)
 76     {
 77         for (int i = 0; i < (int)(cubemap_size); i++)
 78             h_data_ref[layer*cubemap_size + i] = -h_data[layer*cubemap_size + i] + layer;
 79     }
 80 
 81     printf("\n\t\Input data.n\t");
 82     for (int i = 0; i < width * num_faces * num_layers; i++)
 83     {
 84         printf("%2.1f ", h_data[i]);
 85         if ((i + 1) % width == 0)
 86             printf("\n\t");
 87         if ((i + 1) % (width *width) == 0)
 88             printf("\n\t");
 89     }
 90     printf("\n\tIdeal output data\n\t");
 91     for (int i = 0; i < width * num_faces * num_layers; i++)
 92     {
 93         printf("%2.1f ", h_data_ref[i]);
 94         if ((i + 1) % width == 0)
 95             printf("\n\t");
 96         if ((i + 1) % (width *width) == 0)
 97             printf("\n\t");
 98     }
 99 
100     // 设置 CUDA 3D 数组参数和数据拷贝
101     cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
102     cudaArray *cu_3darray;
103     cudaMalloc3DArray(&cu_3darray, &channelDesc, make_cudaExtent(width, width, num_faces), cudaArrayCubemap);
104     cudaMemcpy3DParms myparms = { 0 };
105     myparms.srcPos = make_cudaPos(0, 0, 0);
106     myparms.dstPos = make_cudaPos(0, 0, 0);
107     myparms.srcPtr = make_cudaPitchedPtr(h_data, width * sizeof(float), width, width);
108     myparms.dstArray = cu_3darray;
109     myparms.extent = make_cudaExtent(width, width, num_faces);
110     myparms.kind = cudaMemcpyHostToDevice;
111     cudaMemcpy3D(&myparms);
112 
113     // 设置纹理参数并绑定
114     tex.addressMode[0] = cudaAddressModeWrap;
115     tex.addressMode[1] = cudaAddressModeWrap;
116     tex.filterMode = cudaFilterModeLinear;
117     tex.normalized = true;
118     cudaBindTextureToArray(tex, cu_3darray, channelDesc);
119 
120     dim3 dimBlock(8, 8, 1);
121     dim3 dimGrid(width / dimBlock.x, width / dimBlock.y, 1);
122     printf("\n\tCubemap data of %d * %d * %d: Grid size is %d x %d, each block has 8 x 8 threads.\n", width, width, num_layers, dimGrid.x, dimGrid.y);
123     transformKernel << < dimGrid, dimBlock >> >(d_data, width);// 预跑
124     cudaDeviceSynchronize();
125 
126     StopWatchInterface *timer = NULL;// 新的计时工具
127     sdkCreateTimer(&timer);
128     sdkStartTimer(&timer);
129 
130     transformKernel << < dimGrid, dimBlock, 0 >> >(d_data, width);
131     cudaDeviceSynchronize();
132 
133     sdkStopTimer(&timer);
134     printf("\n\Time: %.3f msec, %.2f Mtexlookups/sec\n", sdkGetTimerValue(&timer), (cubemap_size / (sdkGetTimerValue(&timer) / 1000.0f) / 1e6));
135     sdkDeleteTimer(&timer);
136 
137     // 返回计算结果并检验
138     memset(h_data, 0, size);
139     cudaMemcpy(h_data, d_data, size, cudaMemcpyDeviceToHost);
140     if (checkCmdLineFlag(argc, (const char **)argv, "regression"))
141         sdkWriteFile<float>("./data/regression.dat", h_data, width * width, 0.0f, false);
142     else
143         printf("Comparing kernel output to expected data return %d\n", compareData(h_data, h_data_ref, cubemap_size, MIN_EPSILON_ERROR, 0.0f));
144 
145     printf("\n\tActual output data\n\t");
146     for (int i = 0; i < width * num_faces * num_layers; i++)
147     {
148         printf("%2.1f ", h_data[i]);
149         if ((i + 1) % width == 0)
150             printf("\n\t");
151         if ((i + 1) % (width * width) == 0)
152             printf("\n\t");
153     }
154 
155     free(h_data);
156     free(h_data_ref);
157     cudaFree(d_data);
158     cudaFreeArray(cu_3darray);
159 
160     getchar();
161     return 0;
162 }

▶ 输出结果

    Input data.n    0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0 12.0 13.0 14.0 15.0 16.0 17.0 18.0 19.0 20.0 21.0 22.0 23.0 24.0 25.0 26.0 27.0 28.0 29.0 30.0 31.0 32.0 33.0 34.0 35.0 36.0 37.0 38.0 39.0 40.0 41.0 42.0 43.0 44.0 45.0 46.0 47.0 48.0 49.0 50.0 51.0 52.0 53.0 54.0 55.0 56.0 57.0 58.0 59.0 60.0 61.0 62.0 63.0
    64.0 65.0 66.0 67.0 68.0 69.0 70.0 71.0 72.0 73.0 74.0 75.0 76.0 77.0 78.0 79.0 80.0 81.0 82.0 83.0 84.0 85.0 86.0 87.0 88.0 89.0 90.0 91.0 92.0 93.0 94.0 95.0 96.0 97.0 98.0 99.0 100.0 101.0 102.0 103.0 104.0 105.0 106.0 107.0 108.0 109.0 110.0 111.0 112.0 113.0 114.0 115.0 116.0 117.0 118.0 119.0 120.0 121.0 122.0 123.0 124.0 125.0 126.0 127.0
    128.0 129.0 130.0 131.0 132.0 133.0 134.0 135.0 136.0 137.0 138.0 139.0 140.0 141.0 142.0 143.0 144.0 145.0 146.0 147.0 148.0 149.0 150.0 151.0 152.0 153.0 154.0 155.0 156.0 157.0 158.0 159.0 160.0 161.0 162.0 163.0 164.0 165.0 166.0 167.0 168.0 169.0 170.0 171.0 172.0 173.0 174.0 175.0 176.0 177.0 178.0 179.0 180.0 181.0 182.0 183.0 184.0 185.0 186.0 187.0 188.0 189.0 190.0 191.0
    192.0 193.0 194.0 195.0 196.0 197.0 198.0 199.0 200.0 201.0 202.0 203.0 204.0 205.0 206.0 207.0 208.0 209.0 210.0 211.0 212.0 213.0 214.0 215.0 216.0 217.0 218.0 219.0 220.0 221.0 222.0 223.0 224.0 225.0 226.0 227.0 228.0 229.0 230.0 231.0 232.0 233.0 234.0 235.0 236.0 237.0 238.0 239.0 240.0 241.0 242.0 243.0 244.0 245.0 246.0 247.0 248.0 249.0 250.0 251.0 252.0 253.0 254.0 255.0
    256.0 257.0 258.0 259.0 260.0 261.0 262.0 263.0 264.0 265.0 266.0 267.0 268.0 269.0 270.0 271.0 272.0 273.0 274.0 275.0 276.0 277.0 278.0 279.0 280.0 281.0 282.0 283.0 284.0 285.0 286.0 287.0 288.0 289.0 290.0 291.0 292.0 293.0 294.0 295.0 296.0 297.0 298.0 299.0 300.0 301.0 302.0 303.0 304.0 305.0 306.0 307.0 308.0 309.0 310.0 311.0 312.0 313.0 314.0 315.0 316.0 317.0 318.0 319.0
    320.0 321.0 322.0 323.0 324.0 325.0 326.0 327.0 328.0 329.0 330.0 331.0 332.0 333.0 334.0 335.0 336.0 337.0 338.0 339.0 340.0 341.0 342.0 343.0 344.0 345.0 346.0 347.0 348.0 349.0 350.0 351.0 352.0 353.0 354.0 355.0 356.0 357.0 358.0 359.0 360.0 361.0 362.0 363.0 364.0 365.0 366.0 367.0 368.0 369.0 370.0 371.0 372.0 373.0 374.0 375.0 376.0 377.0 378.0 379.0 380.0 381.0 382.0 383.0

    Ideal output data
    0.0 -1.0 -2.0 -3.0 -4.0 -5.0 -6.0 -7.0 -8.0 -9.0 -10.0 -11.0 -12.0 -13.0 -14.0 -15.0 -16.0 -17.0 -18.0 -19.0 -20.0 -21.0 -22.0 -23.0 -24.0 -25.0 -26.0 -27.0 -28.0 -29.0 -30.0 -31.0 -32.0 -33.0 -34.0 -35.0 -36.0 -37.0 -38.0 -39.0 -40.0 -41.0 -42.0 -43.0 -44.0 -45.0 -46.0 -47.0 -48.0 -49.0 -50.0 -51.0 -52.0 -53.0 -54.0 -55.0 -56.0 -57.0 -58.0 -59.0 -60.0 -61.0 -62.0 -63.0
    -64.0 -65.0 -66.0 -67.0 -68.0 -69.0 -70.0 -71.0 -72.0 -73.0 -74.0 -75.0 -76.0 -77.0 -78.0 -79.0 -80.0 -81.0 -82.0 -83.0 -84.0 -85.0 -86.0 -87.0 -88.0 -89.0 -90.0 -91.0 -92.0 -93.0 -94.0 -95.0 -96.0 -97.0 -98.0 -99.0 -100.0 -101.0 -102.0 -103.0 -104.0 -105.0 -106.0 -107.0 -108.0 -109.0 -110.0 -111.0 -112.0 -113.0 -114.0 -115.0 -116.0 -117.0 -118.0 -119.0 -120.0 -121.0 -122.0 -123.0 -124.0 -125.0 -126.0 -127.0
    -128.0 -129.0 -130.0 -131.0 -132.0 -133.0 -134.0 -135.0 -136.0 -137.0 -138.0 -139.0 -140.0 -141.0 -142.0 -143.0 -144.0 -145.0 -146.0 -147.0 -148.0 -149.0 -150.0 -151.0 -152.0 -153.0 -154.0 -155.0 -156.0 -157.0 -158.0 -159.0 -160.0 -161.0 -162.0 -163.0 -164.0 -165.0 -166.0 -167.0 -168.0 -169.0 -170.0 -171.0 -172.0 -173.0 -174.0 -175.0 -176.0 -177.0 -178.0 -179.0 -180.0 -181.0 -182.0 -183.0 -184.0 -185.0 -186.0 -187.0 -188.0 -189.0 -190.0 -191.0
    -192.0 -193.0 -194.0 -195.0 -196.0 -197.0 -198.0 -199.0 -200.0 -201.0 -202.0 -203.0 -204.0 -205.0 -206.0 -207.0 -208.0 -209.0 -210.0 -211.0 -212.0 -213.0 -214.0 -215.0 -216.0 -217.0 -218.0 -219.0 -220.0 -221.0 -222.0 -223.0 -224.0 -225.0 -226.0 -227.0 -228.0 -229.0 -230.0 -231.0 -232.0 -233.0 -234.0 -235.0 -236.0 -237.0 -238.0 -239.0 -240.0 -241.0 -242.0 -243.0 -244.0 -245.0 -246.0 -247.0 -248.0 -249.0 -250.0 -251.0 -252.0 -253.0 -254.0 -255.0
    -256.0 -257.0 -258.0 -259.0 -260.0 -261.0 -262.0 -263.0 -264.0 -265.0 -266.0 -267.0 -268.0 -269.0 -270.0 -271.0 -272.0 -273.0 -274.0 -275.0 -276.0 -277.0 -278.0 -279.0 -280.0 -281.0 -282.0 -283.0 -284.0 -285.0 -286.0 -287.0 -288.0 -289.0 -290.0 -291.0 -292.0 -293.0 -294.0 -295.0 -296.0 -297.0 -298.0 -299.0 -300.0 -301.0 -302.0 -303.0 -304.0 -305.0 -306.0 -307.0 -308.0 -309.0 -310.0 -311.0 -312.0 -313.0 -314.0 -315.0 -316.0 -317.0 -318.0 -319.0
    -320.0 -321.0 -322.0 -323.0 -324.0 -325.0 -326.0 -327.0 -328.0 -329.0 -330.0 -331.0 -332.0 -333.0 -334.0 -335.0 -336.0 -337.0 -338.0 -339.0 -340.0 -341.0 -342.0 -343.0 -344.0 -345.0 -346.0 -347.0 -348.0 -349.0 -350.0 -351.0 -352.0 -353.0 -354.0 -355.0 -356.0 -357.0 -358.0 -359.0 -360.0 -361.0 -362.0 -363.0 -364.0 -365.0 -366.0 -367.0 -368.0 -369.0 -370.0 -371.0 -372.0 -373.0 -374.0 -375.0 -376.0 -377.0 -378.0 -379.0 -380.0 -381.0 -382.0 -383.0

    Cubemap data of 64 * 64 * 1: Grid size is 8 x 8, each block has 8 x 8 threads.

Time: 0.098 msec, 249.50 Mtexlookups/sec
Comparing kernel output to expected data return 1

    Actual output data
    -0.0 -1.0 -2.0 -3.0 -4.0 -5.0 -6.0 -7.0 -8.0 -9.0 -10.0 -11.0 -12.0 -13.0 -14.0 -15.0 -16.0 -17.0 -18.0 -19.0 -20.0 -21.0 -22.0 -23.0 -24.0 -25.0 -26.0 -27.0 -28.0 -29.0 -30.0 -31.0 -32.0 -33.0 -34.0 -35.0 -36.0 -37.0 -38.0 -39.0 -40.0 -41.0 -42.0 -43.0 -44.0 -45.0 -46.0 -47.0 -48.0 -49.0 -50.0 -51.0 -52.0 -53.0 -54.0 -55.0 -56.0 -57.0 -58.0 -59.0 -60.0 -61.0 -62.0 -63.0
    -64.0 -65.0 -66.0 -67.0 -68.0 -69.0 -70.0 -71.0 -72.0 -73.0 -74.0 -75.0 -76.0 -77.0 -78.0 -79.0 -80.0 -81.0 -82.0 -83.0 -84.0 -85.0 -86.0 -87.0 -88.0 -89.0 -90.0 -91.0 -92.0 -93.0 -94.0 -95.0 -96.0 -97.0 -98.0 -99.0 -100.0 -101.0 -102.0 -103.0 -104.0 -105.0 -106.0 -107.0 -108.0 -109.0 -110.0 -111.0 -112.0 -113.0 -114.0 -115.0 -116.0 -117.0 -118.0 -119.0 -120.0 -121.0 -122.0 -123.0 -124.0 -125.0 -126.0 -127.0
    -128.0 -129.0 -130.0 -131.0 -132.0 -133.0 -134.0 -135.0 -136.0 -137.0 -138.0 -139.0 -140.0 -141.0 -142.0 -143.0 -144.0 -145.0 -146.0 -147.0 -148.0 -149.0 -150.0 -151.0 -152.0 -153.0 -154.0 -155.0 -156.0 -157.0 -158.0 -159.0 -160.0 -161.0 -162.0 -163.0 -164.0 -165.0 -166.0 -167.0 -168.0 -169.0 -170.0 -171.0 -172.0 -173.0 -174.0 -175.0 -176.0 -177.0 -178.0 -179.0 -180.0 -181.0 -182.0 -183.0 -184.0 -185.0 -186.0 -187.0 -188.0 -189.0 -190.0 -191.0
    -192.0 -193.0 -194.0 -195.0 -196.0 -197.0 -198.0 -199.0 -200.0 -201.0 -202.0 -203.0 -204.0 -205.0 -206.0 -207.0 -208.0 -209.0 -210.0 -211.0 -212.0 -213.0 -214.0 -215.0 -216.0 -217.0 -218.0 -219.0 -220.0 -221.0 -222.0 -223.0 -224.0 -225.0 -226.0 -227.0 -228.0 -229.0 -230.0 -231.0 -232.0 -233.0 -234.0 -235.0 -236.0 -237.0 -238.0 -239.0 -240.0 -241.0 -242.0 -243.0 -244.0 -245.0 -246.0 -247.0 -248.0 -249.0 -250.0 -251.0 -252.0 -253.0 -254.0 -255.0
    -256.0 -257.0 -258.0 -259.0 -260.0 -261.0 -262.0 -263.0 -264.0 -265.0 -266.0 -267.0 -268.0 -269.0 -270.0 -271.0 -272.0 -273.0 -274.0 -275.0 -276.0 -277.0 -278.0 -279.0 -280.0 -281.0 -282.0 -283.0 -284.0 -285.0 -286.0 -287.0 -288.0 -289.0 -290.0 -291.0 -292.0 -293.0 -294.0 -295.0 -296.0 -297.0 -298.0 -299.0 -300.0 -301.0 -302.0 -303.0 -304.0 -305.0 -306.0 -307.0 -308.0 -309.0 -310.0 -311.0 -312.0 -313.0 -314.0 -315.0 -316.0 -317.0 -318.0 -319.0
    -320.0 -321.0 -322.0 -323.0 -324.0 -325.0 -326.0 -327.0 -328.0 -329.0 -330.0 -331.0 -332.0 -333.0 -334.0 -335.0 -336.0 -337.0 -338.0 -339.0 -340.0 -341.0 -342.0 -343.0 -344.0 -345.0 -346.0 -347.0 -348.0 -349.0 -350.0 -351.0 -352.0 -353.0 -354.0 -355.0 -356.0 -357.0 -358.0 -359.0 -360.0 -361.0 -362.0 -363.0 -364.0 -365.0 -366.0 -367.0 -368.0 -369.0 -370.0 -371.0 -372.0 -373.0 -374.0 -375.0 -376.0 -377.0 -378.0 -379.0 -380.0 -381.0 -382.0 -383.0

▶ 涨姿势

● helper_time.h 中新定义的计时函数

 1 // 关键步骤
 2 StopWatchInterface *timer = NULL;
 3 sdkCreateTimer(&timer);
 4 sdkStartTimer(&timer);
 5 
 6 sdkStopTimer(&timer);
 7 sdkGetTimerValue(&timer);
 8 sdkDeleteTimer(&timer);
 9 
10 // helper_time.h
11 class StopWatchInterface
12 {
13     public:
14         StopWatchInterface() {};
15         virtual ~StopWatchInterface() {};
16 
17     public:
18         virtual void start() = 0;
19         virtual void stop() = 0;
20         virtual void reset() = 0;
21         virtual float getTime() = 0;// 获取计时(计时器不停)
22         virtual float getAverageTime() = 0;
23 };
24 
25 inline bool sdkCreateTimer(StopWatchInterface **timer_interface)
26 {
27 #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
28     *timer_interface = (StopWatchInterface *)new StopWatchWin();
29 #else
30     *timer_interface = (StopWatchInterface *)new StopWatchLinux();
31 #endif
32     return (*timer_interface != NULL) ? true : false;
33 }
34 
35 inline bool sdkDeleteTimer(StopWatchInterface **timer_interface)
36 {
37     if (*timer_interface)
38     {
39         delete *timer_interface;
40         *timer_interface = NULL;
41     }
42     return true;
43 }
44 
45 inline bool sdkStartTimer(StopWatchInterface **timer_interface)
46 {
47     if (*timer_interface)
48         (*timer_interface)->start();
49     return true;
50 }
51 
52 inline bool sdkStopTimer(StopWatchInterface **timer_interface)
53 {
54     if (*timer_interface)
55         (*timer_interface)->stop();
56     return true;
57 }
58 
59 inline float sdkGetTimerValue(StopWatchInterface **timer_interface)
60 {
61     if (*timer_interface)    
62         return (*timer_interface)->getTime();
63     else
64         return 0.0f;
65 }

 

● 立方体纹理贴图。六个面分别为 x = 1 正面、x = -1 轴负面、y = 1 正面、y = -1 负面、z = 1 正面、x = -1 负面,对应前、后、右、左、上、下。按照线性下标 [0, width * width * 6 - 1] 顺序访问时,各元素存储位置如下图所示(width == 2 为例)。

    

 

posted on 2017-11-22 22:45  爨爨爨好  阅读(333)  评论(0编辑  收藏  举报