CUDA实例练习(六):矢量求和

 1 #include <stdio.h>
 2 #include <cuda_runtime.h>
 3 #include <device_launch_parameters.h>
 4 #include <book.h>
 5 #include <gputimer.h>
 6 #define N (33 * 1024)
 7 
 /**
  * Element-wise vector sum kernel: stores a[i] + b[i] into c[i] for every
  * index i in [0, N), where N is the file-level element count.
  *
  * Grid-stride loop: each thread starts at its global index
  * (blockIdx.x * blockDim.x + threadIdx.x) and then advances by the total
  * number of launched threads (blockDim.x * gridDim.x). The whole range is
  * therefore covered even when the launch provides fewer than N threads.
  *
  * a, b : input arrays, read at indices below N
  * c    : output array, written at indices below N
  */
 __global__ void add(int *a, int *b, int *c){
     // Total threads in the grid; the distance between two elements handled
     // by the same thread.
     const unsigned int stride = blockDim.x * gridDim.x;

     for (int idx = blockIdx.x * blockDim.x + threadIdx.x; idx < N; idx += stride){
         c[idx] = a[idx] + b[idx];
     }
 }
15 
/**
 * Host driver: adds two N-element integer vectors on the GPU with the `add`
 * kernel and verifies the result against a[i] + b[i] recomputed on the CPU.
 *
 * Prints one "Error: ..." line for every mismatching element, or
 * "We did it!" when all N elements match.
 *
 * Every CUDA runtime call, including the kernel launch status and the final
 * frees, is routed through HANDLE_ERROR (from book.h, not shown here).
 *
 * Returns 0 when verification passes and 1 when any element differs.
 */
int main(void){
    // Static storage keeps the three N-element host arrays off the stack.
    static int a[N], b[N], c[N];
    int *dev_a, *dev_b, *dev_c;
    const size_t bytes = N * sizeof(int);

    // Allocate the two inputs and the output on the GPU.
    HANDLE_ERROR(cudaMalloc((void **)&dev_a, bytes));
    HANDLE_ERROR(cudaMalloc((void **)&dev_b, bytes));
    HANDLE_ERROR(cudaMalloc((void **)&dev_c, bytes));

    // Fill the host inputs: a[i] = i, b[i] = i^2.
    for (int i = 0; i < N; i++){
        a[i] = i;
        b[i] = i * i;
    }

    HANDLE_ERROR(cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice));
    HANDLE_ERROR(cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice));

    // 128 blocks x 128 threads is fewer threads than N elements; the kernel's
    // loop lets each thread process several elements.
    add<<<128, 128>>>(dev_a, dev_b, dev_c);
    // A kernel launch returns no status of its own, so query it explicitly;
    // otherwise an invalid launch would go unreported at this point.
    HANDLE_ERROR(cudaGetLastError());

    // Copy the result back to the host for verification.
    HANDLE_ERROR(cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost));

    // Verify every element against the CPU-computed sum.
    bool success = true;
    for (int i = 0; i < N; i++){
        if (a[i] + b[i] != c[i]){
            printf("Error: %d + %d != %d\n", a[i], b[i], c[i]);
            success = false;
        }
    }
    if (success)
        printf("We did it!\n");

    HANDLE_ERROR(cudaFree(dev_a));
    HANDLE_ERROR(cudaFree(dev_b));
    HANDLE_ERROR(cudaFree(dev_c));

    // Report a verification failure through the exit status as well.
    return success ? 0 : 1;
}

 

posted @ 2017-08-08 11:30  Jason&Hymer  阅读(485)  评论(0)  编辑  收藏  举报