[CUDA] Heterogeneous parallel vector lookup: copying a CPU-side vector to a GPU-side array
#include "cuda_runtime.h" #include "device_launch_parameters.h" #include <stdio.h> #include "iostream" #include "cstdlib" #include "vector" #include "thread" using namespace std; #define USE_CPU 1 #define USE_GPU 0 struct stu_info { char stu_num[14]; int try_seat_num; int exam_seat_num; }; struct select_info { char stu_num[14]; int try_seat_num; int exam_seat_num; }; vector<stu_info> stu; vector<select_info> select; __device__ void gpu_strcpy(char* a, char *b) { for (int i = 0; i < 14; i++) { a[i] = b[i]; } } void cpu_strcpy(char* a, char *b) { for (int i = 0; i < 14; i++) { a[i] = b[i]; } } __global__ void gpu_select_kernel(stu_info *dev_stu,select_info *dev_select,int *n) { int index = threadIdx.x; for (int i = 0; i < *n; i++) { if (dev_select[index].try_seat_num == dev_stu[i].try_seat_num) { gpu_strcpy(dev_select[index].stu_num, dev_stu[i].stu_num); dev_select[index].exam_seat_num = dev_stu[i].exam_seat_num; break; } } } void fun_select_cpu(int index, int n) { for (int i = 0; i < n; i++) { if (select[index].try_seat_num == stu[i].try_seat_num) { //cout << stu[index].stu_num << " " << stu[index].exam_seat_num<<endl; strcpy(select[index].stu_num, stu[i].stu_num); select[index].exam_seat_num = stu[i].exam_seat_num; break; } } } void fun_select_gpu(int m,int n,int size) { stu_info *dev_stu ; stu_info *host_stu = new stu_info[m]; select_info *dev_select ; select_info *host_select = new select_info[n]; int *dev_n; for (int i = 0; i < m; i++) { cpu_strcpy(host_stu[i].stu_num, stu[i].stu_num); host_stu[i].try_seat_num = stu[i].try_seat_num; host_stu[i].exam_seat_num = stu[i].exam_seat_num; } for (int i = 0; i < n; i++) { host_select[i].try_seat_num = select[i].try_seat_num; } cudaMalloc((void**)&dev_stu, sizeof(stu_info)*m); cudaMalloc((void**)&dev_select, sizeof(select_info)*n); cudaMalloc((void**)&dev_n, sizeof(int)); cudaMemcpy(dev_stu, host_stu, sizeof(stu_info)*m,cudaMemcpyHostToDevice); cudaMemcpy(dev_select, host_select, sizeof(select_info)*n, cudaMemcpyHostToDevice); cudaMemcpy(dev_n, &m, sizeof(int), cudaMemcpyHostToDevice); gpu_select_kernel << <1, size >> > (dev_stu, dev_select, dev_n); cudaMemcpy(host_stu, dev_stu, sizeof(stu_info)*n, cudaMemcpyDeviceToHost); cudaMemcpy(host_select, dev_select, sizeof(select_info)*n, cudaMemcpyDeviceToHost); for (int i = 0; i < n; i++) { cpu_strcpy(select[i].stu_num, host_select[i].stu_num); select[i].exam_seat_num = host_select[i].exam_seat_num; } cudaFree(dev_stu); cudaFree(dev_select); cudaFree(dev_n); } int main() { stu_info info_temp; select_info select_temp; int n, m,sign; cudaError_t cudaStatus; cudaStatus = cudaSetDevice(0); if (cudaStatus != cudaSuccess) { cout << "检测到你的计算机没有支持CUDA的NVIDIA的GPU设备,程序将使用CPU并行查询" << endl; sign = USE_CPU; } else { cout << "检测到你的计算机有支持CUDA的NVIDIA的GPU设备,程序将使用GPU并行查询" << endl; sign = USE_GPU; } cin >> n; for (int i = 0; i < n; i++) { cin >> info_temp.stu_num >> info_temp.try_seat_num >> info_temp.exam_seat_num; stu.push_back(info_temp); } cin >> m; for (int i = 0; i < m; i++) { cin >> select_temp.try_seat_num; select.push_back(select_temp); } if (sign == USE_CPU) { thread **thread_p = new thread*[m]; int thread_id = 0; for (thread_id; thread_id < m; thread_id++) { thread_p[thread_id] = new thread(fun_select_cpu, thread_id, n); thread_p[thread_id]->detach(); } delete[] thread_p; } else if (sign == USE_GPU) { fun_select_gpu(n, m, m); } for (int i = 0; i < m; i++) { cout << select[i].stu_num << " " << select[i].exam_seat_num << endl; } system("pause"); return 0; }