SM(Streaming multiProcessor)

https://zhuanlan.zhihu.com/p/105775783
https://www.likecs.com/show-204740120.html
#define CLOCK_RATE 1683000 /* modify for different device */
__device__ void sleeps(float t) {
clock_t t0 = clock64();
clock_t t1 = t0;
while ((t1 - t0)/(CLOCK_RATE*1000.0f) < t)
t1 = clock64();
}
int getSPcores(cudaDeviceProp devProp)
{
int cores = 0;
int mp = devProp.multiProcessorCount;
switch (devProp.major){
case 2: // Fermi
if (devProp.minor == 1) cores = mp * 48;
else cores = mp * 32;
break;
case 3: // Kepler
cores = mp * 192;
break;
case 5: // Maxwell
cores = mp * 128;
break;
case 6: // Pascal
if ((devProp.minor == 1) || (devProp.minor == 2)) cores = mp * 128;
else if (devProp.minor == 0) cores = mp * 64;
else printf("Unknown device type\n");
break;
case 7: // Volta and Turing
if ((devProp.minor == 0) || (devProp.minor == 5)) cores = mp * 64;
else printf("Unknown device type\n");
break;
default:
printf("Unknown device type\n");
break;
}
return cores;
}
r=cudaGetDeviceProperties(&devProp, 0);
if (r) return r;
{ printf("%s\n", devProp.name);
printf("Major revision number: %d\n", devProp.major);
printf("Minor revision number: %d\n", devProp.minor);
printf("Number of multiprocessors: %d\n", devProp.multiProcessorCount);
printf("maxThreadsPerMultiProcessor: %d\n", devProp.maxThreadsPerMultiProcessor);
printf("l2CacheSize: %d\n", devProp.l2CacheSize);
printf("l2CacheSize: %d\n", devProp.memoryBusWidth);
printf("Total global memory: %llu\n", devProp.totalGlobalMem);
printf("Total amount of shared memory per block: %u\n",devProp.sharedMemPerBlock);
printf("Total registers per block: %d\n", devProp.regsPerBlock);
printf("Warp size: %d\n", devProp.warpSize);
printf("Maximum memory pitch: %u\n", devProp.memPitch);
printf("Total amount of constant memory: %u\n", devProp.totalConstMem);
printf("core: %d\n", getSPcores(devProp));
}
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· winform 绘制太阳,地球,月球 运作规律
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· AI与.NET技术实操系列(五):向量存储与相似性搜索在 .NET 中的实现
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
2021-07-29 prometheus