go代码:
package cudaruntime /* #cgo CFLAGS: -I/usr/local/cuda/include #cgo LDFLAGS: -L/usr/local/cuda/lib64 -lcudart -lcuda #include <cuda_runtime.h> */ import "C" import .....
// GetCUDADeviceProperties queries the CUDA runtime for the device identified
// by deviceID and returns the subset of its properties that this package
// exposes: the compute-capability major and minor numbers and the device name.
//
// The status returned by cudaGetDeviceProperties is converted with
// newCudaError. When that conversion yields a non-nil error the result pointer
// is nil, so callers never observe a zero-valued struct that looks like real
// device data.
//
// NOTE(review): this assumes newCudaError returns nil for cudaSuccess — confirm
// against its definition.
//
// API reference:
// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html
func GetCUDADeviceProperties(deviceID int) (*CUDADeviceProperties, error) {
	var cp C.struct_cudaDeviceProp
	if err := newCudaError(C.cudaGetDeviceProperties(&cp, C.int(deviceID))); err != nil {
		// cp was not filled in; do not hand back a partially populated value.
		return nil, err
	}

	return &CUDADeviceProperties{
		Major: int(cp.major),
		Minor: int(cp.minor),
		// name is a fixed-size C char array (char name[256]); GoString copies
		// the bytes up to the first NUL into a Go string.
		Name: C.GoString(&cp.name[0]),
	}, nil
}
用 readelf 查看符号 cudaGetDeviceProperties 在可执行文件符号表中的信息:
该符号为未定义符号(UND),其版本信息指向 libcudart.so.11.0,即运行时由动态链接器从 libcudart.so 中解析。
root@sensetime:~/go/src/gitlab.sz.sensetime.com/viper/engine-video-process-service# readelf -a video-process-service-worker |grep cudaGetDeviceProperties 166: 0000000000000000 0 FUNC GLOBAL DEFAULT UND cudaGetDeviceProperties@libcudart.so.11.0 (6) 46350: 0000000000000000 0 FUNC GLOBAL DEFAULT UND cudaGetDeviceProperties@@ root@sensetime:~/go/src/gitlab.sz.sensetime.com/viper/engine-video-process-service#
cuda官网接口定义描述地址:
https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html
/usr/local/cuda/include/cuda_runtime_api.h:
/** * \brief Returns information about the compute-device * * Returns in \p *prop the properties of device \p dev. The ::cudaDeviceProp * structure is defined as: * \code struct cudaDeviceProp { char name[256]; cudaUUID_t uuid; size_t totalGlobalMem; size_t sharedMemPerBlock; int regsPerBlock; int warpSize; size_t memPitch; int maxThreadsPerBlock; int maxThreadsDim[3]; int maxGridSize[3]; int clockRate; size_t totalConstMem; int major; int minor; size_t textureAlignment; size_t texturePitchAlignment; int deviceOverlap; int multiProcessorCount; int kernelExecTimeoutEnabled; int integrated; int canMapHostMemory; int computeMode; int maxTexture1D; int maxTexture1DMipmap; int maxTexture1DLinear; int maxTexture2D[2]; int maxTexture2DMipmap[2]; int maxTexture2DLinear[3]; int maxTexture2DGather[2]; int maxTexture3D[3]; int maxTexture3DAlt[3]; int maxTextureCubemap; int maxTexture1DLayered[2]; int maxTexture2DLayered[3]; int maxTextureCubemapLayered[2]; int maxSurface1D; int maxSurface2D[2]; int maxSurface3D[3]; int maxSurface1DLayered[2]; int maxSurface2DLayered[3]; int maxSurfaceCubemap; int maxSurfaceCubemapLayered[2]; size_t surfaceAlignment; int concurrentKernels; int ECCEnabled; int pciBusID; int pciDeviceID; int pciDomainID; int tccDriver; int asyncEngineCount; int unifiedAddressing; int memoryClockRate; int memoryBusWidth; int l2CacheSize; int persistingL2CacheMaxSize; int maxThreadsPerMultiProcessor; int streamPrioritiesSupported; int globalL1CacheSupported; int localL1CacheSupported; size_t sharedMemPerMultiprocessor; int regsPerMultiprocessor; int managedMemory; int isMultiGpuBoard; int multiGpuBoardGroupID; int singleToDoublePrecisionPerfRatio; int pageableMemoryAccess; int concurrentManagedAccess; int computePreemptionSupported; int canUseHostPointerForRegisteredMem; int cooperativeLaunch; int cooperativeMultiDeviceLaunch; int pageableMemoryAccessUsesHostPageTables; int directManagedMemAccessFromHost; int accessPolicyMaxWindowSize; } \endcode * where: * - \ref 
::cudaDeviceProp::name "name[256]" is an ASCII string identifying * the device; * - \ref ::cudaDeviceProp::uuid "uuid" is a 16-byte unique identifier. * - \ref ::cudaDeviceProp::totalGlobalMem "totalGlobalMem" is the total * amount of global memory available on the device in bytes; * - \ref ::cudaDeviceProp::sharedMemPerBlock "sharedMemPerBlock" is the * maximum amount of shared memory available to a thread block in bytes; * - \ref ::cudaDeviceProp::regsPerBlock "regsPerBlock" is the maximum number * of 32-bit registers available to a thread block; * - \ref ::cudaDeviceProp::warpSize "warpSize" is the warp size in threads; * - \ref ::cudaDeviceProp::memPitch "memPitch" is the maximum pitch in * bytes allowed by the memory copy functions that involve memory regions * allocated through ::cudaMallocPitch(); * - \ref ::cudaDeviceProp::maxThreadsPerBlock "maxThreadsPerBlock" is the * maximum number of threads per block; * - \ref ::cudaDeviceProp::maxThreadsDim "maxThreadsDim[3]" contains the * maximum size of each dimension of a block; * - \ref ::cudaDeviceProp::maxGridSize "maxGridSize[3]" contains the * maximum size of each dimension of a grid; * - \ref ::cudaDeviceProp::clockRate "clockRate" is the clock frequency in * kilohertz; * - \ref ::cudaDeviceProp::totalConstMem "totalConstMem" is the total amount * of constant memory available on the device in bytes; * - \ref ::cudaDeviceProp::major "major", * \ref ::cudaDeviceProp::minor "minor" are the major and minor revision * numbers defining the device's compute capability; * - \ref ::cudaDeviceProp::textureAlignment "textureAlignment" is the * alignment requirement; texture base addresses that are aligned to * \ref ::cudaDeviceProp::textureAlignment "textureAlignment" bytes do not * need an offset applied to texture fetches; * - \ref ::cudaDeviceProp::texturePitchAlignment "texturePitchAlignment" is the * pitch alignment requirement for 2D texture references that are bound to * pitched memory; * - \ref 
::cudaDeviceProp::deviceOverlap "deviceOverlap" is 1 if the device * can concurrently copy memory between host and device while executing a * kernel, or 0 if not. Deprecated, use instead asyncEngineCount. * - \ref ::cudaDeviceProp::multiProcessorCount "multiProcessorCount" is the * number of multiprocessors on the device; * - \ref ::cudaDeviceProp::kernelExecTimeoutEnabled "kernelExecTimeoutEnabled" * is 1 if there is a run time limit for kernels executed on the device, or * 0 if not. * - \ref ::cudaDeviceProp::integrated "integrated" is 1 if the device is an * integrated (motherboard) GPU and 0 if it is a discrete (card) component. * - \ref ::cudaDeviceProp::canMapHostMemory "canMapHostMemory" is 1 if the * device can map host memory into the CUDA address space for use with * ::cudaHostAlloc()/::cudaHostGetDevicePointer(), or 0 if not; * - \ref ::cudaDeviceProp::computeMode "computeMode" is the compute mode * that the device is currently in. Available modes are as follows: * - cudaComputeModeDefault: Default mode - Device is not restricted and * multiple threads can use ::cudaSetDevice() with this device. * - cudaComputeModeExclusive: Compute-exclusive mode - Only one thread will * be able to use ::cudaSetDevice() with this device. * - cudaComputeModeProhibited: Compute-prohibited mode - No threads can use * ::cudaSetDevice() with this device. * - cudaComputeModeExclusiveProcess: Compute-exclusive-process mode - Many * threads in one process will be able to use ::cudaSetDevice() with this device. * <br> If ::cudaSetDevice() is called on an already occupied \p device with * computeMode ::cudaComputeModeExclusive, ::cudaErrorDeviceAlreadyInUse * will be immediately returned indicating the device cannot be used. * When an occupied exclusive mode device is chosen with ::cudaSetDevice, * all subsequent non-device management runtime functions will return * ::cudaErrorDevicesUnavailable. 
* - \ref ::cudaDeviceProp::maxTexture1D "maxTexture1D" is the maximum 1D * texture size. * - \ref ::cudaDeviceProp::maxTexture1DMipmap "maxTexture1DMipmap" is the maximum * 1D mipmapped texture texture size. * - \ref ::cudaDeviceProp::maxTexture1DLinear "maxTexture1DLinear" is the maximum * 1D texture size for textures bound to linear memory. * - \ref ::cudaDeviceProp::maxTexture2D "maxTexture2D[2]" contains the maximum * 2D texture dimensions. * - \ref ::cudaDeviceProp::maxTexture2DMipmap "maxTexture2DMipmap[2]" contains the * maximum 2D mipmapped texture dimensions. * - \ref ::cudaDeviceProp::maxTexture2DLinear "maxTexture2DLinear[3]" contains the * maximum 2D texture dimensions for 2D textures bound to pitch linear memory. * - \ref ::cudaDeviceProp::maxTexture2DGather "maxTexture2DGather[2]" contains the * maximum 2D texture dimensions if texture gather operations have to be performed. * - \ref ::cudaDeviceProp::maxTexture3D "maxTexture3D[3]" contains the maximum * 3D texture dimensions. * - \ref ::cudaDeviceProp::maxTexture3DAlt "maxTexture3DAlt[3]" * contains the maximum alternate 3D texture dimensions. * - \ref ::cudaDeviceProp::maxTextureCubemap "maxTextureCubemap" is the * maximum cubemap texture width or height. * - \ref ::cudaDeviceProp::maxTexture1DLayered "maxTexture1DLayered[2]" contains * the maximum 1D layered texture dimensions. * - \ref ::cudaDeviceProp::maxTexture2DLayered "maxTexture2DLayered[3]" contains * the maximum 2D layered texture dimensions. * - \ref ::cudaDeviceProp::maxTextureCubemapLayered "maxTextureCubemapLayered[2]" * contains the maximum cubemap layered texture dimensions. * - \ref ::cudaDeviceProp::maxSurface1D "maxSurface1D" is the maximum 1D * surface size. * - \ref ::cudaDeviceProp::maxSurface2D "maxSurface2D[2]" contains the maximum * 2D surface dimensions. * - \ref ::cudaDeviceProp::maxSurface3D "maxSurface3D[3]" contains the maximum * 3D surface dimensions. 
* - \ref ::cudaDeviceProp::maxSurface1DLayered "maxSurface1DLayered[2]" contains * the maximum 1D layered surface dimensions. * - \ref ::cudaDeviceProp::maxSurface2DLayered "maxSurface2DLayered[3]" contains * the maximum 2D layered surface dimensions. * - \ref ::cudaDeviceProp::maxSurfaceCubemap "maxSurfaceCubemap" is the maximum * cubemap surface width or height. * - \ref ::cudaDeviceProp::maxSurfaceCubemapLayered "maxSurfaceCubemapLayered[2]" * contains the maximum cubemap layered surface dimensions. * - \ref ::cudaDeviceProp::surfaceAlignment "surfaceAlignment" specifies the * alignment requirements for surfaces. * - \ref ::cudaDeviceProp::concurrentKernels "concurrentKernels" is 1 if the * device supports executing multiple kernels within the same context * simultaneously, or 0 if not. It is not guaranteed that multiple kernels * will be resident on the device concurrently so this feature should not be * relied upon for correctness; * - \ref ::cudaDeviceProp::ECCEnabled "ECCEnabled" is 1 if the device has ECC * support turned on, or 0 if not. * - \ref ::cudaDeviceProp::pciBusID "pciBusID" is the PCI bus identifier of * the device. * - \ref ::cudaDeviceProp::pciDeviceID "pciDeviceID" is the PCI device * (sometimes called slot) identifier of the device. * - \ref ::cudaDeviceProp::pciDomainID "pciDomainID" is the PCI domain identifier * of the device. * - \ref ::cudaDeviceProp::tccDriver "tccDriver" is 1 if the device is using a * TCC driver or 0 if not. * - \ref ::cudaDeviceProp::asyncEngineCount "asyncEngineCount" is 1 when the * device can concurrently copy memory between host and device while executing * a kernel. It is 2 when the device can concurrently copy memory between host * and device in both directions and execute a kernel at the same time. It is * 0 if neither of these is supported. * - \ref ::cudaDeviceProp::unifiedAddressing "unifiedAddressing" is 1 if the device * shares a unified address space with the host and 0 otherwise. 
* - \ref ::cudaDeviceProp::memoryClockRate "memoryClockRate" is the peak memory * clock frequency in kilohertz. * - \ref ::cudaDeviceProp::memoryBusWidth "memoryBusWidth" is the memory bus width * in bits. * - \ref ::cudaDeviceProp::l2CacheSize "l2CacheSize" is L2 cache size in bytes. * - \ref ::cudaDeviceProp::persistingL2CacheMaxSize "persistingL2CacheMaxSize" is L2 cache's maximum persisting lines size in bytes. * - \ref ::cudaDeviceProp::maxThreadsPerMultiProcessor "maxThreadsPerMultiProcessor" * is the number of maximum resident threads per multiprocessor. * - \ref ::cudaDeviceProp::streamPrioritiesSupported "streamPrioritiesSupported" * is 1 if the device supports stream priorities, or 0 if it is not supported. * - \ref ::cudaDeviceProp::globalL1CacheSupported "globalL1CacheSupported" * is 1 if the device supports caching of globals in L1 cache, or 0 if it is not supported. * - \ref ::cudaDeviceProp::localL1CacheSupported "localL1CacheSupported" * is 1 if the device supports caching of locals in L1 cache, or 0 if it is not supported. * - \ref ::cudaDeviceProp::sharedMemPerMultiprocessor "sharedMemPerMultiprocessor" is the * maximum amount of shared memory available to a multiprocessor in bytes; this amount is * shared by all thread blocks simultaneously resident on a multiprocessor; * - \ref ::cudaDeviceProp::regsPerMultiprocessor "regsPerMultiprocessor" is the maximum number * of 32-bit registers available to a multiprocessor; this number is shared * by all thread blocks simultaneously resident on a multiprocessor; * - \ref ::cudaDeviceProp::managedMemory "managedMemory" * is 1 if the device supports allocating managed memory on this system, or 0 if it is not supported. * - \ref ::cudaDeviceProp::isMultiGpuBoard "isMultiGpuBoard" * is 1 if the device is on a multi-GPU board (e.g. Gemini cards), and 0 if not; * - \ref ::cudaDeviceProp::multiGpuBoardGroupID "multiGpuBoardGroupID" is a unique identifier * for a group of devices associated with the same board. 
* Devices on the same multi-GPU board will share the same identifier; * - \ref ::cudaDeviceProp::singleToDoublePrecisionPerfRatio "singleToDoublePrecisionPerfRatio" * is the ratio of single precision performance (in floating-point operations per second) * to double precision performance. * - \ref ::cudaDeviceProp::pageableMemoryAccess "pageableMemoryAccess" is 1 if the device supports * coherently accessing pageable memory without calling cudaHostRegister on it, and 0 otherwise. * - \ref ::cudaDeviceProp::concurrentManagedAccess "concurrentManagedAccess" is 1 if the device can * coherently access managed memory concurrently with the CPU, and 0 otherwise. * - \ref ::cudaDeviceProp::computePreemptionSupported "computePreemptionSupported" is 1 if the device * supports Compute Preemption, and 0 otherwise. * - \ref ::cudaDeviceProp::canUseHostPointerForRegisteredMem "canUseHostPointerForRegisteredMem" is 1 if * the device can access host registered memory at the same virtual address as the CPU, and 0 otherwise. * - \ref ::cudaDeviceProp::cooperativeLaunch "cooperativeLaunch" is 1 if the device supports launching * cooperative kernels via ::cudaLaunchCooperativeKernel, and 0 otherwise. * - \ref ::cudaDeviceProp::cooperativeMultiDeviceLaunch "cooperativeMultiDeviceLaunch" is 1 if the device * supports launching cooperative kernels via ::cudaLaunchCooperativeKernelMultiDevice, and 0 otherwise. * - \ref ::cudaDeviceProp::pageableMemoryAccessUsesHostPageTables "pageableMemoryAccessUsesHostPageTables" is 1 if the device accesses * pageable memory via the host's page tables, and 0 otherwise. * - \ref ::cudaDeviceProp::directManagedMemAccessFromHost "directManagedMemAccessFromHost" is 1 if the host can directly access managed * memory on the device without migration, and 0 otherwise. * - \ref ::cudaDeviceProp::maxBlocksPerMultiProcessor "maxBlocksPerMultiProcessor" is the maximum number of thread blocks * that can reside on a multiprocessor. 
* - \ref ::cudaDeviceProp::accessPolicyMaxWindowSize "accessPolicyMaxWindowSize" is * the maximum value of ::cudaAccessPolicyWindow::num_bytes. * * \param prop - Properties for the specified device * \param device - Device number to get properties for * * \return * ::cudaSuccess, * ::cudaErrorInvalidDevice * \notefnerr * \note_init_rt * \note_callback * * \sa ::cudaGetDeviceCount, ::cudaGetDevice, ::cudaSetDevice, ::cudaChooseDevice, * ::cudaDeviceGetAttribute, * ::cuDeviceGetAttribute, * ::cuDeviceGetName */ extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device);