Compiling darknet with GPU and cuDNN support

Error: src/convolutional_layer.c:153:13: error: 'CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT' undeclared (first use in this function)

This symbol was removed in cuDNN 8: the cudnnGet*Algorithm() calls and their *_SPECIFY_WORKSPACE_LIMIT preference enums are gone, replaced by the cudnnFind*Algorithm() API. Fix the failing file src/convolutional_layer.c by adding a CUDNN_MAJOR >= 8 branch:

#ifdef GPU
#ifdef CUDNN
void cudnn_convolutional_setup(layer *l)
{
    cudnnSetTensor4dDescriptor(l->dsrcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
    cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);

    cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
    cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
    cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1);

    cudnnSetFilter4dDescriptor(l->dweightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size);
    cudnnSetFilter4dDescriptor(l->weightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size);
    #if CUDNN_MAJOR >= 6
    cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT);
    #else
    cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);
    #endif

    #if CUDNN_MAJOR >= 7
    cudnnSetConvolutionGroupCount(l->convDesc, l->groups);
    #else
    if(l->groups > 1){
        error("CUDNN < 7 doesn't support groups, please upgrade!");
    }
    #endif

    #if CUDNN_MAJOR >= 8
    /* cuDNN 8 removed cudnnGet*Algorithm(), so enumerate candidates with the
     * Find API and take the first one whose workspace fits under MEMORY_LIMIT. */
    int returnedAlgoCount;
    cudnnConvolutionFwdAlgoPerf_t       fw_results[2 * CUDNN_CONVOLUTION_FWD_ALGO_COUNT];
    cudnnConvolutionBwdDataAlgoPerf_t   bd_results[2 * CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT];
    cudnnConvolutionBwdFilterAlgoPerf_t bf_results[2 * CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT];

    cudnnFindConvolutionForwardAlgorithm(cudnn_handle(),
            l->srcTensorDesc,
            l->weightDesc,
            l->convDesc,
            l->dstTensorDesc,
            CUDNN_CONVOLUTION_FWD_ALGO_COUNT,
            &returnedAlgoCount,
            fw_results);
    for(int algoIndex = 0; algoIndex < returnedAlgoCount; ++algoIndex){
        #if PRINT_CUDNN_ALGO > 0
        printf("^^^^ %s for Algo %d: %f time requiring %llu memory\n",
               cudnnGetErrorString(fw_results[algoIndex].status),
               fw_results[algoIndex].algo, fw_results[algoIndex].time,
               (unsigned long long)fw_results[algoIndex].memory);
        #endif
        if(fw_results[algoIndex].memory < MEMORY_LIMIT){
            l->fw_algo = fw_results[algoIndex].algo;
            break;
        }
    }

    cudnnFindConvolutionBackwardDataAlgorithm(cudnn_handle(),
            l->weightDesc,
            l->ddstTensorDesc,
            l->convDesc,
            l->dsrcTensorDesc,
            CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT,
            &returnedAlgoCount,
            bd_results);
    for(int algoIndex = 0; algoIndex < returnedAlgoCount; ++algoIndex){
        #if PRINT_CUDNN_ALGO > 0
        printf("^^^^ %s for Algo %d: %f time requiring %llu memory\n",
               cudnnGetErrorString(bd_results[algoIndex].status),
               bd_results[algoIndex].algo, bd_results[algoIndex].time,
               (unsigned long long)bd_results[algoIndex].memory);
        #endif
        if(bd_results[algoIndex].memory < MEMORY_LIMIT){
            l->bd_algo = bd_results[algoIndex].algo;
            break;
        }
    }

    cudnnFindConvolutionBackwardFilterAlgorithm(cudnn_handle(),
            l->srcTensorDesc,
            l->ddstTensorDesc,
            l->convDesc,
            l->dweightDesc,
            CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT,
            &returnedAlgoCount,
            bf_results);
    for(int algoIndex = 0; algoIndex < returnedAlgoCount; ++algoIndex){
        #if PRINT_CUDNN_ALGO > 0
        printf("^^^^ %s for Algo %d: %f time requiring %llu memory\n",
               cudnnGetErrorString(bf_results[algoIndex].status),
               bf_results[algoIndex].algo, bf_results[algoIndex].time,
               (unsigned long long)bf_results[algoIndex].memory);
        #endif
        if(bf_results[algoIndex].memory < MEMORY_LIMIT){
            l->bf_algo = bf_results[algoIndex].algo;
            break;
        }
    }

    #else

    cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
            l->srcTensorDesc,
            l->weightDesc,
            l->convDesc,
            l->dstTensorDesc,
            CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
            2000000000,
            &l->fw_algo);
    cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),
            l->weightDesc,
            l->ddstTensorDesc,
            l->convDesc,
            l->dsrcTensorDesc,
            CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
            2000000000,
            &l->bd_algo);
    cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),
            l->srcTensorDesc,
            l->ddstTensorDesc,
            l->convDesc,
            l->dweightDesc,
            CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
            2000000000,
            &l->bf_algo);
    #endif
}
#endif
#endif


Also add the macro the new CUDNN_MAJOR >= 8 branch relies on, near the top of src/convolutional_layer.c:

#define MEMORY_LIMIT 2000000000

(The diagnostic printf blocks above are guarded by PRINT_CUDNN_ALGO, which is compiled out when left undefined; add #define PRINT_CUDNN_ALGO 1 alongside MEMORY_LIMIT if you want to see which algorithm each layer picks.)
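
The 2000000000-byte cap simply mirrors the workspace limit the old cudnnGet*Algorithm() calls were given. If you would rather derive the limit from the GPU actually installed, here is a minimal sketch using the CUDA runtime's cudaMemGetInfo(); the workspace_limit() helper is hypothetical and not part of darknet:

#include <cuda_runtime.h>
#include <stddef.h>

/* Hypothetical helper (not in darknet): cap the cuDNN workspace at half of
 * the device memory that is currently free, falling back to the fixed 2 GB
 * limit when the query fails. */
static size_t workspace_limit(void)
{
    size_t free_bytes = 0, total_bytes = 0;
    if (cudaMemGetInfo(&free_bytes, &total_bytes) != cudaSuccess) {
        return 2000000000UL;  /* same value as MEMORY_LIMIT */
    }
    return free_bytes / 2;
}

You would then compare fw_results[algoIndex].memory (and the bd/bf counterparts) against workspace_limit() instead of MEMORY_LIMIT.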

Error: nvcc fatal : Unsupported gpu architecture 'compute_30'

CUDA 11 dropped support for the Kepler compute_30 target, so nvcc rejects it. Edit the Makefile: remove the line -gencode arch=compute_30,code=sm_30 \ from the ARCH setting, leaving something like this:

ARCH= -gencode arch=compute_35,code=sm_35 \
      -gencode arch=compute_50,code=[sm_50,compute_50] \
      -gencode arch=compute_52,code=[sm_52,compute_52] \
      -gencode arch=compute_70,code=[sm_70,compute_70] \
      -gencode arch=compute_75,code=[sm_75,compute_75] \
      -gencode arch=compute_86,code=[sm_86,compute_86]
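
Which -gencode lines you actually need depends on the cards you build for. A quick way to check a GPU's compute capability is the small CUDA program below (the file name cc_query.cu is just an example); keep only the ARCH entries that match what it prints:

#include <stdio.h>
#include <cuda_runtime.h>

/* Lists every visible GPU with its compute capability so the matching
 * -gencode lines can be kept. Build with: nvcc cc_query.cu -o cc_query */
int main(void)
{
    int n = 0;
    cudaGetDeviceCount(&n);
    for (int i = 0; i < n; ++i) {
        struct cudaDeviceProp prop;
        cudaGetDeviceProperties(&prop, i);
        printf("GPU %d: %s -> compute_%d%d / sm_%d%d\n",
               i, prop.name, prop.major, prop.minor, prop.major, prop.minor);
    }
    return n == 0;  /* nonzero exit if no GPU was found */
}

After changing ARCH, run make clean before rebuilding with GPU=1 and CUDNN=1, so no object files compiled for the old architecture list survive.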
