Fork me on GitHub

opencv之安装与cuda冲突问题

背景

背景: 由于工作需要, 需要在现有opencv 3.3.1版本上安装opencv2.4.11版本, 安装目录为/usr/local/opencv-2.4/下, 根据多版本安装教程, 出现了opencv与cuda的安装冲突.

编译出错如下:

CMake Error: The following variables are used in this project, but they are set to NOTFOUND. Please set them or make sure they are set and tested correctly in the CMake files: CUDA_nppi_LIBRARY (ADVANCED)

解决方案

根据google上某神人的总结:

When trying to compile OpenCV 2, for example OpenCV 2.4.13.6, with CUDA 9 there are mainly two issues:

The nppi library was split up under CUDA 9 into a series of libraries, preventing the shipped FindCUDA.cmake from finding it;

and the FindCUDA.cmake does not handle the latest GPU architectures correctly.

The first problem can be fixed following this StackOverflow question. Specifically, adapting FindCUDA.cmake as follows:

replace

find_cuda_helper_libs(nppi)

with

find_cuda_helper_libs(nppial)
find_cuda_helper_libs(nppicc)
find_cuda_helper_libs(nppicom)
find_cuda_helper_libs(nppidei)
find_cuda_helper_libs(nppif)
find_cuda_helper_libs(nppig)
find_cuda_helper_libs(nppim)
find_cuda_helper_libs(nppist)
find_cuda_helper_libs(nppisu)
find_cuda_helper_libs(nppitc)

A few lines below, the set statement for CUDA_npp_LIBRARY needs to reflect these changes:

replace

set(CUDA_npp_LIBRARY "${CUDA_nppc_LIBRARY};${CUDA_nppi_LIBRARY};${CUDA_npps_LIBRARY}")

with

set(CUDA_npp_LIBRARY "${CUDA_nppc_LIBRARY};${CUDA_nppial_LIBRARY};${CUDA_nppicc_LIBRARY};${CUDA_nppicom_LIBRARY};${CUDA_nppidei_LIBRARY};${CUDA_nppif_LIBRARY};${CUDA_nppig_LIBRARY};${CUDA_nppim_LIBRARY};${CUDA_nppist_LIBRARY};${CUDA_nppisu_LIBRARY};${CUDA_nppitc_LIBRARY};${CUDA_npps_LIBRARY}")

Similarly,

replace

unset(CUDA_nppi_LIBRARY CACHE)

with

unset(CUDA_nppial_LIBRARY CACHE)
unset(CUDA_nppicc_LIBRARY CACHE)
unset(CUDA_nppicom_LIBRARY CACHE)
unset(CUDA_nppidei_LIBRARY CACHE)
unset(CUDA_nppif_LIBRARY CACHE)
unset(CUDA_nppig_LIBRARY CACHE)
unset(CUDA_nppim_LIBRARY CACHE)
unset(CUDA_nppist_LIBRARY CACHE)
unset(CUDA_nppisu_LIBRARY CACHE)
unset(CUDA_nppitc_LIBRARY CACHE)

In OpenCVDetectCuda.cmake, two more adjustments are necessary to tackle the second problem. In particular, the _generations variable needs to reflect the latest GPU generations and needs to correctly map them to the corresponding compute capabilities. To this end,

replace

set(_generations "Fermi" "Kepler")

with

set(_generations "Fermi" "Kepler" "Maxwell" "Pascal" "Volta")

can be used. Then, a few lines below, the case distinction needs to include these generations:

replace

 set(__cuda_arch_ptx "")
  if(CUDA_GENERATION STREQUAL "Fermi")
    set(__cuda_arch_bin "2.0 2.1(2.0)")
  elseif(CUDA_GENERATION STREQUAL "Kepler")
    if(${CUDA_VERSION} VERSION_LESS "5.0")
      set(__cuda_arch_bin "3.0")
    else()
      set(__cuda_arch_bin "3.0 3.5")
    endif()
  elseif(CUDA_GENERATION STREQUAL "Auto")
    execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
                     WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
                     RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
                     ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
    if(NOT _nvcc_res EQUAL 0)
      message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
    else()
      set(__cuda_arch_bin "${_nvcc_out}")
      string(REPLACE "2.1" "2.1(2.0)" __cuda_arch_bin "${__cuda_arch_bin}")
    endif()
  endif()

with

set(__cuda_arch_ptx "")
  if(CUDA_GENERATION STREQUAL "Fermi")
    set(__cuda_arch_bin "2.0")
  elseif(CUDA_GENERATION STREQUAL "Kepler")
    set(__cuda_arch_bin "3.0 3.5 3.7")
  elseif(CUDA_GENERATION STREQUAL "Maxwell")
    set(__cuda_arch_bin "5.0 5.2")
  elseif(CUDA_GENERATION STREQUAL "Pascal")
    set(__cuda_arch_bin "6.0 6.1")
  elseif(CUDA_GENERATION STREQUAL "Volta")
    set(__cuda_arch_bin "7.0")
  elseif(CUDA_GENERATION STREQUAL "Auto")
    execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
    WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
    RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
    if(NOT _nvcc_res EQUAL 0)
    message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
    else()
    set(__cuda_arch_bin "${_nvcc_out}")
    string(REPLACE "2.1" "2.1(2.0)" __cuda_arch_bin "${__cuda_arch_bin}")
    endif()
  endif()

注意: 该处不要替换错了, 容易出错.

Finally, to avoid compilation errors, the NVCC flag --expt-relaxed-constexpr needs to be set. To this end, FindCUDA.cmake and OpenCVDetectCuda.cmake need to be adapted:

replace

set(nvcc_flags "")

with

set(nvcc_flags "--expt-relaxed-constexpr")

OpenCV 2 should now be ready to be compiled with CUDA 9. As the correct GPU generation might not be selected automatically, make sure to use -DCUDA_GENERATION when running CMake to set the correct generation.

  • 在wiki上查看Compute capability (version)

对于GTX 1060来说, 对应的是Pascal

cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local/opencv-2.4 -DCUDA_GENERATION="Pascal" ..
  • 但是问题没有这么简单, bug又来了
/home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/graphcuts.cpp:135:18: error: ‘NppiGraphcutState’ does not name a type
         operator NppiGraphcutState*()
                  ^
/home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/graphcuts.cpp:141:9: error: ‘NppiGraphcutState’ does not name a type
         NppiGraphcutState* pState;
         ^
In file included from /home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/cuda/internal_shared.hpp:50:0,
                 from /home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/precomp.hpp:105,
                 from /home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/graphcuts.cpp:43:
/home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/graphcuts.cpp: In constructor ‘{anonymous}::NppiGraphcutStateHandler::NppiGraphcutStateHandler(NppiSize, Npp8u*, {anonymous}::init_func_t)’:
/home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/graphcuts.cpp:127:39: error: ‘pState’ was not declared in this scope
             nppSafeCall( func(sznpp, &pState, pDeviceMem) );
                                       ^
/home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/cuda/safe_call.hpp:84:43: note: in definition of macro ‘nppSafeCall’
 #define nppSafeCall(expr)  ___nppSafeCall(expr, __FILE__, __LINE__, CV_Func)
                                           ^
/home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/graphcuts.cpp: In destructor ‘{anonymous}::NppiGraphcutStateHandler::~NppiGraphcutStateHandler()’:
/home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/graphcuts.cpp:132:43: error: ‘pState’ was not declared in this scope
             nppSafeCall( nppiGraphcutFree(pState) );
                                           ^
/home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/cuda/safe_call.hpp:84:43: note: in definition of macro ‘nppSafeCall’
 #define nppSafeCall(expr)  ___nppSafeCall(expr, __FILE__, __LINE__, CV_Func)
                                           ^
/home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/graphcuts.cpp:132:49: error: ‘nppiGraphcutFree’ was not declared in this scope
             nppSafeCall( nppiGraphcutFree(pState) );

然后又参考了google一大神

修改modules/gpu/src/graphcuts.cpp, find the line,

replace

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

with

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) || (CUDART_VERSION >= 8000)

再编译一次, 成功了!!!!!! 高兴的哭了!!!!

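参考

+ https://davidstutz.de/compiling-opencv-2-4-x-with-cuda-9/

+ https://medium.com/@mengjiunchiou/build-opencv-caffe-with-cuda-9-0-on-ubuntu-16-04-b2794a41612d

+ https://en.wikipedia.org/wiki/CUDA#GPUs_supported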

posted @ 2021-06-26 17:22  chrislzy  阅读(805)  评论(0)  编辑  收藏  举报