opencv之安装与cuda冲突问题
背景
背景: 由于工作需要, 需要在现有opencv 3.3.1版本上安装opencv2.4.11版本, 安装目录为/usr/local/opencv-2.4/下, 根据多版本安装教程, 出现了opencv与cuda的安装冲突.
编译出错如下:
CMake Error: The following variables are used in this project, but they are set to NOTFOUND. Please set them or make sure they are set and tested correctly in the CMake files: CUDA_nppi_LIBRARY (ADVANCED)
解决方案
根据google上某神人的总结:
When trying to compile OpenCV 2, for example OpenCV 2.4.13.6, with CUDA 9 there are mainly two issues:
The
nppi
library was split up under CUDA 9 into a series of libraries, preventing the shipped FindCUDA.cmake from finding it;
and the
FindCUDA.cmake
does not handle the latest GPU architectures correctly.
The first problem can be fixed following this StackOverflow question. Specifically, adapting FindCUDA.cmake
as follows:
replace
find_cuda_helper_libs(nppi)
with
find_cuda_helper_libs(nppial)
find_cuda_helper_libs(nppicc)
find_cuda_helper_libs(nppicom)
find_cuda_helper_libs(nppidei)
find_cuda_helper_libs(nppif)
find_cuda_helper_libs(nppig)
find_cuda_helper_libs(nppim)
find_cuda_helper_libs(nppist)
find_cuda_helper_libs(nppisu)
find_cuda_helper_libs(nppitc)
A few lines below, the set
statement for CUDA_npp_LIBRARY
needs to reflect these changes:
replace
set(CUDA_npp_LIBRARY "${CUDA_nppc_LIBRARY};${CUDA_nppi_LIBRARY};${CUDA_npps_LIBRARY}")
with
set(CUDA_npp_LIBRARY "${CUDA_nppc_LIBRARY};${CUDA_nppial_LIBRARY};${CUDA_nppicc_LIBRARY};${CUDA_nppicom_LIBRARY};${CUDA_nppidei_LIBRARY};${CUDA_nppif_LIBRARY};${CUDA_nppig_LIBRARY};${CUDA_nppim_LIBRARY};${CUDA_nppist_LIBRARY};${CUDA_nppisu_LIBRARY};${CUDA_nppitc_LIBRARY};${CUDA_npps_LIBRARY}")
Similarly,
replace
unset(CUDA_nppi_LIBRARY CACHE)
with
unset(CUDA_nppial_LIBRARY CACHE)
unset(CUDA_nppicc_LIBRARY CACHE)
unset(CUDA_nppicom_LIBRARY CACHE)
unset(CUDA_nppidei_LIBRARY CACHE)
unset(CUDA_nppif_LIBRARY CACHE)
unset(CUDA_nppig_LIBRARY CACHE)
unset(CUDA_nppim_LIBRARY CACHE)
unset(CUDA_nppist_LIBRARY CACHE)
unset(CUDA_nppisu_LIBRARY CACHE)
unset(CUDA_nppitc_LIBRARY CACHE)
In OpenCVDetectCuda.cmake
, two more adjustments are necessary to tackle the second problem. In particular, the _generations
variable needs to reflect the latest GPU generations and needs to correctly map them to the corresponding compute capabilities. To this end,
replace
set(_generations "Fermi" "Kepler")
with
set(_generations "Fermi" "Kepler" "Maxwell" "Pascal" "Volta")
Then, a few lines below, the case distinction needs to include these generations:
replace
set(__cuda_arch_ptx "")
if(CUDA_GENERATION STREQUAL "Fermi")
set(__cuda_arch_bin "2.0 2.1(2.0)")
elseif(CUDA_GENERATION STREQUAL "Kepler")
if(${CUDA_VERSION} VERSION_LESS "5.0")
set(__cuda_arch_bin "3.0")
else()
set(__cuda_arch_bin "3.0 3.5")
endif()
elseif(CUDA_GENERATION STREQUAL "Auto")
execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT _nvcc_res EQUAL 0)
message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
else()
set(__cuda_arch_bin "${_nvcc_out}")
string(REPLACE "2.1" "2.1(2.0)" __cuda_arch_bin "${__cuda_arch_bin}")
endif()
endif()
# 参考
+ https://davidstutz.de/compiling-opencv-2-4-x-with-cuda-9/
+ https://medium.com/@mengjiunchiou/build-opencv-caffe-with-cuda-9-0-on-ubuntu-16-04-b2794a41612d
+ https://en.wikipedia.org/wiki/CUDA#GPUs_supported
with
set(__cuda_arch_ptx "")
if(CUDA_GENERATION STREQUAL "Fermi")
set(__cuda_arch_bin "2.0")
elseif(CUDA_GENERATION STREQUAL "Kepler")
set(__cuda_arch_bin "3.0 3.5 3.7")
elseif(CUDA_GENERATION STREQUAL "Maxwell")
set(__cuda_arch_bin "5.0 5.2")
elseif(CUDA_GENERATION STREQUAL "Pascal")
set(__cuda_arch_bin "6.0 6.1")
elseif(CUDA_GENERATION STREQUAL "Volta")
set(__cuda_arch_bin "7.0")
elseif(CUDA_GENERATION STREQUAL "Auto")
execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT _nvcc_res EQUAL 0)
message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
else()
set(__cuda_arch_bin "${_nvcc_out}")
string(REPLACE "2.1" "2.1(2.0)" __cuda_arch_bin "${__cuda_arch_bin}")
endif()
endif()
注意: 该处不要替换错了, 容易出错.
Finally, to avoid compilation errors, the NVCC flag --expt-relaxed-constexpr
needs to be set. To this end, FindCUDA.cmake
and OpenCVDetectCuda.cmake
need to be adapted:
replace
set(nvcc_flags "")
with
set(nvcc_flags "--expt-relaxed-constexpr")
OpenCV 2 should now be ready to be compiled with CUDA 9. As the correct GPU generation might not be selected automatically, make sure to use -DCUDA_GENERATION
when running CMake to set the correct generation.
- 在wiki上查看Compute capability (version)
对于GTX 1060来说, 对应的是Pascal
cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local/opencv-2.4 -DCUDA_GENERATION="Pascal" ..
- 但是问题没有这么简单, bug又来了
/home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/graphcuts.cpp:135:18: error: ‘NppiGraphcutState’ does not name a type
operator NppiGraphcutState*()
^
/home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/graphcuts.cpp:141:9: error: ‘NppiGraphcutState’ does not name a type
NppiGraphcutState* pState;
^
In file included from /home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/cuda/internal_shared.hpp:50:0,
from /home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/precomp.hpp:105,
from /home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/graphcuts.cpp:43:
/home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/graphcuts.cpp: In constructor ‘{anonymous}::NppiGraphcutStateHandler::NppiGraphcutStateHandler(NppiSize, Npp8u*, {anonymous}::init_func_t)’:
/home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/graphcuts.cpp:127:39: error: ‘pState’ was not declared in this scope
nppSafeCall( func(sznpp, &pState, pDeviceMem) );
^
/home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/cuda/safe_call.hpp:84:43: note: in definition of macro ‘nppSafeCall’
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, CV_Func)
^
/home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/graphcuts.cpp: In destructor ‘{anonymous}::NppiGraphcutStateHandler::~NppiGraphcutStateHandler()’:
/home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/graphcuts.cpp:132:43: error: ‘pState’ was not declared in this scope
nppSafeCall( nppiGraphcutFree(pState) );
^
/home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/cuda/safe_call.hpp:84:43: note: in definition of macro ‘nppSafeCall’
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, CV_Func)
^
/home/liuzhiyang/app/code/opencv-2.4.11/modules/gpu/src/graphcuts.cpp:132:49: error: ‘nppiGraphcutFree’ was not declared in this scope
nppSafeCall( nppiGraphcutFree(pState) );
然后又参考了google一大神
修改modules/gpu/src/graphcuts.cpp
, find the line,
replace
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
with
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) || (CUDART_VERSION >= 8000)
再编译一次, 成功了!!!!!! 高兴的哭了!!!!