caffe 报错与解决
1.error while loading shared libraries: libcaffe.so.1.0.0: cannot open shared object file: No such file or directory
error while loading shared libraries: libcaffe.so.1.0.0: cannot open shared object file: No such file or directory
这个caffe是直接从别处拷贝过来后运行的,运行时找不到libcaffe.so.1.0.0。
解决方法:
export LD_LIBRARY_PATH=/data_1/model_train/light_on/caffe_multi_focalloss_se/build_310/lib:$LD_LIBRARY_PATH
2.src/caffe/common.cpp:35:5: error: ‘::gflags’ has not been declared
CXX src/caffe/solver.cpp
CXX src/caffe/internal_thread.cpp
CXX src/caffe/common.cpp
src/caffe/common.cpp: In function ‘void caffe::GlobalInit(int*, char***)’:
src/caffe/common.cpp:35:5: error: ‘::gflags’ has not been declared
::gflags::ParseCommandLineFlags(pargc, pargv, true);
^
make: *** [.build_release/src/caffe/common.o] Error 1
解决方案:
https://github.com/BVLC/caffe/blob/master/include/caffe/common.hpp#L21-L28
注释掉ifndef
I also ran into this problem; I had installed gflags myself beforehand.
This is how I solved it:
in the file include/caffe/common.hpp
//#ifndef GFLAGS_GFLAGS_H_
namespace gflags = google;
//#endif // GFLAGS_GFLAGS_H_
That is, comment out the #ifndef / #endif guard lines, but keep
namespace gflags = google;
This line is the important one.
qt工程跑refinedet前向工程报错:
g++ -Wl,-rpath,/home/yhl/Qt5.9.2/5.9.2/gcc_64/lib -o refinedet main.o -L/home/yhl/software_install/opencv3.2/lib -L/data_1/2021biaozhushuju/obj_vehicle/RefineDet-master/build/lib -L/usr/local/ssl/lib -L/usr/local/cuda-8.0/lib64/ -L/usr/lib/x86_64-linux-gnu/hdf5/serial -L/lib/x86_64-linux-gnu/ -lopencv_core -lopencv_imgproc -lopencv_highgui -lopencv_video -lopencv_imgcodecs -lopencv_videoio -lboost_serialization -lboost_system -lboost_filesystem -lglog -lcaffe -lhdf5 -lhdf5_hl -lboost_thread -lprotobuf -latlas -lssl3 -ldl -lrt /usr/lib/x86_64-linux-gnu/libgflags.a -lcublas_static -lcudart -lculibos -lcurand_static -lcudnn -lcurand -L/home/yhl/Qt5.9.2/5.9.2/gcc_64/lib -lQt5Core -lpthread
/usr/bin/ld: /data_1/2021biaozhushuju/obj_vehicle/RefineDet-master/build/lib/libcaffe.so: undefined reference to symbol 'cudaGetDevice'
/usr/local/cuda-8.0/lib64//libcudart.so: error adding symbols: DSO missing from command line
Makefile:227: recipe for target 'refinedet' failed
很奇怪的错误。。
其中我的pro文件这么写的:
QT += core
QT -= gui
CONFIG += c++11
TARGET = refinedet
CONFIG += console
CONFIG -= app_bundle
TEMPLATE = app
DEFINES += USE_OPENCV
QMAKE_CXXFLAGS += -std=c++11
OPENCV_ROOT_PATH = /home/yhl/software_install/opencv3.2
CAFFE_ROOT_PATH = /data_1/2021biaozhushuju/obj_vehicle/RefineDet-master
INCLUDEPATH += $${OPENCV_ROOT_PATH}/include/
INCLUDEPATH += $${OPENCV_ROOT_PATH}/include/opencv
INCLUDEPATH += $${OPENCV_ROOT_PATH}/include/opencv2
INCLUDEPATH += $${CAFFE_ROOT_PATH}/include/
#INCLUDEPATH += $${CAFFE_ROOT_PATH}/build/src/
INCLUDEPATH += $${CAFFE_ROOT_PATH}/build/include/
INCLUDEPATH += /usr/local/cuda/include
INCLUDEPATH += /usr/local/include/node
LIBS += -L$${OPENCV_ROOT_PATH}/lib
LIBS += -L$${CAFFE_ROOT_PATH}/build/lib
LIBS += -L/usr/local/ssl/lib
LIBS += -L/usr/local/cuda-8.0/lib64/
LIBS += -L/usr/lib/x86_64-linux-gnu/hdf5/serial
LIBS += -L/lib/x86_64-linux-gnu/
LIBS += -lopencv_core
LIBS += -lopencv_imgproc
LIBS += -lopencv_highgui
LIBS += -lopencv_video
LIBS += -lopencv_imgcodecs
LIBS += -lopencv_videoio
LIBS += -lboost_serialization
LIBS += -lboost_system
LIBS += -lboost_filesystem
LIBS += -lglog
LIBS += -lcaffe
LIBS += -lhdf5
LIBS += -lhdf5_hl
LIBS += -lboost_thread
LIBS += -lprotobuf
LIBS += -latlas
LIBS += -lssl3
LIBS += -lpthread
LIBS += -ldl
LIBS += -lrt
LIBS += /usr/lib/x86_64-linux-gnu/libgflags.a
LIBS += -lcublas_static
LIBS += -lcudart
LIBS += -lculibos
LIBS += -lcurand_static
LIBS += -lcudnn
LIBS += -lcurand
SOURCES += main.cpp
折腾了一会儿,我把和cuda相关的库放到了前面:
QT += core
QT -= gui
CONFIG += c++11
TARGET = refinedet
CONFIG += console
CONFIG -= app_bundle
TEMPLATE = app
DEFINES += USE_OPENCV
QMAKE_CXXFLAGS += -std=c++11
OPENCV_ROOT_PATH = /home/yhl/software_install/opencv3.2
CAFFE_ROOT_PATH = /data_1/2021biaozhushuju/obj_vehicle/RefineDet-master
INCLUDEPATH += $${OPENCV_ROOT_PATH}/include/
INCLUDEPATH += $${OPENCV_ROOT_PATH}/include/opencv
INCLUDEPATH += $${OPENCV_ROOT_PATH}/include/opencv2
INCLUDEPATH += $${CAFFE_ROOT_PATH}/include/
#INCLUDEPATH += $${CAFFE_ROOT_PATH}/build/src/
INCLUDEPATH += $${CAFFE_ROOT_PATH}/build/include/
INCLUDEPATH += /usr/local/cuda/include
INCLUDEPATH += /usr/local/include/node
LIBS += -L$${OPENCV_ROOT_PATH}/lib
LIBS += -L$${CAFFE_ROOT_PATH}/build/lib
LIBS += -L/usr/local/ssl/lib
LIBS += -L/usr/local/cuda-8.0/lib64/
LIBS += -L/usr/lib/x86_64-linux-gnu/hdf5/serial
LIBS += -L/lib/x86_64-linux-gnu/
LIBS += -lcublas_static
LIBS += -lcudart
LIBS += -lculibos
LIBS += -lcurand_static
LIBS += -lcudnn
LIBS += -lcurand
LIBS += -lopencv_core
LIBS += -lopencv_imgproc
LIBS += -lopencv_highgui
LIBS += -lopencv_video
LIBS += -lopencv_imgcodecs
LIBS += -lopencv_videoio
LIBS += -lboost_serialization
LIBS += -lboost_system
LIBS += -lboost_filesystem
LIBS += -lglog
LIBS += -lcaffe
LIBS += -lhdf5
LIBS += -lhdf5_hl
LIBS += -lboost_thread
LIBS += -lprotobuf
LIBS += -latlas
LIBS += -lssl3
LIBS += -lpthread
LIBS += -ldl
LIBS += -lrt
LIBS += /usr/lib/x86_64-linux-gnu/libgflags.a
SOURCES += main.cpp
cuda相关的库放前面就可以了。居然和顺序有关,具体啥原因不清楚。
caffe-lstm 报错:
CXX src/caffe/common.cpp
CXX src/caffe/3rdparty/ctc_entrypoint.cpp
In file included from /usr/include/c++/5/tuple:35:0,
from ./include/caffe/3rdparty/detail/cpu_ctc.cuh:3,
from src/caffe/3rdparty/ctc_entrypoint.cpp:7:
/usr/include/c++/5/bits/c++0x_warning.h:32:2: error: #error This file requires compiler and library support for the ISO C++ 2011 standard. This support must be enabled with the -std=c++11 or -std=gnu++11 compiler options.
#error This file requires compiler and library support \
^
In file included from src/caffe/3rdparty/ctc_entrypoint.cpp:7:0:
./include/caffe/3rdparty/detail/cpu_ctc.cuh:82:10: error: ‘tuple’ in namespace ‘std’ does not name a template type
std::tuple<ProbT, bool>
^
./include/caffe/3rdparty/detail/cpu_ctc.cuh:188:6: error: ‘tuple’ in namespace ‘std’ does not name a template type
std::tuple<ProbT, bool>
^
./include/caffe/3rdparty/detail/cpu_ctc.cuh: In member function ‘ctcStatus_t CpuCTC<ProbT>::cost_and_grad(const ProbT*, ProbT*, ProbT*, const int*, const int*, const int*)’:
./include/caffe/3rdparty/detail/cpu_ctc.cuh:383:24: error: ‘nullptr’ was not declared in this scope
if (activations == nullptr ||
^
./include/caffe/3rdparty/detail/cpu_ctc.cuh:425:9: error: ‘tie’ is not a member of ‘std’
std::tie(costs[mb], mb_status) =
^
./include/caffe/3rdparty/detail/cpu_ctc.cuh: In member function ‘ctcStatus_t CpuCTC<ProbT>::score_forward(const ProbT*, ProbT*, const int*, const int*, const int*)’:
./include/caffe/3rdparty/detail/cpu_ctc.cuh:442:24: error: ‘nullptr’ was not declared in this scope
if (activations == nullptr ||
^
src/caffe/3rdparty/ctc_entrypoint.cpp: In function ‘ctcStatus_t compute_ctc_loss(const float*, float*, const int*, const int*, const int*, int, int, float*, void*, ctcOptions)’:
src/caffe/3rdparty/ctc_entrypoint.cpp:69:24: error: ‘nullptr’ was not declared in this scope
if (activations == nullptr ||
^
src/caffe/3rdparty/ctc_entrypoint.cpp: In function ‘ctcStatus_t get_workspace_size(const int*, const int*, int, int, ctcOptions, size_t*)’:
src/caffe/3rdparty/ctc_entrypoint.cpp:99:26: error: ‘nullptr’ was not declared in this scope
if (label_lengths == nullptr ||
^
In file included from src/caffe/3rdparty/ctc_entrypoint.cpp:7:0:
./include/caffe/3rdparty/detail/cpu_ctc.cuh: In instantiation of ‘ctcStatus_t CpuCTC<ProbT>::cost_and_grad(const ProbT*, ProbT*, ProbT*, const int*, const int*, const int*) [with ProbT = float]’:
src/caffe/3rdparty/ctc_entrypoint.cpp:52:45: required from here
./include/caffe/3rdparty/detail/cpu_ctc.cuh:426:37: error: ‘cost_and_grad_kernel’ was not declared in this scope
cost_and_grad_kernel(grads + mb * alphabet_size_,
^
Makefile:577: recipe for target '.build_release/src/caffe/3rdparty/ctc_entrypoint.o' failed
make: *** [.build_release/src/caffe/3rdparty/ctc_entrypoint.o] Error 1
make: *** 正在等待未完成的任务....
src/caffe/common.cpp: In function ‘void caffe::GlobalInit(int*, char***)’:
src/caffe/common.cpp:45:5: error: ‘::gflags’ has not been declared
::gflags::ParseCommandLineFlags(pargc, pargv, true);
^
Makefile:577: recipe for target '.build_release/src/caffe/common.o' failed
make: *** [.build_release/src/caffe/common.o] Error 1
解决方案
error: #error This file requires compiler and library support for the ISO C++ 2011 standard. This support is currently experimental, and must be enabled with the -std=c++11 or -std=gnu++11 compiler options.
caffe c++11编译问题
问题:error: #error This file requires compiler and library support for the ISO C++ 2011 standard. This support is currently experimental, and must be enabled with the -std=c++11 or -std=gnu++11 compiler options.
解决:修改Makefile文件
CXXFLAGS += -pthread -fPIC $(COMMON_FLAGS) $(WARNINGS) -std=c++11
NVCCFLAGS += -D_FORCE_INLINES -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS) -std=c++11
LINKFLAGS += -pthread -fPIC $(COMMON_FLAGS) $(WARNINGS) -std=c++11
CXX src/caffe/net.cpp
NVCC src/caffe/layers/cudnn_lrn_layer.cu
/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9220): error: argument of type "const void *" is incompatible with parameter of type "const float *"
/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9231): error: argument of type "const void *" is incompatible with parameter of type "const float *"
/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9244): error: argument of type "const void *" is incompatible with parameter of type "const double *"
/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9255): error: argument of type "const void *" is incompatible with parameter of type "const double *"
/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9268): error: argument of type "const void *" is incompatible with parameter of type "const float *"
/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9279): error: argument of type "const void *" is incompatible with parameter of type "const float *"
/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9292): error: argument of type "const void *" is incompatible with parameter of type "const double *"
/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9303): error: argument of type "const void *" is incompatible with parameter of type "const double *"
/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9316): error: argument of type "const void *" is incompatible with parameter of type "const int *"
/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9327): error: argument of type "const void *" is incompatible with parameter of type "const int *"
/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9340): error: argument of type "const void *" is incompatible with parameter of type "const long long *"
/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9352): error: argument of type "const void *" is incompatible with parameter of type "const long long *"
/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9365): error: argument of type "const void *" is incompatible with parameter of type "const int *"
/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9376): error: argument of type "const void *" is incompatible with parameter of type "const int *"
/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9389): error: argument of type "const void *" is incompatible with parameter of type "const long long *"
/usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9401): error: argument of type "const void *" is incompatible with parameter of type "const long long *"
这个错误真难解决,因为在服务器上,一些环境不敢随便动。
然后找来高手:
cd /usr/bin
输入gcc后再按Tab键(自动补全),查看已安装的gcc版本;g++同理:
gcc
gcc gcc-5 gcc-7 gcc-ar-4.9 gcc-ar-7 gcc-nm-4.9 gcc-nm-7 gcc-ranlib-4.9 gcc-ranlib-7
gcc-4.9 gcc_7 gcc-ar gcc-ar-5 gcc-nm gcc-nm-5 gcc-ranlib gcc-ranlib-5
g++
g++ g++-4.9 g++-5 g++-7
ln -snf gcc-4.9 gcc
ln -snf g++-4.9 g++
集群上面编译训练caffe
改了makefile里面CUDA_DIR,BLAS这些。然后加载gcc5.4。编译是通过了,但是训练的时候报错。
说/usr/lib/x86_64-linux-gnu/libboost_system.so.1.65.0找不到。
可是1.65我去/usr/lib/x86_64-linux-gnu目录下看是存在的啊。
ldd caffe看了也是链接的
libboost_system.so.1.65.0 => /usr/lib/x86_64-linux-gnu/libboost_system.so.1.65.0
然后在训练的sh文件加上
export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/:$LD_LIBRARY_PATH
也是不行!
无解!~
然后找来jiamin,他说编译的环境和实际运行的环境不一样! 敲命令:
srun --gres=gpu:1 --pty bash
进去一个环境,
ll /usr/lib/x86_64-linux-gnu/libboost*
ls: cannot access '/usr/lib/x86_64-linux-gnu/libboost*': No such file or directory
这个环境下面确实没有libboost_system.so.1.65.0
再ifconfig
提示现在ip是192.168.80.19
再exit退出:
查看ip是192.168.80.2
ip都不一样。
然后再srun --gres=gpu:1 --pty bash
环境下重新编译caffe。
然后编译好的caffe链接的是
libboost_system.so.1.67.0 => /home/nfs/admin0/apps/python/anaconda3/envs/py2.7_proc3.5/lib/libboost_system.so.1.67.0 (0x00001462e9873000)
之前链接的是:
libboost_system.so.1.65.1 => /usr/lib/x86_64-linux-gnu/libboost_system.so.1.65.1 (0x0000152e8f6e3000)。
然后就可以正常训练了!
厉害啊!还有这问题,编译的环境和实际运行的环境不一样。这个问题估计只有jiamin能解决了。
上面说的就是编译caffe-lstm的事:在服务器上面编译不过,报错,把gcc改成4.9解决了一个报错,然后又报错提示protobuf问题,一查又说是gcc需要5.4才能解决。可是服务器上面gcc是5.5,没有5.4。
确实,拉到本地gcc是5.4版本的,编译可以过的。
我不会把gcc从5.5降到5.4,然后就没法继续了,于是去集群上试试。