多版本cuda环境管理

多版本cuda环境管理

1、anaconda安装

1.1 下载 Anaconda 脚本

#wget下载 Anaconda 安装脚本
root@bc23574385ad:~# wget -P ./ https://mirrors.tuna.tsinghua.edu.cn/anaconda/archive/Anaconda3-2020.02-Linux-x86_64.sh

#检查包的完整性
root@bc23574385ad:~# sha256sum Anaconda3-2020.02-Linux-x86_64.sh 
2b9f088b2022edb474915d9f69a803d6449d5fdb4c303041f60ac4aefcc208bb  Anaconda3-2020.02-Linux-x86_64.sh

1.2 安装Anaconda

#运行脚本启动安装进程
root@bc23574385ad:~# bash Anaconda3-2020.02-Linux-x86_64.sh 

#你应该能看到下面的输出:
Welcome to Anaconda3 2020.02

In order to continue the installation process, please review the license
agreement.
Please, press ENTER to continue
>>> 

#按ENTER继续,然后继续按ENTER向下滚动阅读协议。阅读完协议后,会询问你是否接受协议条款:
Do you accept the license terms? [yes|no]
[no] >>> yes

#输入yes接受协议,随后会提示你选择安装路径:
Anaconda3 will now be installed into this location:
/root/anaconda3         #这里是默认路径

- Press ENTER to confirm the location
- Press CTRL-C to abort the installation
- Or specify a different location below

[/root/anaconda3] >>> /usr/local/anaconda3    #可以在这里自定义新的安装路径

#安装过程需要花费一些时间。安装完成后,脚本会询问你是否运行conda init,输入yes。
installation finished.
Do you wish the installer to initialize Anaconda3
by running conda init? [yes|no]
[no] >>> yes

#这会把命令行工具conda添加到系统的PATH环境变量中。
#想要激活 Anaconda,你可以关闭并且重新打开你的 shell 或者在当前 shell 会话中输入下面的命令,来重新加载PATH环境变量:
root@bc23574385ad:~# source ~/.bashrc

#查看安装的版本
(base) root@bc23574385ad:~# conda -V
conda 4.8.2

1.3 添加清华源

#添加清华源
(base) root@bc23574385ad:~# conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/
(base) root@bc23574385ad:~# conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/
(base) root@bc23574385ad:~# conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/
(base) root@bc23574385ad:~# conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda/
(base) root@bc23574385ad:~# conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r/
(base) root@bc23574385ad:~# conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/pro/

2、完善python-3.7环境

名称 版本
cuda 10.2
cudnn 7.6
pytorch 1.6
tensorflow 2.3.0

2.1 创建python-3.7环境

#创建python-3.7环境
(base) root@bc23574385ad:~# conda create --name python-3.7 python=3.7
(base) root@bc23574385ad:~# conda activate python-3.7
#安装常用的python包
(python-3.7) root@bc23574385ad:~# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple ipython pandas pillow matplotlib setproctitle networkx scikit-learn scipy tqdm GPUtil jupyterlab notebook h5py statsmodels

2.2 安装CUDA

#下载cuda包
(python-3.7) root@bc23574385ad:~# wget https://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda_10.2.89_440.33.01_linux.run

#安装cuda
(python-3.7) root@bc23574385ad:~# sh cuda_10.2.89_440.33.01_linux.run --silent --toolkit --samples --librarypath=/usr/local/cuda-10.2
#设置软链接
(python-3.7) root@bc23574385ad:~# ln -s /usr/local/cuda-10.2/bin/nvcc /usr/bin/nvcc-python-3.7
(python-3.7) root@bc23574385ad:~# which nvcc-python-3.7
/usr/bin/nvcc-python-3.7

#检查安装的版本
(python-3.7) root@bc23574385ad:~# nvcc-python-3.7 -V
nvcc-python-3: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Wed_Oct_23_19:24:38_PDT_2019
Cuda compilation tools, release 10.2, V10.2.89
#测试 CUDA Toolkit 以验证是否安装成功
#Result = PASS则安装成功
(python-3.7) root@bc23574385ad:~# cd /usr/local/cuda-10.2/extras/demo_suite/
(python-3.7) root@bc23574385ad:/usr/local/cuda-10.2/extras/demo_suite# ./deviceQuery
......
deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 11.6, CUDA Runtime Version = 10.2, NumDevs = 4, Device0 = NVIDIA GeForce RTX 2080 Ti, Device1 = NVIDIA GeForce RTX 2080 Ti, Device2 = NVIDIA GeForce RTX 2080 Ti, Device3 = NVIDIA GeForce RTX 2080 Ti
Result = PASS

2.3 安装CUDNN

#下载cudnn包(cudnn通常需要登录NVIDIA开发者网站手动下载,再上传到服务器;这里确认安装包已存在)
(python-3.7) root@bc23574385ad:~# ll -d cudnn-10.2-linux-x64-v7.6.5.32.tgz 
-rw-r--r-- 1 root root 548210361 Mar 31 13:53 cudnn-10.2-linux-x64-v7.6.5.32.tgz

#解压缩
(python-3.7) root@bc23574385ad:~# tar xf cudnn-10.2-linux-x64-v7.6.5.32.tgz
(python-3.7) root@bc23574385ad:~# ll -d cuda
drwxr-xr-x 4 root root 4096 Mar 31 14:03 cuda/
#把相应的文件,复制到指定目录即可
(python-3.7) root@bc23574385ad:~# cp cuda/include/cudnn* /usr/local/cuda-10.2/include/
(python-3.7) root@bc23574385ad:~# cp cuda/lib64/libcudnn* /usr/local/cuda-10.2/lib64/

#添加权限
(python-3.7) root@bc23574385ad:~# chmod a+r /usr/local/cuda-10.2/include/cudnn* /usr/local/cuda-10.2/lib64/libcudnn*

2.4 安装tensorflow

#安装依赖包
(python-3.7) root@bc23574385ad:~# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --user pytest-cov==2.0 pytest-filter-subpackage==0.1

#安装指定版本的tensorflow
(python-3.7) root@bc23574385ad:~# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple tensorflow==2.3.0
#验证tensorflow是否安装成功
(python-3.7) root@bc23574385ad:~# pip show tensorflow
Name: tensorflow
Version: 2.3.0
Summary: TensorFlow is an open source machine learning framework for everyone.
Home-page: https://www.tensorflow.org/
Author: Google Inc.
Author-email: packages@tensorflow.org
License: Apache 2.0
Location: /usr/local/anaconda3/envs/python-3.7/lib/python3.7/site-packages
Requires: absl-py, astunparse, gast, google-pasta, grpcio, h5py, keras-preprocessing, numpy, opt-einsum, protobuf, scipy, six, tensorboard, tensorflow-estimator, termcolor, wheel, wrapt
Required-by:

2.5 安装pytorch

#下载对应cuda版本编译的安装包
(python-3.7) root@bc23574385ad:~# wget -P ./ https://download.pytorch.org/whl/cu102/torch-1.6.0-cp37-cp37m-linux_x86_64.whl

#安装下载好的安装包
(python-3.7) root@bc23574385ad:~# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple torch-1.6.0-cp37-cp37m-linux_x86_64.whl
#验证pytorch是否安装成功,这里可以写一个小脚本验证下,如没有报错就是安装成功了
(python-3.7) root@bc23574385ad:~# vim pytorch.py
(python-3.7) root@bc23574385ad:~# cat pytorch.py 
from __future__ import print_function  
import torch
x = torch.rand(5, 3)
print(x)

(python-3.7) root@bc23574385ad:~# python3.7 pytorch.py
tensor([[0.6295, 0.4860, 0.4348],
        [0.2331, 0.1373, 0.6409],
        [0.8252, 0.2289, 0.3068],
        [0.2569, 0.0396, 0.2084],
        [0.3917, 0.4409, 0.2219]])

2.6 多版本cuda环境管理

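#进入环境时生效的脚本(注意:本文1.2节将Anaconda安装到了/usr/local/anaconda3,如果~/anaconda3不是你的实际安装路径,请把下面命令中的~/anaconda3替换为实际安装路径,例如/usr/local/anaconda3,否则conda不会加载这些脚本)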
(python-3.7) root@bc23574385ad:~# mkdir -p ~/anaconda3/envs/python-3.7/etc/conda/activate.d

(python-3.7) root@bc23574385ad:~# vim ~/anaconda3/envs/python-3.7/etc/conda/activate.d/activate.sh
(python-3.7) root@bc23574385ad:~# cat ~/anaconda3/envs/python-3.7/etc/conda/activate.d/activate.sh
#!/bin/sh
ORIGINAL_LD_LIBRARY_PATH=$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/usr/local/cuda-10.2/lib64:/usr/local/cuda-10.2/extras/CUPTI/lib64:/lib/nccl/cuda-10.2:$LD_LIBRARY_PATH

(python-3.7) root@bc23574385ad:~# chmod +x ~/anaconda3/envs/python-3.7/etc/conda/activate.d/activate.sh
#退出环境生效脚本
(python-3.7) root@bc23574385ad:~# mkdir -p ~/anaconda3/envs/python-3.7/etc/conda/deactivate.d

(python-3.7) root@bc23574385ad:~# vim ~/anaconda3/envs/python-3.7/etc/conda/deactivate.d/deactivate.sh
(python-3.7) root@bc23574385ad:~# cat ~/anaconda3/envs/python-3.7/etc/conda/deactivate.d/deactivate.sh
#!/bin/sh
export LD_LIBRARY_PATH=$ORIGINAL_LD_LIBRARY_PATH
unset ORIGINAL_LD_LIBRARY_PATH

(python-3.7) root@bc23574385ad:~# chmod +x ~/anaconda3/envs/python-3.7/etc/conda/deactivate.d/deactivate.sh

3、完善python-3.8环境

名称 版本
cuda 11.0
cudnn 8.0
pytorch 1.7
tensorflow 2.4.0

3.1 创建python-3.8环境

#创建python-3.8环境
(python-3.7) root@bc23574385ad:~# conda create --name python-3.8 python=3.8
(python-3.7) root@bc23574385ad:~# conda activate python-3.8
#安装常用的python包
(python-3.8) root@bc23574385ad:~# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple ipython pandas pillow matplotlib setproctitle networkx scikit-learn scipy tqdm GPUtil jupyterlab notebook h5py statsmodels

3.2 安装CUDA

#下载cuda包
(python-3.8) root@bc23574385ad:~# wget http://developer.download.nvidia.com/compute/cuda/11.0.2/local_installers/cuda_11.0.2_450.51.05_linux.run

#安装cuda
(python-3.8) root@bc23574385ad:~# sh cuda_11.0.2_450.51.05_linux.run --silent --toolkit --samples --librarypath=/usr/local/cuda-11.0
#设置软链接
(python-3.8) root@bc23574385ad:~# ln -s /usr/local/cuda-11.0/bin/nvcc /usr/bin/nvcc-python-3.8 
(python-3.8) root@bc23574385ad:~# which nvcc-python-3.8
/usr/bin/nvcc-python-3.8

#检查安装的版本
(python-3.8) root@bc23574385ad:~# nvcc-python-3.8 -V
nvcc-python-3: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Thu_Jun_11_22:26:38_PDT_2020
Cuda compilation tools, release 11.0, V11.0.194
Build cuda_11.0_bu.TC445_37.28540450_0
#测试 CUDA Toolkit 以验证是否安装成功
#Result = PASS则安装成功
(python-3.8) root@bc23574385ad:~# cd /usr/local/cuda-11.0/extras/demo_suite/
(python-3.8) root@bc23574385ad:/usr/local/cuda-11.0/extras/demo_suite# ./deviceQuery
......
deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 11.6, CUDA Runtime Version = 11.0, NumDevs = 4, Device0 = NVIDIA GeForce RTX 2080 Ti, Device1 = NVIDIA GeForce RTX 2080 Ti, Device2 = NVIDIA GeForce RTX 2080 Ti, Device3 = NVIDIA GeForce RTX 2080 Ti
Result = PASS

3.3 安装CUDNN

#下载cudnn包(cudnn通常需要登录NVIDIA开发者网站手动下载,再上传到服务器;这里确认安装包已存在)
(python-3.8) root@bc23574385ad:~# ll -d cudnn-11.0-linux-x64-v8.0.1.13.tgz 
-rw-r--r-- 1 root root 1142456047 Mar 31 14:30 cudnn-11.0-linux-x64-v8.0.1.13.tgz

#解压缩
(python-3.8) root@bc23574385ad:~# tar xf cudnn-11.0-linux-x64-v8.0.1.13.tgz 
(python-3.8) root@bc23574385ad:~# ll -d cuda                               
drwxr-xr-x 4 root root 4096 Mar 31 14:38 cuda/
#把相应的文件,复制到指定目录即可
(python-3.8) root@bc23574385ad:~# cp cuda/include/cudnn* /usr/local/cuda-11.0/include/
(python-3.8) root@bc23574385ad:~# cp cuda/lib64/libcudnn* /usr/local/cuda-11.0/lib64/

#添加权限
(python-3.8) root@bc23574385ad:~# chmod a+r /usr/local/cuda-11.0/include/cudnn* /usr/local/cuda-11.0/lib64/libcudnn*

3.4 安装tensorflow

#安装指定版本的tensorflow
(python-3.8) root@bc23574385ad:~# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple tensorflow==2.4.0
#验证tensorflow是否安装成功
(python-3.8) root@bc23574385ad:~# pip show tensorflow
Name: tensorflow
Version: 2.4.0
Summary: TensorFlow is an open source machine learning framework for everyone.
Home-page: https://www.tensorflow.org/
Author: Google Inc.
Author-email: packages@tensorflow.org
License: Apache 2.0
Location: /usr/local/anaconda3/envs/python-3.8/lib/python3.8/site-packages
Requires: absl-py, astunparse, flatbuffers, gast, google-pasta, grpcio, h5py, keras-preprocessing, numpy, opt-einsum, protobuf, six, tensorboard, tensorflow-estimator, termcolor, typing-extensions, wheel, wrapt
Required-by: 

3.5 安装pytorch

#下载对应cuda版本编译的安装包
(python-3.8) root@bc23574385ad:~# wget -P ./ https://download.pytorch.org/whl/cu110/torch-1.7.1%2Bcu110-cp38-cp38-linux_x86_64.whl

#安装下载好的安装包
(python-3.8) root@bc23574385ad:~# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple torch-1.7.1+cu110-cp38-cp38-linux_x86_64.whl
#验证pytorch是否安装成功,这里可以写一个小脚本验证下,如没有报错就是安装成功了
(python-3.8) root@bc23574385ad:~# vim pytorch.py
(python-3.8) root@bc23574385ad:~# cat pytorch.py 
from __future__ import print_function  
import torch
x = torch.rand(5, 3)
print(x)

(python-3.8) root@bc23574385ad:~# python3.8 pytorch.py 
tensor([[0.4627, 0.5238, 0.0711],
        [0.2442, 0.7200, 0.0021],
        [0.3826, 0.1364, 0.1059],
        [0.2161, 0.9110, 0.2768],
        [0.1932, 0.7716, 0.2172]])

3.6 多版本cuda环境管理

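#进入环境时生效的脚本(注意:本文1.2节将Anaconda安装到了/usr/local/anaconda3,如果~/anaconda3不是你的实际安装路径,请把下面命令中的~/anaconda3替换为实际安装路径,例如/usr/local/anaconda3,否则conda不会加载这些脚本)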
(python-3.8) root@bc23574385ad:~# mkdir -p ~/anaconda3/envs/python-3.8/etc/conda/activate.d

(python-3.8) root@bc23574385ad:~# vim ~/anaconda3/envs/python-3.8/etc/conda/activate.d/activate.sh
(python-3.8) root@bc23574385ad:~# cat ~/anaconda3/envs/python-3.8/etc/conda/activate.d/activate.sh
#!/bin/sh
ORIGINAL_LD_LIBRARY_PATH=$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/usr/local/cuda-11.0/lib64:/usr/local/cuda-11.0/extras/CUPTI/lib64:/lib/nccl/cuda-11.0:$LD_LIBRARY_PATH

(python-3.8) root@bc23574385ad:~# chmod +x ~/anaconda3/envs/python-3.8/etc/conda/activate.d/activate.sh
#退出环境生效脚本
(python-3.8) root@bc23574385ad:~# mkdir -p ~/anaconda3/envs/python-3.8/etc/conda/deactivate.d

(python-3.8) root@bc23574385ad:~# vim ~/anaconda3/envs/python-3.8/etc/conda/deactivate.d/deactivate.sh
(python-3.8) root@bc23574385ad:~# cat ~/anaconda3/envs/python-3.8/etc/conda/deactivate.d/deactivate.sh
#!/bin/sh
export LD_LIBRARY_PATH=$ORIGINAL_LD_LIBRARY_PATH
unset ORIGINAL_LD_LIBRARY_PATH

(python-3.8) root@bc23574385ad:~# chmod +x ~/anaconda3/envs/python-3.8/etc/conda/deactivate.d/deactivate.sh