编写 Dockerfile
| |
# Deep-learning development image: CUDA 11.8 + cuDNN 8 on Ubuntu 22.04 with an
# SSH server, Miniconda and a "d2l" conda environment holding PyTorch 2.1.1,
# d2l, Jupyter and TensorBoard.
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04

# MAINTAINER is deprecated; the author is recorded as an OCI label instead.
LABEL org.opencontainers.image.authors="watcherprime <woma@126.com>"

# Build-time only: stops apt/dpkg from prompting without leaking the setting
# into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# The d2l environment is listed first so that a plain `python` / `pip` resolves
# to the interpreter that has torch and d2l installed, both during the build
# and in `docker exec` sessions. `conda` is found in the Miniconda bin dir.
ENV TZ=Asia/Shanghai \
    PATH=/opt/miniconda3/envs/d2l/bin:/opt/miniconda3/bin:$PATH

WORKDIR /root

# Make a RUN step fail when any command of a pipeline fails
# (for example `echo ... | chpasswd`).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# OS packages. `apt-get update` and `install` share one layer so a cached,
# stale package index is never reused, and the index is removed in the same
# layer that created it. Recommended packages are kept on purpose: this is an
# interactive development image. wget is listed explicitly because the
# Miniconda download below needs it.
RUN apt-get update \
 && apt-get install -y \
        ca-certificates \
        ffmpeg \
        git \
        libsm6 \
        libxext6 \
        net-tools \
        ssh \
        tzdata \
        vim \
        wget \
 && ln -fs "/usr/share/zoneinfo/${TZ}" /etc/localtime \
 && echo "${TZ}" > /etc/timezone \
 && dpkg-reconfigure --frontend noninteractive tzdata \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# SSH server: allow root login with a password and disable PAM.
# WARNING: the root password is the fixed string "root". Change it (or switch
# to key-based login) before publishing port 22 on an untrusted network.
RUN mkdir -p /var/run/sshd /root/.ssh \
 && echo 'root:root' | chpasswd \
 && sed -i 's/^#PermitRootLogin.*/PermitRootLogin yes/' /etc/ssh/sshd_config \
 && sed -i 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config

# Script that starts sshd, plus a ~/.bashrc hook that runs it.
# printf is used instead of `echo '...\n...'` because bash's echo does not
# expand "\n" the way /bin/sh's echo does.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'service ssh start >>/root/start_ssh.log' \
        > /root/start_ssh.sh \
 && chmod 755 /root/start_ssh.sh \
 && printf '%s\n' \
        '#<<< ssh <<<' \
        'if [ -f /root/start_ssh.sh ]; then' \
        '/root/start_ssh.sh' \
        'fi' \
        '#<<< ssh <<<' \
        >> /root/.bashrc

# Script that starts TensorBoard on port 6006 unless netstat already shows
# something matching "6006", plus a ~/.bashrc hook that runs it.
# `python -m tensorboard.main` replaces the hard-coded site-packages path so
# the script does not depend on the environment's Python minor version.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'a=$(netstat -anp | grep 6006)' \
        'if [ -z "$a" ]' \
        'then' \
        'source /opt/miniconda3/etc/profile.d/conda.sh' \
        'conda activate d2l' \
        'nohup python -m tensorboard.main --logdir /gemini/logs --bind_all > /root/start_tensorboard.log 2>&1 &' \
        'fi' \
        > /root/start_tensorboard.sh \
 && chmod 755 /root/start_tensorboard.sh \
 && printf '%s\n' \
        '#<<< tensorboard <<<' \
        'if [ -f /root/start_tensorboard.sh ]; then' \
        '/root/start_tensorboard.sh' \
        'fi' \
        '#<<< tensorboard <<<' \
        >> /root/.bashrc

# Miniconda. `-b` runs the installer in batch mode, so no answers need to be
# piped in; the installer is deleted in the same layer to keep it out of the
# image.
# TODO: verify the installer's SHA-256 against the published value before
# executing it.
RUN wget -q -O /tmp/miniconda.sh "https://mirrors.bfsu.edu.cn/anaconda/miniconda/Miniconda3-py310_24.3.0-0-Linux-x86_64.sh" \
 && bash /tmp/miniconda.sh -b -p /opt/miniconda3 \
 && rm -f /tmp/miniconda.sh

# Conda channel mirrors; do not auto-activate the base environment.
RUN conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ \
 && conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/ \
 && conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/ \
 && conda config --set show_channel_urls yes \
 && conda config --set auto_activate_base no

# Create the d2l environment and install its conda packages. `-y` makes every
# step explicitly non-interactive, and `--name d2l` targets the environment
# directly so no shell activation is required inside the build.
RUN conda create -y --name d2l python=3.10.13 \
 && conda update -y --name base conda \
 && conda init bash \
 && conda install -y --quiet --name d2l numpy pyyaml mkl mkl-include setuptools cmake cffi typing \
 && conda install -y --quiet --name d2l -c mingfeima mkldnn \
 && conda clean -afy

# Python packages, installed with the d2l environment's own interpreter so
# they land in that environment rather than in base. tensorboard is installed
# explicitly because start_tensorboard.sh launches it from this environment.
# NOTE(review): the default PyPI wheel for torch 2.1.1 is believed to bundle
# CUDA 12.1 rather than 11.8 - confirm, and use the cu118 wheel index if the
# build must match the base image's CUDA version exactly.
RUN /opt/miniconda3/envs/d2l/bin/python -m pip install --no-cache-dir --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple \
 && /opt/miniconda3/envs/d2l/bin/python -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple \
 && /opt/miniconda3/envs/d2l/bin/python -m pip install --no-cache-dir torch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 \
 && /opt/miniconda3/envs/d2l/bin/python -m pip install --no-cache-dir d2l jupyter tensorboard

# Working directories for code and outputs.
RUN mkdir -p /womacode/code /womacode/output

# 22 = SSH, 6006 = TensorBoard, 8888 = Jupyter.
EXPOSE 22 6006 8888

# The container intentionally keeps running as root: the image is built around
# root SSH login and the ~/.bashrc start-up hooks in /root.
CMD ["bash"]
构建镜像
# Build the image from the Dockerfile in the current directory.
docker build --tag torch2.1.1_cu118_py310_ubuntu22.04 --file Dockerfile .
打标签
# Give the local image a repository-qualified name so it can be pushed.
docker image tag torch2.1.1_cu118_py310_ubuntu22.04 watcherprime/deeplearn:Torch2.1.1_cu118_py310_ubuntu22.04
推送
# Upload the tagged image to the registry.
docker image push watcherprime/deeplearn:Torch2.1.1_cu118_py310_ubuntu22.04
启动
挂载宿主磁盘:/opt/project/code/python
# Variant 1: mount the host directory, publish SSH on host port 9998, and
# restart the container automatically.
docker run --interactive --tty --gpus all \
    --volume /opt/project/code/python:/remote-home/python \
    --publish 9998:22 \
    --name d2l_env \
    --restart=always \
    torch2.1.1_cu118_py310_ubuntu22.04

# Variant 2: no host mount; publish container port 8080 on host port 9999.
# NOTE(review): the image exposes 22, 6006 and 8888 but not 8080 - confirm
# which service is expected to listen on 8080.
docker run --interactive --tty --gpus all \
    --publish 9999:8080 \
    --name d2l \
    torch2.1.1_cu118_py310_ubuntu22.04
docker 进入 d2l 容器
# Open an interactive bash shell inside the running "d2l" container.
docker exec --interactive --tty d2l /bin/bash
测试环境是否生效
# Smoke test: print the torch version, whether CUDA is usable, and the d2l
# version, in that order.
import d2l
import torch

for value in (torch.__version__, torch.cuda.is_available(), d2l.__version__):
    print(value)
conda命令
| |
# List every conda environment known to this installation.
conda info --envs

# Switch the current shell to the d2l environment.
conda activate d2l
| |
安装相关包
# Install the pinned PyTorch stack into the currently active environment.
pip install \
    torch==2.1.1 \
    torchvision==0.16.1 \
    torchaudio==2.1.1
运行 ResNet 测试
| import torchvision |
| from torchvision import transforms |
| from torch.utils import data |
| |
def get_dataloader_workers():
    """Return the number of worker processes used to read the data (6)."""
    return 6
| |
# Module-level transform kept from the original snippet; the function below
# builds its own pipeline and does not read this name.
trans = transforms.ToTensor()


def load_data_fashion_mnist(batch_size, resize=None):
    """Download Fashion-MNIST and return (train_loader, test_loader).

    Args:
        batch_size: number of samples per batch for both loaders.
        resize: optional size passed to ``transforms.Resize``; when given,
            images are resized before being converted to tensors.

    Returns:
        A tuple ``(train_iter, test_iter)`` of ``DataLoader`` objects. The
        training loader shuffles its data; the test loader reads the test
        split in order.
    """
    pipeline = [transforms.ToTensor()]
    if resize:
        # Resize must run before ToTensor, so it goes to the front.
        pipeline.insert(0, transforms.Resize(resize))
    transform = transforms.Compose(pipeline)

    root = "01_data/01_DataSet_FashionMNIST"
    mnist_train = torchvision.datasets.FashionMNIST(
        root=root, train=True, transform=transform, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root=root, train=False, transform=transform, download=True)

    workers = get_dataloader_workers()
    # The second loader must wrap the test split; the original wrapped
    # mnist_train twice, so evaluation ran on training data.
    return (data.DataLoader(mnist_train, batch_size, shuffle=True,
                            num_workers=workers),
            data.DataLoader(mnist_test, batch_size, shuffle=False,
                            num_workers=workers))
| |
| |
# Hyperparameters for the run.
batch_size = 256
lr = 0.05
num_epochs = 10

# Fashion-MNIST loaders with images resized to 96x96.
train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=96)

# NOTE(review): `net` and `d2l` are not defined in this snippet. Presumably
# the ResNet model is built earlier and d2l comes from
# `from d2l import torch as d2l` - confirm before running.
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())

调用GPU成功运行!
本文作者:-Watcher-
本文链接:https://www.cnblogs.com/womaspace/p/18671873
版权声明:本作品采用知识共享署名-非商业性使用-禁止演绎 2.5 中国大陆许可协议进行许可。
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步