###docker 迁移
先在宿主机安装 NVIDIA 驱动,最好使用发行版软件源内的驱动包
确认 nvidia-smi 输出正常
安装docker
###docker-ce
# Remove older, conflicting Docker packages before installing docker-ce.
sudo apt-get remove docker docker-engine docker.io containerd runc
# Tools needed to download and convert the repository signing key.
sudo apt-get install ca-certificates curl gnupg
sudo install -m 0755 -d /etc/apt/keyrings
# The signing key comes from download.docker.com; packages come from the TUNA mirror below.
curl -fsSL https://download.docker.com/linux/debian/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg
# Make the key world-readable: apt verifies repository signatures as an
# unprivileged user, so a restrictive umask would break `apt-get update`.
sudo chmod a+r /etc/apt/keyrings/docker.gpg
# The suite is pinned to "buster".
# NOTE(review): adjust the suite if the host is not based on Debian 10.
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://mirrors.tuna.tsinghua.edu.cn/docker-ce/linux/debian buster stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt-get update
sudo apt-get install docker-ce
### 安装nvidia-docker扩展
# Register the NVIDIA libnvidia-container apt repository FIRST, so that the
# nvidia-container-toolkit packages installed below can actually be found.
# The distribution is pinned to ubuntu18.04.
# NOTE(review): confirm this repository matches the host's base distribution.
distribution=ubuntu18.04 \
&& curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
&& curl -s -L "https://nvidia.github.io/libnvidia-container/${distribution}/libnvidia-container.list" | \
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
# Refresh the package index now that the repository is known, then install
# the base package that provides the nvidia-ctk CLI.
sudo apt-get update \
&& sudo apt-get install -y nvidia-container-toolkit-base
# Sanity-check the CLI and generate the CDI specification for the GPUs.
nvidia-ctk --version
sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
# Install the full toolkit and restart Docker.
sudo apt-get install -y nvidia-container-toolkit
sudo systemctl restart docker
###迁移docker image
# Stop the socket unit first so socket activation cannot restart the daemon
# while its data directory is being replaced.
sudo systemctl stop docker.socket
sudo systemctl stop docker
# Installing docker-ce already created /var/lib/docker; move it aside so the
# copy below becomes /var/lib/docker itself instead of /var/lib/docker/docker.
if [ -d /var/lib/docker ]; then
  sudo mv /var/lib/docker "/var/lib/docker.bak.$(date +%Y%m%d%H%M%S)"
fi
# [源路径] is a placeholder for the directory that contains the old "docker"
# data directory. -a preserves ownership, modes, links and extended
# attributes; root is required to read and write these files.
sudo cp -a [源路径]/docker /var/lib/docker
sudo systemctl start docker
使用 docker run 启动一个已迁移的镜像,验证迁移是否成功
###问题 宿主驱动版本和docker image的驱动不一致
docker内运行cuda
Error 804: forward compatibility was attempted on non supported HW
解决办法:修改容器中 libcuda.so.1 的软链接,使其指向的 libcuda.so 版本和宿主机一致(操作如下)
root@dofish-rd450x:/usr/lib/x86_64-linux-gnu# ls -lhtra libcuda*
-rw-r--r-- 1 root root 22M Jul 13 2022 libcuda.so.510.85.02
-rw-r--r-- 1 root root 0 May 19 14:40 libcuda.so.510.73.08
lrwxrwxrwx 1 root root 20 May 19 14:40 libcuda.so.1 -> libcuda.so.515.86.01
lrwxrwxrwx 1 root root 12 May 19 14:40 libcuda.so -> libcuda.so.1
-rw-r--r-- 1 root root 21M May 19 14:40 libcuda.so.515.86.01
root@dofish-rd450x:/usr/lib/x86_64-linux-gnu# ln -sf libcuda.so.510.85.02 libcuda.so.1