加速
pip install -i https://pypi.doubanio.com/simple [pkgname]
查看硬件显卡信息
cat /usr/local/cuda/version.txt
cat /usr/local/cuda/include/cudnn.h | grep CUDNN_MAJOR -A 2
驱动安装
ubuntu-drivers devices
# Add the graphics-driver PPA
sudo add-apt-repository ppa:graphics-drivers/ppa
# And update
sudo apt-get update
sudo apt-get purge 'nvidia*'
sudo apt-get install nvidia-387 # 根据需求和环境选择安装版本
安装docker
- 查看linux系统版本
cat /proc/version
https://github.com/NVIDIA/nvidia-docker
- 加速
pip install -i https://pypi.doubanio.com/simple [pkgname]
修改docker镜像文件存放位置[可选]
# Edit /etc/docker/daemon.json (if it doesn’t exist, create it) and include:
# normal docker
{
"data-root": "/new/path/to/docker-data"
}
# nvidia-docker
{
"runtimes": {
"nvidia": {
"path": "/usr/bin/nvidia-container-runtime",
"runtimeArgs": [],
"data-root": "/new/path/to/docker-data"
}
},
"data-root": "/new/path/to/docker-data"
}
# Then restart Docker with:
sudo systemctl daemon-reload
sudo systemctl restart docker
方式1:通过Dockerfile创建镜像
docker build -f /path/to/a/Dockerfile .
方式2:docker hub上查找下载感兴趣的镜像文件
https://hub.docker.com/
https://hub.docker.com/r/ismconnectiris/tensorrt5-cuda10_0-cudnn7-pycuda/tags?page=1&ordering=last_updated
通过镜像文件启动docker 容器
sudo nvidia-docker run -it [-p 映射的主机端口:容器端口] [-v 主机文件夹:容器文件夹] pytorch/torchlearn
# eg
sudo nvidia-docker run -it --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 --name fastai_xz -p 18888:8888 -v /opt/user/xz/dockerShare/:/dockerShare -p 6006:6006 fe423126731f /bin/bash
sudo nvidia-docker run -it --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 --name fastai_xz -p 10022:22 -p 16006:6006 -p 18888:8888 -v /opt/user/xz_2020/docker/workspace:/workspace fe423126731f /bin/bash
sudo nvidia-docker run -itd --shm-size=2g --ulimit memlock=-1 --ulimit stack=67108864 --name nvdk_cu10 -p 20022:22 -p 26006:6006 -p 28888:8888 -v /home/kcadmin/fastai/docker:/workspace -v /home/kcadmin/Dataset:/data 8641b9ea72a5 /bin/bash
docker run -v $(pwd):/workspace -it tnn-convert:latest /bin/bash
安装ssh 设置
apt update && apt install openssh-server
echo 'root:123456' | chpasswd
sed -i 's/PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
# 允许使用root身份登录
sed 's@session\srequired\spam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd
# vi /etc/ssh/sshd_config 改端口属性
echo "export VISIBLE=now" >> /etc/profile
service ssh restart
修改环境变量
export PATH=$PATH:/usr/local/cuda-10.1/bin
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-10.1/lib64
export LIBRARY_PATH=$LIBRARY_PATH:/usr/local/cuda-10.1/lib64
source /etc/profile
在docker内启动jupyter notebook[ 可选]
jupyter notebook --ip 0.0.0.0 --no-browser --allow-root --port 8888 # --port 为容器端口,默认8888
### 修改jupyter notebook 密码
https://www.jianshu.com/p/642489051956
# 1.生成配置文件
jupyter notebook --generate-config
# 2.生成密码sha
# 3.修改配置文件
# 4.重新运行notebook
# 外部访问jupyter notebook
http://主机ip:映射的主机端口
### 多个环境切换
# 1. 切换到目标环境
# 2. 安装ipykernel
conda install ipykernel
# 3. 如果还是没看到,手动添加
python -m ipykernel install --user --name py27 --display-name "Python (py27)"
容器相关命令
查看已启动的容器
sudo docker ps
查看所有容器
sudo docker ps -a
删除容器 容器必须是停止状态的才可以删除
sudo docker rm 8d98fd43acd4
查看镜像
docker system df
docker images
删除镜像
docker rmi imagename # 删除镜像时必须先删除建立在其之上的容器
进入已启动的容器
docker exec -it containerId /bin/bash #用exec时退出后容器继续运行,一般用这个。# 退出容器用exit命令
docker attach containerId # 退出时会关闭docker容器
停止容器
docker stop containerId # 此时容器仍然保存了运行时的状态。
docker start containerId # 可以继续运行容器,状态仍然在。
导出容器
# 导出时需要停止容器
sudo docker export containerId > containerId.tar
sudo docker import containerId.tar 仓库:Tag
保存镜像
sudo docker save -o newTarName.tar imageName
sudo docker load -i docker.tar
清理容器
rm -rf ~/.cache/pip
conda clean --tarballs
rm -rf ~/.local/share/Trash/*
apt-get clean
#rm -rf /var/lib/apt/lists/*
通过容器创建新镜像
docker commit containerId newImageName
镜像改名
docker tag IMAGEID(镜像id) REPOSITORY:TAG(仓库:标签)
清理docker容器缓存
docker system prune --volumes
#清理:
#所有停止的容器
#所有不被任何一个容器使用的网络
#所有不被任何一个容器使用的volume
#所有无实例的镜像
Dockerfile
ARG PYTORCH="1.6.0"
ARG CUDA="10.1"
ARG CUDNN="7"
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX"
ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../"
RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# Install MMCV
RUN pip install mmcv-full==latest+torch1.6.0+cu101 -f https://openmmlab.oss-accelerate.aliyuncs.com/mmcv/dist/index.html
# Install MMDetection
RUN conda clean --all
RUN git clone https://github.com/open-mmlab/mmdetection.git /mmdetection
WORKDIR /mmdetection
ENV FORCE_CUDA="1"
RUN pip install -r requirements/build.txt
RUN pip install --no-cache-dir -e .
install OpenCV
pip install opencv-contrib-python