Содержание

TensorFlow

Docker

https://www.tensorflow.org/install/docker
https://docs.docker.com/compose/gpu-support/

Пример Dockerfile

Пример Dockerfile

# Jupyter + TensorFlow (GPU) image extended with the Microsoft ODBC driver,
# common ML/CV Python packages, PyTorch and Miniconda.
FROM tensorflow/tensorflow:2.10.0-gpu-jupyter

# Build-time only: keeps apt/debconf non-interactive without persisting the
# variable into the runtime environment of the container.
ARG DEBIAN_FRONTEND=noninteractive

# Informational labels listing the extra software added on top of the base image.
LABEL pip1="opencv-python Pillow numpy scikit-learn scipy" \
      pip2="jedi tensorflowjs tf2onnx onnxruntime pandas" \
      pip3="torch" \
      pkg="miniconda"

# OS refresh plus the small tools that stay in the final image.
# The apt lists are removed in the same layer so they do not inflate it.
RUN set -x \
  && apt-get update \
  && apt-get upgrade -y \
  && apt-get install -y curl mc htop \
  && rm -rf /var/lib/apt/lists/*

# Microsoft repository + ODBC driver, Python packages, Miniconda, Jupyter config.
# - gnupg, g++ and unixodbc-dev are build-time helpers: they are installed and
#   purged inside this single layer so the purge actually reduces image size.
# - curl uses -f so an HTTP error aborts the build instead of saving an error
#   page as a key, a sources list or an installer script.
# - "sklearn" is intentionally absent: it is a deprecated PyPI alias of
#   scikit-learn, which is already installed.
# - The terminal shell is configured through terminado_settings (the option
#   that owns "shell_command"); tornado_settings does not affect the terminal.
# NOTE(review): Miniconda3-latest is unpinned and not checksum-verified; pin a
# specific installer version and verify its SHA-256 for reproducible builds.
# NOTE(review): mkdir -p is defensive; confirm whether the base image already
# ships /root/.jupyter.
RUN set -x \
  && apt-get update \
  && apt-get install -y gnupg \
  && curl -fsSL https://packages.microsoft.com/keys/microsoft.asc -o /tmp/microsoft.asc \
  && APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 apt-key add /tmp/microsoft.asc \
  && curl -fsSL https://packages.microsoft.com/config/ubuntu/20.04/prod.list -o /etc/apt/sources.list.d/mssql-release.list \
  && apt-get update \
  && ACCEPT_EULA=Y apt-get install -y time g++ unixodbc-dev libgl1-mesa-glx libglib2.0-0 unixodbc libgssapi-krb5-2 msodbcsql17 libtiff5 \
  && /usr/bin/python3 -m pip install --no-cache-dir --upgrade pip \
  && pip install --no-cache-dir pyodbc opencv-python Pillow numpy scikit-learn scipy \
  && pip install --no-cache-dir jedi tensorflowjs tf2onnx onnxruntime pandas tqdm \
  && pip install --no-cache-dir torch \
  && curl -fsSL https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -o /tmp/Miniconda3-latest-Linux-x86_64.sh \
  && bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b \
  && /root/miniconda3/bin/conda init bash \
  && echo "SHELL=/bin/bash" >> /etc/environment \
  && mkdir -p /root/.jupyter \
  && echo "c.NotebookApp.terminado_settings = {'shell_command': ['/bin/bash']}" > /root/.jupyter/jupyter_notebook_config.py \
  && rm -rf /tmp/* \
  && rm -rf /root/.cache/* \
  && apt-get autoremove --purge -y gnupg g++ unixodbc-dev \
  && rm -rf /var/lib/apt/lists/*

Сборка и запуск:

# Build the image from the Dockerfile in the current directory and tag it "tf".
docker build -t tf .
# Start the freshly built "tf" image detached, with all GPUs, as container "jupyter":
# host port 80 -> container port 8888, host directory mounted at /tf.
docker run -p80:8888 -v /data/docker/tensorflow/tf:/tf -d --gpus all --name jupyter tf
# Ask the notebook server inside the container to list its running instances.
docker exec jupyter jupyter notebook list

Настроить с поддержкой Nvidia

Пример docker-compose.yaml

Пример docker-compose.yaml

version: "3.7"

# Named volumes shared by the services of this stack.
volumes:
  # NFS export /data/augmentation served by fs.idscan.loc.
  augmentation:
    driver: local
    driver_opts:
      type: nfs
      device: ":/data/augmentation"
      o: "addr=fs.idscan.loc"
  # NFS export /data/dataset served by fs.idscan.loc.
  dataset:
    driver: local
    driver_opts:
      type: nfs
      device: ":/data/dataset"
      o: "addr=fs.idscan.loc"
  # tmpfs-backed volume; size comes from MEMORY (80G when the variable is unset).
  ramdisk:
    driver: local
    driver_opts:
      type: tmpfs
      device: tmpfs
      o: "size=${MEMORY-80G},mode=0777"

services:
 # Samba service: publishes port 445 and serves the ramdisk volume plus the
 # host directory /data/nvme1.
 samba-anon:
  image: "${REGISTRYHOST-registry.idscan.net}/iac/samba-anon:latest"
  restart: always
  stop_grace_period: 10s
  ports:
    - "445:445"
  volumes:
    - ramdisk:/data/ramdisk
    - /data/nvme1:/data/nvme1
  deploy:
    resources:
      limits:
        cpus: '2'
        memory: 1G
  # Healthy while the local share listing succeeds for user "anon" without a password.
  healthcheck:
    test: ["CMD-SHELL", "smbclient -L 127.0.0.1 -t 10 -U anon -N 1>/dev/null || exit 1"]
    start_period: 10s
    interval: 60s
    timeout: 10s
    retries: 3

 # Jupyter notebook server with an NVIDIA GPU reservation; the shared volumes
 # and the host directory /data/nvme1 are mounted under /tf.
 jupyter:
  image: "${REGISTRYHOST-registry.idscan.net}/iac/jupyter:${TAG-latest}"
  restart: always
  stop_grace_period: 10s
  command:
    - /usr/local/bin/jupyter-notebook
    - --notebook-dir=/tf
    - --ip=0.0.0.0
    - --no-browser
    - --allow-root
    # NOTE(review): in list form no shell strips the single quotes, so they are
    # presumably passed to jupyter as part of the value - confirm the installed
    # traitlets version still accepts quoted strings.
    # NOTE(review): the fallback token is stored here in plain text; set
    # NOTEBOOKTOKEN to override it in real deployments.
    - --NotebookApp.token='${NOTEBOOKTOKEN-f77380f30c405ab996d370bf8c8e71b9bc312e2fcb96207c}'
    - '--NotebookApp.terminado_settings={"shell_command":  ["/bin/bash"]}'
  ports:
    - "8888:8888"
  volumes:
    - augmentation:/tf/augmentation
    - dataset:/tf/dataset
    - ramdisk:/tf/ramdisk
    - /data/nvme1:/tf/nvme1
  deploy:
    resources:
      limits:
        cpus: '16'
        # Same MEMORY variable (default 80G) that sizes the ramdisk volume.
        memory: ${MEMORY-80G}
      reservations:
        devices:
          - driver: nvidia
            capabilities: ["gpu"]
  # Healthy while the notebook HTTP endpoint answers on the container's port 8888.
  healthcheck:
    test: ["CMD-SHELL", "curl --silent --fail --show-error --output /dev/null -X GET http://127.0.0.1:8888/ || exit 1"]
    start_period: 10s
    interval: 60s
    timeout: 5s
    retries: 3


Тест

Проверить GPU из контейнера:

  1. Версия TensorFlow и доступность GPU
    docker run --rm -it --gpus all tensorflow/tensorflow:latest-gpu-jupyter \
      python -c "import tensorflow as tf; print('TF version:', tf.__version__); print('GPU is', 'available' if tf.config.list_physical_devices('GPU') else 'NOT AVAILABLE')"
  2. Список GPU и их нумерация
    # The result must be printed explicitly, otherwise python -c shows nothing;
    # list_physical_devices('GPU') returns one entry per GPU visible to TensorFlow.
    docker run --rm -it --gpus all tensorflow/tensorflow:latest-gpu-jupyter \
      python -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))"
  3. Выполнить вычисления на тензоре:
    docker run --rm -it --gpus all tensorflow/tensorflow:latest-gpu-jupyter \
      python -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))"