An example of running the server in swarm/compose behind Traefik:
compose.server.yaml
volumes:
  logs:
    driver: local
    driver_opts:
      o: bind
      type: none
      device: /data/docker/clearml/logs
  config:
    driver: local
    driver_opts:
      o: bind
      type: none
      device: /data/docker/clearml/config
  fileserver:
    driver: local
    driver_opts:
      o: bind
      type: none
      device: /data/docker/clearml/data/fileserver
  elastic_7:
    driver: local
    driver_opts:
      o: bind
      type: none
      device: /data/docker/clearml/data/elastic_7
  mongo_4_db:
    driver: local
    driver_opts:
      o: bind
      type: none
      device: /data/docker/clearml/data/mongo_4/db
  mongo_4_configdb:
    driver: local
    driver_opts:
      o: bind
      type: none
      device: /data/docker/clearml/data/mongo_4/configdb
  redis:
    driver: local
    driver_opts:
      o: bind
      type: none
      device: /data/docker/clearml/data/redis

services:
  apiserver:
    command:
      - apiserver
    container_name: clearml-apiserver
    image: clearml/server:latest
    restart: unless-stopped
    volumes:
      - logs:/var/log/clearml
      - config:/opt/clearml/config
      - fileserver:/mnt/fileserver
    depends_on:
      - redis
      - mongo
      - elasticsearch
      - fileserver
    environment:
      CLEARML_ELASTIC_SERVICE_HOST: elasticsearch
      CLEARML_ELASTIC_SERVICE_PORT: 9200
      CLEARML_MONGODB_SERVICE_HOST: mongo
      CLEARML_MONGODB_SERVICE_PORT: 27017
      CLEARML_REDIS_SERVICE_HOST: redis
      CLEARML_REDIS_SERVICE_PORT: 6379
      CLEARML_SERVER_DEPLOYMENT_TYPE: linux
      CLEARML__apiserver__pre_populate__enabled: "true"
      CLEARML__apiserver__pre_populate__zip_files: "/opt/clearml/db-pre-populate"
      CLEARML__apiserver__pre_populate__artifacts_path: "/mnt/fileserver"
      CLEARML__services__async_urls_delete__enabled: "true"
      CLEARML__services__async_urls_delete__fileserver__url_prefixes: "[${CLEARML_FILES_HOST:-fs.clearml.domain.com}]"
      CLEARML__secure__credentials__services_agent__user_key: ${CLEARML_AGENT_ACCESS_KEY:-}
      CLEARML__secure__credentials__services_agent__user_secret: ${CLEARML_AGENT_SECRET_KEY:-}
    networks:
      - backend
      - traefik-public
    deploy:
      resources:
        limits:
          memory: ${MEMORYWEBSERVER-500m}
          cpus: ${CPUWEBSERVER-1}
      labels:
        - traefik.enable=true
        - traefik.swarm.network=traefik-public
        - traefik.http.routers.api-clearml-loc-http.entrypoints=http
        - traefik.http.routers.api-clearml-loc-http.rule=Host(`api.clearml.domain.com`)
        - traefik.http.routers.api-clearml-loc-http.service=api-clearml
        - traefik.http.services.api-clearml.loadbalancer.server.port=8008

  elasticsearch:
    networks:
      - backend
    container_name: clearml-elastic
    environment:
      bootstrap.memory_lock: "true"
      cluster.name: clearml
      cluster.routing.allocation.node_initial_primaries_recoveries: "500"
      cluster.routing.allocation.disk.watermark.low: 500mb
      cluster.routing.allocation.disk.watermark.high: 500mb
      cluster.routing.allocation.disk.watermark.flood_stage: 500mb
      discovery.type: "single-node"
      http.compression_level: "7"
      node.name: clearml
      reindex.remote.whitelist: "'*.*'"
      xpack.security.enabled: "false"
    ulimits:
      memlock:
        soft: -1
        hard: -1
      nofile:
        soft: 65536
        hard: 65536
    image: elasticsearch:8.17.0
    restart: unless-stopped
    volumes:
      - elastic_7:/usr/share/elasticsearch/data
    deploy:
      resources:
        limits:
          memory: ${MEMORYELASTIC-2G}
          cpus: ${CPUELASTIC-2}

  fileserver:
    networks:
      - backend
      - traefik-public
    command:
      - fileserver
    container_name: clearml-fileserver
    image: clearml/server:latest
    environment:
      CLEARML__fileserver__delete__allow_batch: "true"
      CLEARML__fileserver__auth__enabled: "false"
    restart: unless-stopped
    volumes:
      - logs:/var/log/clearml
      - fileserver:/mnt/fileserver
      - config:/opt/clearml/config
    deploy:
      resources:
        limits:
          memory: ${MEMORYWEBSERVER-500m}
          cpus: ${CPUWEBSERVER-1}
      labels:
        - traefik.enable=true
        - traefik.swarm.network=traefik-public
        - traefik.http.routers.fs-clearml-loc-http.entrypoints=http
        - traefik.http.routers.fs-clearml-loc-http.rule=Host(`fs.clearml.domain.com`)
        - traefik.http.routers.fs-clearml-loc-http.service=fs-clearml
        - traefik.http.services.fs-clearml.loadbalancer.server.port=8081

  mongo:
    networks:
      - backend
    container_name: clearml-mongo
    image: mongo:7.0.22
    restart: unless-stopped
    command: --setParameter internalQueryMaxBlockingSortMemoryUsageBytes=196100200
    volumes:
      - mongo_4_db:/data/db
      - mongo_4_configdb:/data/configdb
    deploy:
      resources:
        limits:
          memory: ${MEMORYMONGO-2G}
          cpus: ${CPUMONGO-2}

  redis:
    networks:
      - backend
    container_name: clearml-redis
    image: redis:7.4.1
    restart: unless-stopped
    volumes:
      - redis:/data
    deploy:
      replicas: 1
      resources:
        limits:
          memory: ${MEMORYREDIS-128m}
          cpus: ${CPUREDIS-1}

  webserver:
    command:
      - webserver
    container_name: clearml-webserver
    environment:
      WEBSERVER__apiBaseUrl: "http://api.clearml.domain.com/"
      WEBSERVER__fileBaseUrl: "http://fs.clearml.domain.com/"
      WEBSERVER__useFilesProxy: "true"
    image: clearml/server:latest
    restart: unless-stopped
    depends_on:
      - apiserver
    networks:
      - backend
      - traefik-public
    deploy:
      resources:
        limits:
          memory: ${MEMORYWEBSERVER-500m}
          cpus: ${CPUWEBSERVER-2}
      labels:
        - traefik.enable=true
        - traefik.swarm.network=traefik-public
        - traefik.http.routers.clearml-loc-http.entrypoints=http
        - traefik.http.routers.clearml-loc-http.rule=Host(`clearml.domain.com`)
        - traefik.http.routers.clearml-loc-http.service=clearml
        - traefik.http.services.clearml.loadbalancer.server.port=80

  async_delete:
    depends_on:
      - apiserver
      - redis
      - mongo
      - elasticsearch
      - fileserver
    container_name: async_delete
    image: clearml/server:latest
    networks:
      - backend
    restart: unless-stopped
    environment:
      CLEARML_ELASTIC_SERVICE_HOST: elasticsearch
      CLEARML_ELASTIC_SERVICE_PORT: 9200
      CLEARML_MONGODB_SERVICE_HOST: mongo
      CLEARML_MONGODB_SERVICE_PORT: 27017
      CLEARML_REDIS_SERVICE_HOST: redis
      CLEARML_REDIS_SERVICE_PORT: 6379
      PYTHONPATH: /opt/clearml/apiserver
      CLEARML__services__async_urls_delete__fileserver__url_prefixes: "[${CLEARML_FILES_HOST:-fs.clearml.domain.com}]"
    entrypoint:
      - python3
      - -m
      - jobs.async_urls_delete
      - --fileserver-host
      - http://fileserver:8081
    volumes:
      - logs:/var/log/clearml
      - config:/opt/clearml/config

networks:
  backend:
    attachable: true
  traefik-public:
    external: true
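The bind-mounted host directories and the external traefik-public overlay network must exist before the stack is deployed. A minimal deployment sketch (the stack name clearml is arbitrary):

# Create the host directories backing the bind-mounted volumes
mkdir -p /data/docker/clearml/{logs,config} \
    /data/docker/clearml/data/{fileserver,elastic_7,redis} \
    /data/docker/clearml/data/mongo_4/{db,configdb}

# The compose file marks this network as external, so create it once per cluster
docker network create --driver overlay --attachable traefik-public

docker stack deploy -c compose.server.yaml clearml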
https://clear.ml/docs/latest/docs/deploying_clearml/clearml_server_linux_mac/
https://github.com/clearml/clearml-server/blob/master/docker/docker-compose.yml
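Note that docker stack deploy, unlike docker compose, does not read a .env file automatically: the interpolated variables have to be exported in the deploying shell. A sketch of the variables this file consumes (all values below are placeholders; the MEMORY*/CPU* variables fall back to the defaults embedded in the file when unset):

# Credentials injected into the apiserver for the built-in services agent account
export CLEARML_AGENT_ACCESS_KEY='generated-access-key'
export CLEARML_AGENT_SECRET_KEY='generated-secret-key'

# Fileserver host used to build the async-delete URL prefix list
export CLEARML_FILES_HOST='fs.clearml.domain.com'

# Optional resource overrides
export MEMORYELASTIC=4G
export CPUELASTIC=4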
An example of running an agent with automatic cleanup of the /data/dataset directory and shared cache directories:
compose.agent.yaml
volumes:
  cache:
    driver: local
    driver_opts:
      o: bind
      type: none
      device: /data/clearml/cache
  dataset:
    driver: local
    driver_opts:
      o: bind
      type: none
      device: /data/clearml/dataset

services:
  clearml-agent:
    container_name: clearml-agent-services
    image: clearml/clearml-agent-services:latest
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
    privileged: true
    environment:
      CLEARML_WEB_HOST: ${CLEARML_WEB_HOST:-http://clearml.domain.com}
      CLEARML_API_HOST: ${CLEARML_API_HOST:-http://api.clearml.domain.com}
      CLEARML_FILES_HOST: ${CLEARML_FILES_HOST:-http://fs.clearml.domain.com}
      CLEARML_API_ACCESS_KEY: ${CLEARML_AGENT_ACCESS_KEY:-SET}
      CLEARML_API_SECRET_KEY: ${CLEARML_AGENT_SECRET_KEY:-SET}
      CLEARML_AGENT_GIT_USER: ${CLEARML_AGENT_GIT_USER:-SET}
      CLEARML_AGENT_GIT_PASS: ${CLEARML_AGENT_GIT_PASS:-SET}
      CLEARML_AGENT_NO_UPDATE: 1
      CLEARML_AGENT_DAEMON_OPTIONS: "--create-queue"
      CLEARML_AGENT_QUEUES: ${CLEARML_AGENT_QUEUE:-mlqueue}
      CLEARML_WORKER_ID: "docker3"
      CLEARML_AGENT_DEFAULT_BASE_DOCKER: "pytorch/pytorch:2.8.0-cuda12.9-cudnn9-runtime"
      # Task containers are siblings started via the host docker.sock, so the
      # cache mount must be given as a host path, not a path inside this container
      CLEARML_AGENT_DOCKER_HOST_MOUNT: "/data/clearml/cache:/root/.clearml"
      SHUTDOWN_IF_NO_ACCESS_KEY: 1
      CLEARML_AGENT_SKIP_PYTHON_ENV_INSTALL: 1
      CLEARML_AGENT_SKIP_PIP_VENV_INSTALL: 1
      CLEARML_TASK_NO_REUSE: 1
      CLEARML_AGENT_DOCKER_ARGS_HIDE_ENV: "CLEARML_API_SECRET_KEY CLEARML_AGENT_GIT_PASS DB_DS_PASSWORD"
      CLEARML_AGENT_DOCKER_ARGS_FILTERS: "^--env$$ ^-e$$ ^--workdir$$ ^-w$$"
      CLEARML_AGENT_EXTRA_DOCKER_ARGS: "--rm --shm-size=8g --volume /data/clearml/dataset:/dataset -e CLEARML_API_HOST=$${CLEARML_API_HOST} -e CLEARML_API_ACCESS_KEY=$${CLEARML_API_ACCESS_KEY} -e CLEARML_API_SECRET_KEY=$${CLEARML_API_SECRET_KEY}"
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      - cache:/root/.clearml
      - dataset:/dataset
    # Entrypoint override: write a clearml.conf with unified cache paths, switch
    # the stock entrypoint from --cpu-only to --gpus all, clean /dataset, then
    # start the agent once the API server answers its ping endpoint
    entrypoint:
      - bash
      - -c
      - |
        cat <<EOF > /root/clearml.conf
        agent {
            venvs_dir = /root/.clearml/venvs-builds
            venvs_cache: {
                path: /root/.clearml/venvs-cache
            },
            vcs_cache: {
                enabled: true,
                path: /root/.clearml/vcs-cache
            },
            pip_download_cache {
                enabled: true,
                path: /root/.clearml/pip-download-cache
            },
            extra_docker_shell_script: ["if [ -d \"/dataset\" ]; then echo \"Cleaning up /dataset ...\"; rm -rf /dataset/*; fi"]
            docker_pip_cache = /root/.clearml/pip-cache
            docker_apt_cache = /root/.clearml/apt-cache
            docker_internal_mounts {
                sdk_cache: "/clearml_agent_cache"
                apt_cache: "/root/.clearml/apt-cache"
                ssh_folder: "/root/.ssh"
                ssh_ro_folder: "/.ssh"
                pip_cache: "/root/.clearml/pip-cache"
                poetry_cache: "/root/.cache/pypoetry"
                vcs_cache: "/root/.clearml/vcs-cache"
                venvs_cache: "/root/.clearml/venvs-cache"
                venv_build: "/root/.clearml/venvs-builds"
                pip_download: "/root/.clearml/pip-download-cache"
            }
        }
        EOF
        sed -i 's/--cpu-only /--gpus all /' /usr/agent/entrypoint.sh
        if [ -d "/dataset" ]; then
            echo "Cleaning up /dataset ..."
            rm -rf /dataset/*
        fi
        curl --retry 3 --silent --fail --show-error --output /dev/null --retry-delay 10 --retry-all-errors $${CLEARML_API_HOST}/debug.ping \
            && /usr/agent/entrypoint.sh
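Deployment mirrors the server stack; a minimal sketch, assuming the stack name clearml-agent:

mkdir -p /data/clearml/{cache,dataset}
docker stack deploy -c compose.agent.yaml clearml-agent

# Follow the agent startup and confirm the docker3 worker registers
docker service logs -f clearml-agent_clearml-agent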
https://clear.ml/docs/latest/docs/configs/env_vars
https://clear.ml/docs/latest/docs/clearml_agent/clearml_agent_env_var
https://clear.ml/docs/latest/docs/configs/clearml_conf#agent-section
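With the agent listening on mlqueue, work can be pushed to it from any machine with ClearML credentials. A sketch using the clearml-task CLI (the project and script names here are placeholders):

pip install clearml
clearml-init    # interactively writes ~/clearml.conf with the API/web/files hosts and credentials
clearml-task --project demo --name train-run --script train.py --queue mlqueue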