# SparkyUI/docker-compose.yml

services:
  # ComfyUI server - built from ./Dockerfile, runs with all NVIDIA GPUs attached.
  # Access at http://<host>:8188 (host-side port overridable via COMFYUI_PORT in .env).
  comfyui:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        # Pin ComfyUI to a known-good commit/tag if desired
        COMFYUI_REF: "${COMFYUI_REF:-master}"
        # SageAttention ref (e.g., "main", "v2.2.0", or specific commit)
        SAGEATTN_REF: "${SAGEATTN_REF:-main}"

    image: sparkyui:cu130
    container_name: comfyui

    # GPU enablement
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

    # LAN exposure. Only the HOST side of the mapping is configurable;
    # the container side is always 8188.
    ports:
      - "${COMFYUI_PORT:-8188}:8188"

    environment:
      # In-container listen port. Pinned to 8188 so it always agrees with the
      # port mapping target, the --port flag in COMFYUI_FLAGS and the healthcheck
      # below. The host-side COMFYUI_PORT override must NOT leak in here,
      # otherwise changing the published port could make the server listen on a
      # port nothing maps to.
      COMFYUI_PORT: "8188"
      # Optimized for Grace-Blackwell unified memory architecture
      # Key insight: DON'T use --gpu-only - let the unified memory fabric work naturally
      COMFYUI_FLAGS: "${COMFYUI_FLAGS:---listen 0.0.0.0 --port 8188 --disable-pinned-memory --dont-upcast-attention}"
      NVIDIA_VISIBLE_DEVICES: "all"
      NVIDIA_DRIVER_CAPABILITIES: "compute,utility"

      # Disable torch.compile/inductor - Triton doesn't support Blackwell sm_121a yet
      TORCH_COMPILE_DISABLE: "1"
      TORCHDYNAMO_DISABLE: "1"

      # Grace-Blackwell unified memory — removed aggressive CUDA tuning (5/21):
      # CUDA_CACHE_DISABLE, CUDA_DEVICE_MAX_CONNECTIONS, CUDA_DEVICE_MAX_COPY_CONNECTIONS,
      # CUDA_MODULE_LOADING=EAGER, CUDA_MANAGED_FORCE_DEVICE_ALLOC, OMP_NUM_THREADS
      # These were over-tuning. The ComfyUI flags + Sparky patch handle the architecture.
      # Keeping only CUBLAS_WORKSPACE_CONFIG for determinism.
      # NOTE(review): the values usually documented for deterministic cuBLAS are
      # ":4096:8" and ":16:8" — confirm ":0:0" is the intended setting.
      CUBLAS_WORKSPACE_CONFIG: ":0:0"

      # CUDA kernel caching — PTX→SASS compilation cache for GB10 (sm_121)
      # First run compiles kernels, subsequent runs reuse from disk. 3x speedup reported.
      # 4GB cache covers all typical ComfyUI kernel variants.
      # NOTE(review): no CUDA_CACHE_PATH or cache volume is configured here, so the
      # cache presumably lives in the container filesystem and is lost whenever the
      # container is recreated — confirm whether it should be persisted.
      CUDA_CACHE_MAXSIZE: "4294967296"

    volumes:
      # Models from existing ComfyUI install (read-only).
      # Defaults to the project root; the model-manager service writes here.
      - "${COMFYUI_HOST_PATH:-.}/models:/opt/ComfyUI/models:ro"

      # Custom nodes - comment out to use container-only (fresh) custom_nodes
      # If mounted, ComfyUI-Manager installs persist across container restarts
      - "${SPARKYUI_DATA_PATH:-.}/custom_nodes:/opt/ComfyUI/custom_nodes"

      # Outputs/inputs/workflows - persistent across restarts
      - "${SPARKYUI_DATA_PATH:-.}/output:/opt/ComfyUI/output"
      - "${SPARKYUI_DATA_PATH:-.}/input:/opt/ComfyUI/input"
      - "${SPARKYUI_DATA_PATH:-.}/workflows:/opt/ComfyUI/workflows"

      # Wheel cache (optional - for prebuilt wheels)
      - "${SPARKYUI_DATA_PATH:-.}/wheels:/opt/wheels"

      # Sparky patches - Grace-Blackwell unified memory optimizations
      # model_management.py: HIGH_VRAM→NORMAL_VRAM, intermediate_device()→cuda, soft_empty_cache skip,
      #   95% vram_for_weights, UNIFIED_MEMORY detection, offload devices → cuda
      # utils.py: copy=False on tensor.to(device) — avoids double-allocation on unified memory
      #   where CPU and GPU share the same physical RAM (ComfyUI issue #10896)
      - "./patches/model_management.py:/opt/ComfyUI/comfy/model_management.py:ro"
      - "./patches/utils.py:/opt/ComfyUI/comfy/utils.py:ro"

    networks:
      - sparky_net

    # Health check - ComfyUI takes time to load, so generous start period.
    # Probes the fixed in-container port; requires curl inside the image.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8188/"]
      interval: 30s
      timeout: 10s
      start_period: 120s
      retries: 3

    restart: unless-stopped

  # ComfyUIMini - mobile-friendly front end for the comfyui service.
  # Published on the host at port 3000 unless COMFYUIMINI_PORT overrides it:
  #   http://<host>:3000
  comfyuimini:
    image: comfyuimini:latest
    container_name: comfyuimini
    restart: unless-stopped

    build:
      context: ./comfyuimini
      dockerfile: Dockerfile
      args:
        COMFYUIMINI_REF: "${COMFYUIMINI_REF:-main}"

    # Start only once the comfyui healthcheck has passed.
    depends_on:
      comfyui:
        condition: service_healthy

    networks:
      - sparky_net

    ports:
      - "${COMFYUIMINI_PORT:-3000}:3000"

    volumes:
      # Read-only view of ComfyUI's output directory (used by the gallery).
      - "${SPARKYUI_DATA_PATH:-.}/output:/shared/output:ro"
      # Named volume so server-side workflows survive container recreation.
      - "comfyuimini_workflows:/app/workflows"

    environment:
      # JSON override for node-config; the comfyui host name resolves over
      # the shared docker network.
      NODE_CONFIG: >-
        {
          "app_port": 3000,
          "comfyui_url": "http://comfyui:8188",
          "comfyui_ws_url": "ws://comfyui:8188",
          "output_dir": "/shared/output",
          "reject_unauthorised_cert": false
        }

  # Model Manager - StabilityMatrix-style UI for downloading and managing models.
  # Published on the host at port 8189 unless MODEL_MANAGER_PORT overrides it:
  #   http://<host>:8189
  model-manager:
    image: sparkyui-model-manager:latest
    container_name: model-manager
    restart: unless-stopped

    build:
      context: ./model-manager
      dockerfile: Dockerfile

    # Run with the host user's UID:GID so downloaded models are owned by that
    # user rather than root. Falls back to 1000:1000; set PUID/PGID in .env
    # to change it.
    user: "${PUID:-1000}:${PGID:-1000}"

    networks:
      - sparky_net

    ports:
      - "${MODEL_MANAGER_PORT:-8189}:8189"

    volumes:
      # Shared models directory, mounted read-WRITE so downloads land on the
      # host. The comfyui service mounts the same host folder read-only.
      - "${COMFYUI_HOST_PATH:-.}/models:/models"
      # Persistent SQLite DB (sources, API keys, download history)
      - "${SPARKYUI_DATA_PATH:-.}/sparkyui-data:/data"

    environment:
      # In-container paths matching the two mounts above.
      MODELS_DIR: "/models"
      DATA_DIR: "/data"

    # Liveness probe against the model-types API endpoint.
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:8189/api/model-types"
      start_period: 15s
      interval: 30s
      timeout: 10s
      retries: 3

# Bridge network joined by the comfyui, comfyuimini and model-manager services;
# containers reach each other by service name (e.g. http://comfyui:8188).
networks:
  sparky_net:
    driver: bridge

# Named volumes managed by Docker (default settings, no extra options).
volumes:
  # Backs /app/workflows in the comfyuimini service.
  comfyuimini_workflows: {}