Files
SparkyUI/docker-compose.yml
T
TBNilles 399acabd58 feat(model-manager): "Free GPU memory" button to unload ComfyUI models
ComfyUI caches the last model when RAM is plentiful (unified memory), so
memory doesn't drop after switching models even though models are being
swapped, not accumulated. Add a sidebar "Free GPU memory" button that
proxies ComfyUI's POST /free (unload_models + free_memory) via a new
/api/comfyui/free endpoint (COMFYUI_URL env). Verified it releases ~7GB.
README documents this plus the --disable-smart-memory auto-unload option.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-07 17:14:37 -04:00

187 lines
6.2 KiB
YAML

services:
# ComfyUI - GPU-enabled backend
# Access at http://<host>:8188 (host port follows COMFYUI_PORT)
  comfyui:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        # Pin ComfyUI to a known-good commit/tag if desired
        COMFYUI_REF: "${COMFYUI_REF:-master}"
        # SageAttention ref (e.g., "main", "v2.2.0", or specific commit)
        SAGEATTN_REF: "${SAGEATTN_REF:-main}"
    image: sparkyui:cu130
    container_name: comfyui
    # GPU enablement
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # LAN exposure
    # Only the host side is configurable; the container side stays 8188, which
    # matches --port in the default COMFYUI_FLAGS and the healthcheck URL below.
    ports:
      - "${COMFYUI_PORT:-8188}:8188"
    environment:
      # NOTE(review): this passes the HOST port value into the container, while
      # the default flags make the server listen on 8188 - confirm how the
      # image's entrypoint uses COMFYUI_PORT before overriding it.
      COMFYUI_PORT: "${COMFYUI_PORT:-8188}"
      # Optimized for Grace-Blackwell unified memory architecture
      # Key insight: DON'T use --gpu-only - let the unified memory fabric work naturally
      COMFYUI_FLAGS: "${COMFYUI_FLAGS:---listen 0.0.0.0 --port 8188 --disable-pinned-memory --dont-upcast-attention}"
      NVIDIA_VISIBLE_DEVICES: "all"
      NVIDIA_DRIVER_CAPABILITIES: "compute,utility"
      # Disable torch.compile/inductor - Triton doesn't support Blackwell sm_121a yet
      TORCH_COMPILE_DISABLE: "1"
      TORCHDYNAMO_DISABLE: "1"
      # Grace-Blackwell unified memory — removed aggressive CUDA tuning (5/21):
      #   CUDA_CACHE_DISABLE, CUDA_DEVICE_MAX_CONNECTIONS, CUDA_DEVICE_MAX_COPY_CONNECTIONS,
      #   CUDA_MODULE_LOADING=EAGER, CUDA_MANAGED_FORCE_DEVICE_ALLOC, OMP_NUM_THREADS
      # These were over-tuning. The ComfyUI flags + Sparky patch handle the architecture.
      # Keeping only CUBLAS_WORKSPACE_CONFIG for determinism.
      CUBLAS_WORKSPACE_CONFIG: ":0:0"
      # CUDA kernel caching — PTX→SASS compilation cache for GB10 (sm_121)
      # First run compiles kernels, subsequent runs reuse from disk. 3x speedup reported.
      # 4GB cache covers all typical ComfyUI kernel variants.
      CUDA_CACHE_MAXSIZE: "4294967296"
    volumes:
      # Models from existing ComfyUI install (read-only).
      # Defaults to the project root; the model-manager service writes here.
      - ${COMFYUI_HOST_PATH:-.}/models:/opt/ComfyUI/models:ro
      # Custom nodes - comment out to use container-only (fresh) custom_nodes
      # If mounted, ComfyUI-Manager installs persist across container restarts
      - ${SPARKYUI_DATA_PATH:-.}/custom_nodes:/opt/ComfyUI/custom_nodes
      # Outputs/inputs/workflows - persistent across restarts
      - ${SPARKYUI_DATA_PATH:-.}/output:/opt/ComfyUI/output
      - ${SPARKYUI_DATA_PATH:-.}/input:/opt/ComfyUI/input
      - ${SPARKYUI_DATA_PATH:-.}/workflows:/opt/ComfyUI/workflows
      # Wheel cache (optional - for prebuilt wheels)
      - ${SPARKYUI_DATA_PATH:-.}/wheels:/opt/wheels
      # Sparky patches - Grace-Blackwell unified memory optimizations
      #   model_management.py: HIGH_VRAM→NORMAL_VRAM, intermediate_device()→cuda, soft_empty_cache skip,
      #     95% vram_for_weights, UNIFIED_MEMORY detection, offload devices → cuda
      #   utils.py: copy=False on tensor.to(device) — avoids double-allocation on unified memory
      #     where CPU and GPU share the same physical RAM (ComfyUI issue #10896)
      # Each file is bind-mounted read-only over the copy inside the image.
      - ./patches/model_management.py:/opt/ComfyUI/comfy/model_management.py:ro
      - ./patches/utils.py:/opt/ComfyUI/comfy/utils.py:ro
    networks:
      - sparky_net
    # Health check - ComfyUI takes time to load, so generous start period
    # NOTE(review): assumes curl is installed in the image - confirm in the Dockerfile.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8188/"]
      interval: 30s
      timeout: 10s
      start_period: 120s
      retries: 3
    restart: unless-stopped
# ComfyUIMini - Mobile-friendly UI
# Access at http://<host>:3000
  comfyuimini:
    build:
      context: ./comfyuimini
      dockerfile: Dockerfile
      args:
        COMFYUIMINI_REF: "${COMFYUIMINI_REF:-main}"
        # Port of the Model Manager, baked into the "Manage Photos" sidebar link
        MODEL_MANAGER_PORT: "${MODEL_MANAGER_PORT:-8189}"
    image: comfyuimini:latest
    container_name: comfyuimini
    # Only the host side is configurable; the app listens on 3000 (app_port below).
    ports:
      - "${COMFYUIMINI_PORT:-3000}:3000"
    environment:
      # node-config override - connects to comfyui container via docker network
      # Folded scalar (>-): the JSON body must stay indented under the key.
      NODE_CONFIG: >-
        {
          "app_port": 3000,
          "comfyui_url": "http://comfyui:8188",
          "comfyui_ws_url": "ws://comfyui:8188",
          "output_dir": "/shared/output",
          "reject_unauthorised_cert": false
        }
    volumes:
      # Share output directory with ComfyUI for gallery feature (read-only)
      - ${SPARKYUI_DATA_PATH:-.}/output:/shared/output:ro
      # Persist server-side workflows
      - comfyuimini_workflows:/app/workflows
    networks:
      - sparky_net
    # Start only after the comfyui healthcheck reports healthy.
    depends_on:
      comfyui:
        condition: service_healthy
    restart: unless-stopped
# Model Manager - StabilityMatrix-style model download/management UI
# Access at http://<host>:8189
  model-manager:
    build:
      context: ./model-manager
      dockerfile: Dockerfile
    image: sparkyui-model-manager:latest
    container_name: model-manager
    # Run as the host user so downloaded models are owned by you, not root.
    # Defaults to 1000:1000; override via PUID/PGID in .env if needed.
    user: "${PUID:-1000}:${PGID:-1000}"
    # Only the host side is configurable; the container side stays 8189,
    # which is also the port the healthcheck below probes.
    ports:
      - "${MODEL_MANAGER_PORT:-8189}:8189"
    environment:
      # Container-side paths; each matches a mount target under volumes below.
      MODELS_DIR: /models
      DATA_DIR: /data
      OUTPUT_DIR: /output
      # Ports used by the device-routing landing page (/start)
      COMFYUI_PORT: "${COMFYUI_PORT:-8188}"
      COMFYUIMINI_PORT: "${COMFYUIMINI_PORT:-3000}"
      # Internal URL used to proxy the "Free GPU memory" action
      # (service name + container port, reachable over sparky_net)
      COMFYUI_URL: "http://comfyui:8188"
    volumes:
      # Shared models dir - read-WRITE here so downloads land on the host.
      # ComfyUI mounts the same host folder read-only and picks up new files.
      - ${COMFYUI_HOST_PATH:-.}/models:/models
      # Generated images - read-WRITE so the gallery can delete photos.
      - ${SPARKYUI_DATA_PATH:-.}/output:/output
      # Persistent SQLite DB (sources, API keys, download history)
      - ${SPARKYUI_DATA_PATH:-.}/sparkyui-data:/data
    networks:
      - sparky_net
    # NOTE(review): assumes curl is installed in the image - confirm in the Dockerfile.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8189/api/model-types"]
      interval: 30s
      timeout: 10s
      start_period: 15s
      retries: 3
    restart: unless-stopped
# Bridge network that every service in this file joins; containers address
# each other by service name on it (e.g. http://comfyui:8188).
networks:
  sparky_net:
    driver: bridge

# Named volume mounted at /app/workflows in the comfyuimini service.
# Left without options so Compose creates it with default settings.
volumes:
  comfyuimini_workflows: