687ce72dd3
Key changes based on HurbaLurba's DGX Spark research: - Remove --gpu-only flag (fights unified memory fabric) - Add --disable-pinned-memory, --force-fp16, --dont-upcast-attention - Add CUDA env vars for unified memory: CUDA_MANAGED_FORCE_DEVICE_ALLOC, PYTORCH_NO_CUDA_MEMORY_CACHING, OMP_NUM_THREADS=20 - Document unified memory architecture best practices - Add host-level GPU optimization instructions (clock locking, vboost) - Document SageAttention PR #297 status (merged then reverted) - Add credits section 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
67 lines
2.3 KiB
YAML
67 lines
2.3 KiB
YAML
# Compose definition for a single `comfyui` service.
# It builds the image from the local Dockerfile, reserves all NVIDIA GPUs,
# publishes the web UI port, and bind-mounts host directories for models,
# custom nodes, inputs/outputs/workflows and a wheel cache.
#
# Required variables (e.g. from a .env file next to this file):
#   COMFYUI_HOST_PATH   - host directory whose `models/` subfolder is mounted
#   SPARKYUI_DATA_PATH  - host directory holding the persistent data folders
# Optional variables: COMFYUI_REF, SAGEATTN_REF, COMFYUI_PORT, COMFYUI_FLAGS.
services:
  comfyui:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        # Pin ComfyUI to a known-good commit/tag if desired
        COMFYUI_REF: "${COMFYUI_REF:-master}"
        # SageAttention ref (e.g., "main", "v2.2.0", or specific commit)
        SAGEATTN_REF: "${SAGEATTN_REF:-main}"

    image: sparkyui:cu130
    container_name: comfyui

    # GPU enablement: reserve every NVIDIA device for this container
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

    # LAN exposure: the host port is configurable, the container port is
    # fixed at 8188 to match `--port 8188` in the default COMFYUI_FLAGS.
    ports:
      - "${COMFYUI_PORT:-8188}:8188"

    environment:
      COMFYUI_PORT: "${COMFYUI_PORT:-8188}"
      # Optimized for Grace-Blackwell unified memory architecture
      # Key insight: DON'T use --gpu-only - let the unified memory fabric work naturally
      # NOTE: overriding COMFYUI_FLAGS replaces the whole default string,
      # including --listen/--port, so repeat those in any override.
      COMFYUI_FLAGS: "${COMFYUI_FLAGS:---listen 0.0.0.0 --port 8188 --disable-pinned-memory --force-fp16 --fp16-unet --fp16-vae --fp16-text-enc --dont-upcast-attention}"
      NVIDIA_VISIBLE_DEVICES: "all"
      NVIDIA_DRIVER_CAPABILITIES: "compute,utility"

      # Disable torch.compile/inductor - Triton doesn't support Blackwell sm_121a yet
      TORCH_COMPILE_DISABLE: "1"
      TORCHDYNAMO_DISABLE: "1"

      # Grace-Blackwell unified memory optimizations
      CUDA_CACHE_DISABLE: "1"
      PYTORCH_NO_CUDA_MEMORY_CACHING: "1"
      CUDA_DEVICE_MAX_CONNECTIONS: "1"
      CUDA_DEVICE_MAX_COPY_CONNECTIONS: "4"
      CUDA_MODULE_LOADING: "EAGER"
      CUDA_MANAGED_FORCE_DEVICE_ALLOC: "1"
      OMP_NUM_THREADS: "20"
      CUBLAS_WORKSPACE_CONFIG: ":0:0"

    # The `:?` form makes Compose abort with the given message when the
    # variable is unset or empty, instead of silently substituting an empty
    # string and bind-mounting host-root paths such as `/models`.
    volumes:
      # Models from existing ComfyUI install (read-only)
      - "${COMFYUI_HOST_PATH:?set COMFYUI_HOST_PATH to the host ComfyUI directory}/models:/opt/ComfyUI/models:ro"

      # Custom nodes - comment out to use container-only (fresh) custom_nodes
      # If mounted, ComfyUI-Manager installs persist across container restarts
      - "${SPARKYUI_DATA_PATH:?set SPARKYUI_DATA_PATH to the host data directory}/custom_nodes:/opt/ComfyUI/custom_nodes"

      # Outputs/inputs/workflows - persistent across restarts
      - "${SPARKYUI_DATA_PATH:?set SPARKYUI_DATA_PATH to the host data directory}/output:/opt/ComfyUI/output"
      - "${SPARKYUI_DATA_PATH:?set SPARKYUI_DATA_PATH to the host data directory}/input:/opt/ComfyUI/input"
      - "${SPARKYUI_DATA_PATH:?set SPARKYUI_DATA_PATH to the host data directory}/workflows:/opt/ComfyUI/workflows"

      # Wheel cache (optional - for prebuilt wheels)
      - "${SPARKYUI_DATA_PATH:?set SPARKYUI_DATA_PATH to the host data directory}/wheels:/opt/wheels"

    restart: unless-stopped