Initial commit: SparkyUI - ComfyUI for DGX Spark (Blackwell GB10)
Docker-based ComfyUI setup for NVIDIA DGX Spark ARM64 + sm_121: - CUDA 13.0.2 base (required for compute_121 support) - PyTorch 2.9.1+cu130 ARM64 wheels - SageAttention compiled with TORCH_CUDA_ARCH_LIST="12.1" - Triton/torch.compile disabled (no sm_121 support yet) - ComfyUI-Manager auto-installed at runtime - Configurable model/data paths via .env 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,30 @@
|
|||||||
|
# Runtime data (mounted as volumes)
|
||||||
|
custom_nodes/
|
||||||
|
output/
|
||||||
|
input/
|
||||||
|
workflows/
|
||||||
|
|
||||||
|
# Git
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
|
||||||
|
# Environment and secrets
|
||||||
|
.env
|
||||||
|
*.env.local
|
||||||
|
|
||||||
|
# Documentation (not needed in image)
|
||||||
|
*.md
|
||||||
|
CLAUDE.md
|
||||||
|
README.md
|
||||||
|
LICENSE
|
||||||
|
|
||||||
|
# IDE
|
||||||
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
|
||||||
|
# Python cache
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
|
||||||
|
# Prebuilt wheels (built separately)
|
||||||
|
wheels/
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
# SparkyUI - ComfyUI for DGX Spark (Blackwell GB10)
|
||||||
|
# Copy this to .env and customize paths as needed
|
||||||
|
|
||||||
|
# Base path where your existing ComfyUI installation lives (for models)
|
||||||
|
COMFYUI_HOST_PATH=/path/to/your/ComfyUI
|
||||||
|
|
||||||
|
# Base path for SparkyUI data (custom_nodes, outputs, inputs, etc.)
|
||||||
|
SPARKYUI_DATA_PATH=/path/to/SparkyUI
|
||||||
|
|
||||||
|
# ComfyUI settings
|
||||||
|
COMFYUI_PORT=8188
|
||||||
|
COMFYUI_FLAGS=--listen 0.0.0.0 --port 8188 --gpu-only
|
||||||
|
|
||||||
|
# Build refs (pin to specific commits/tags for reproducibility)
|
||||||
|
COMFYUI_REF=master
|
||||||
|
SAGEATTN_REF=main
|
||||||
+60
@@ -0,0 +1,60 @@
|
|||||||
|
# Project-specific internal docs
|
||||||
|
CLAUDE.md
|
||||||
|
|
||||||
|
# Environment (contains local paths)
|
||||||
|
.env
|
||||||
|
*.env.local
|
||||||
|
|
||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
*.so
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
.venv/
|
||||||
|
venv/
|
||||||
|
|
||||||
|
# Docker
|
||||||
|
.docker/
|
||||||
|
|
||||||
|
# OS
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
|
# IDE
|
||||||
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
|
||||||
|
# Runtime directories - ignore contents but keep .gitkeep
|
||||||
|
custom_nodes/*
|
||||||
|
!custom_nodes/.gitkeep
|
||||||
|
|
||||||
|
output/*
|
||||||
|
!output/.gitkeep
|
||||||
|
|
||||||
|
input/*
|
||||||
|
!input/.gitkeep
|
||||||
|
|
||||||
|
workflows/*
|
||||||
|
!workflows/.gitkeep
|
||||||
|
|
||||||
|
# Wheels directory - for prebuilt ARM64/sm_121 binaries
|
||||||
|
# Ignore contents except .gitkeep (add wheels explicitly if needed)
|
||||||
|
wheels/*
|
||||||
|
!wheels/.gitkeep
|
||||||
+58
@@ -0,0 +1,58 @@
|
|||||||
|
# SparkyUI image: ComfyUI + SageAttention for NVIDIA DGX Spark (GB10, sm_121).
#
# Build args:
#   COMFYUI_REF   - git ref (branch/tag/commit) of ComfyUI to check out
#   SAGEATTN_REF  - git ref of SageAttention to build from source

# CUDA 13.0 for Blackwell GB10 (sm_121 / compute_121)
# CUDA 12.8 only supports up to sm_120, but GB10 is sm_121.
# "devel" includes nvcc so we can compile CUDA extensions like SageAttention.
FROM nvidia/cuda:13.0.2-devel-ubuntu24.04

ARG DEBIAN_FRONTEND=noninteractive
ARG COMFYUI_REF=master
ARG SAGEATTN_REF=main

# Base system deps
RUN apt-get update && apt-get install -y --no-install-recommends \
      git curl ca-certificates \
      python3 python3-pip python3-venv python3-dev \
      build-essential ninja-build cmake pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Create venv (keeps python deps isolated inside container)
ENV VENV=/opt/venv
RUN python3 -m venv $VENV
# Put the venv first on PATH so every later `python` / `pip` uses it.
ENV PATH="$VENV/bin:$PATH"

# Upgrade packaging tools
RUN pip install -U pip setuptools wheel

# ---- PyTorch (ARM64 + CUDA 13.0) ----
# PyTorch cu130 wheels work with CUDA 13.0.x runtime.
# ARM64 wheels available: torch-2.9.1+cu130, torchvision-0.24.1
# NOTE: versions are not pinned here; pip resolves whatever the cu130 index
# currently serves at build time.
RUN pip install --index-url https://download.pytorch.org/whl/cu130 \
      torch torchvision

# ---- ComfyUI ----
# No `|| true` here: a failed clone or an unknown COMFYUI_REF must fail the
# build instead of silently producing an image from the default branch.
RUN git clone https://github.com/comfyanonymous/ComfyUI.git /opt/ComfyUI && \
    cd /opt/ComfyUI && \
    git checkout "${COMFYUI_REF}"

RUN pip install -r /opt/ComfyUI/requirements.txt

# ---- ComfyUI-Manager ----
# Handled at runtime by entrypoint.sh, which clones it into the mounted
# custom_nodes volume only when it is missing or incomplete. An existing
# checkout is left as-is (it is not updated on container start).

# ---- SageAttention ----
# GB10 is compute capability 12.1 (sm_121).
# CUDA 13.0 NVCC supports sm_121, so we compile directly for it.
ENV TORCH_CUDA_ARCH_LIST="12.1"
ENV CUDA_HOME=/usr/local/cuda

# Build/install SageAttention from repo with sm_121 support.
# --no-build-isolation lets the build see the torch installed above.
# Kept best-effort (the image is still usable without it), but a failure is
# now reported in the build log instead of being silently discarded.
RUN pip install --no-build-isolation "git+https://github.com/thu-ml/SageAttention@${SAGEATTN_REF}" \
    || echo "WARNING: SageAttention (${SAGEATTN_REF}) failed to build; image will NOT include it" >&2

# Expose ComfyUI
EXPOSE 8188

# Entry script handles runtime updates / flags
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

ENTRYPOINT ["/entrypoint.sh"]
|
||||||
@@ -0,0 +1,144 @@
|
|||||||
|
# SparkyUI
|
||||||
|
|
||||||
|
**ComfyUI + SageAttention for NVIDIA DGX Spark (Blackwell GB10)**
|
||||||
|
|
||||||
|
A Docker-based ComfyUI setup specifically engineered for the DGX Spark's unique ARM64 + Blackwell architecture.
|
||||||
|
|
||||||
|
## Why This Exists
|
||||||
|
|
||||||
|
The NVIDIA DGX Spark uses the **GB10 GPU** with compute capability **12.1 (sm_121)** - Blackwell architecture. This creates challenges:
|
||||||
|
|
||||||
|
| CUDA Version | Max Compute Capability | Can compile for GB10? |
|
||||||
|
|--------------|------------------------|----------------------|
|
||||||
|
| CUDA 12.8 | sm_120 | **No** |
|
||||||
|
| CUDA 13.0+ | sm_121 | **Yes** |
|
||||||
|
|
||||||
|
Standard ComfyUI containers and PyTorch wheels don't support sm_121. SparkyUI solves this by:
|
||||||
|
|
||||||
|
1. Using **CUDA 13.0.2** base image (supports sm_121)
|
||||||
|
2. Installing **PyTorch cu130** ARM64 wheels
|
||||||
|
3. Compiling **SageAttention** with `TORCH_CUDA_ARCH_LIST="12.1"`
|
||||||
|
4. Disabling **Triton/torch.compile** (doesn't support sm_121 yet)
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Clone
|
||||||
|
git clone https://github.com/YOUR_USERNAME/SparkyUI.git
|
||||||
|
cd SparkyUI
|
||||||
|
|
||||||
|
# Configure paths
|
||||||
|
cp .env.example .env
|
||||||
|
# Edit .env with your paths
|
||||||
|
|
||||||
|
# Build (compiles SageAttention for sm_121 - takes ~10 min)
|
||||||
|
docker compose build
|
||||||
|
|
||||||
|
# Start
|
||||||
|
docker compose up -d
|
||||||
|
|
||||||
|
# View logs
|
||||||
|
docker compose logs -f
|
||||||
|
```
|
||||||
|
|
||||||
|
**Access:** http://localhost:8188 (or your DGX Spark's IP on LAN)
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
- **NVIDIA DGX Spark** (or other GB10-based system)
|
||||||
|
- **Docker** with NVIDIA Container Toolkit
|
||||||
|
- **NVIDIA Driver** 580+ (CUDA 13.0 containers require the R580 driver branch or newer; tested with 580.95)
|
||||||
|
- **~15GB** disk for Docker image
|
||||||
|
- **Models** from existing ComfyUI install (mounted read-only)
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
Copy `.env.example` to `.env` and edit:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Path to your existing ComfyUI models (mounted read-only)
|
||||||
|
COMFYUI_HOST_PATH=/path/to/your/ComfyUI
|
||||||
|
|
||||||
|
# Path for SparkyUI data (custom_nodes, outputs, inputs)
|
||||||
|
SPARKYUI_DATA_PATH=/path/to/SparkyUI
|
||||||
|
|
||||||
|
# Optional: pin to specific versions
|
||||||
|
COMFYUI_REF=master
|
||||||
|
SAGEATTN_REF=main
|
||||||
|
```
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────┐
|
||||||
|
│ DGX Spark Host │
|
||||||
|
│ Ubuntu 24.04 (DGX OS 7) / Driver 580.x │
|
||||||
|
│ │
|
||||||
|
│ ┌─────────────────────────────────────────────────────┐ │
|
||||||
|
│ │ Docker Container (sparkyui:cu130) │ │
|
||||||
|
│ │ │ │
|
||||||
|
│ │ CUDA 13.0.2 + PyTorch 2.9.1+cu130 │ │
|
||||||
|
│ │ SageAttention 2.2.0 (compiled for sm_121) │ │
|
||||||
|
│ │ ComfyUI 0.7.x + ComfyUI-Manager │ │
|
||||||
|
│ │ │ │
|
||||||
|
│ │ Key env vars: │ │
|
||||||
|
│ │ TORCH_CUDA_ARCH_LIST="12.1" │ │
|
||||||
|
│ │ TORCHDYNAMO_DISABLE="1" │ │
|
||||||
|
│ └─────────────────────────────────────────────────────┘ │
|
||||||
|
│ │ │
|
||||||
|
│ Port 8188 (LAN) │
|
||||||
|
└─────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
## Version Compatibility
|
||||||
|
|
||||||
|
Tested combinations:
|
||||||
|
|
||||||
|
| Component | Version | Notes |
|
||||||
|
|-----------|---------|-------|
|
||||||
|
| CUDA Base | 13.0.2 | Required for sm_121 |
|
||||||
|
| PyTorch | 2.9.1+cu130 | ARM64 wheel from PyTorch index |
|
||||||
|
| torchvision | 0.24.1+cu130 | ARM64 wheel |
|
||||||
|
| SageAttention | 2.2.0 | Compiled with sm_121 |
|
||||||
|
| ComfyUI | 0.7.0 | master branch |
|
||||||
|
| Driver | 580.95 | DGX OS 7 default |
|
||||||
|
|
||||||
|
## Known Limitations
|
||||||
|
|
||||||
|
1. **PyTorch Warning**: You'll see a warning about compute capability 12.1 being "outside supported range (8.0-12.0)". This is harmless - PyTorch works, and SageAttention's custom kernels are compiled natively.
|
||||||
|
|
||||||
|
2. **torch.compile Disabled**: Triton doesn't support sm_121 yet. `torch.compile()` is disabled via environment variables. Some nodes may run slower than on supported architectures.
|
||||||
|
|
||||||
|
3. **No GitHub Actions CI**: The image can't be built for ARM64 + sm_121 on GitHub's hosted runners, so it must be built locally on the DGX Spark.
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### "no kernel image is available for execution on the device"
|
||||||
|
Your SageAttention wasn't compiled for sm_121. Rebuild:
|
||||||
|
```bash
|
||||||
|
docker compose build --no-cache
|
||||||
|
```
|
||||||
|
|
||||||
|
### PyTorch can't find CUDA
|
||||||
|
Ensure NVIDIA Container Toolkit is installed:
|
||||||
|
```bash
|
||||||
|
nvidia-ctk --version
|
||||||
|
docker run --rm --gpus all nvidia/cuda:13.0.2-base-ubuntu24.04 nvidia-smi
|
||||||
|
```
|
||||||
|
|
||||||
|
### ComfyUI-Manager missing
|
||||||
|
The entrypoint auto-clones it. Check logs:
|
||||||
|
```bash
|
||||||
|
docker compose logs | grep -i manager
|
||||||
|
```
|
||||||
|
|
||||||
|
## Future
|
||||||
|
|
||||||
|
When these land, SparkyUI can be simplified:
|
||||||
|
- [ ] PyTorch native sm_121 support → remove explicit `TORCH_CUDA_ARCH_LIST`
|
||||||
|
- [ ] Triton sm_121 support → remove `TORCH_COMPILE_DISABLE` and `TORCHDYNAMO_DISABLE`
|
||||||
|
- [ ] SageAttention prebuilt ARM64 wheels → remove source build
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
MIT
|
||||||
@@ -0,0 +1,53 @@
|
|||||||
|
# SparkyUI compose file: builds and runs the ComfyUI container with GPU access.
# Values are read from .env (see .env.example).
services:
  comfyui:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        # Pin ComfyUI to a known-good commit/tag if desired
        COMFYUI_REF: "${COMFYUI_REF:-master}"
        # SageAttention ref (e.g., "main", "v2.2.0", or specific commit)
        SAGEATTN_REF: "${SAGEATTN_REF:-main}"

    image: sparkyui:cu130
    container_name: comfyui

    # GPU enablement
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

    # LAN exposure
    # COMFYUI_PORT selects the host-side port only; the container side is
    # fixed at 8188, matching the --port in the default COMFYUI_FLAGS below.
    ports:
      - "${COMFYUI_PORT:-8188}:8188"

    environment:
      COMFYUI_PORT: "${COMFYUI_PORT:-8188}"
      COMFYUI_FLAGS: "${COMFYUI_FLAGS:---listen 0.0.0.0 --port 8188 --gpu-only}"
      NVIDIA_VISIBLE_DEVICES: "all"
      NVIDIA_DRIVER_CAPABILITIES: "compute,utility"
      # Disable torch.compile/inductor - Triton doesn't support Blackwell sm_121a yet
      TORCH_COMPILE_DISABLE: "1"
      TORCHDYNAMO_DISABLE: "1"

    # The ":?" form makes Compose abort with the given message when the
    # variable is unset or empty. Without it the paths below would collapse to
    # host-root locations such as /models or /custom_nodes.
    volumes:
      # Models from existing ComfyUI install (read-only)
      - ${COMFYUI_HOST_PATH:?Set COMFYUI_HOST_PATH in .env}/models:/opt/ComfyUI/models:ro

      # Custom nodes - comment out to use container-only (fresh) custom_nodes
      # If mounted, ComfyUI-Manager installs persist across container restarts
      - ${SPARKYUI_DATA_PATH:?Set SPARKYUI_DATA_PATH in .env}/custom_nodes:/opt/ComfyUI/custom_nodes

      # Outputs/inputs/workflows - persistent across restarts
      - ${SPARKYUI_DATA_PATH:?Set SPARKYUI_DATA_PATH in .env}/output:/opt/ComfyUI/output
      - ${SPARKYUI_DATA_PATH:?Set SPARKYUI_DATA_PATH in .env}/input:/opt/ComfyUI/input
      - ${SPARKYUI_DATA_PATH:?Set SPARKYUI_DATA_PATH in .env}/workflows:/opt/ComfyUI/workflows

      # Wheel cache (optional - for prebuilt wheels)
      - ${SPARKYUI_DATA_PATH:?Set SPARKYUI_DATA_PATH in .env}/wheels:/opt/wheels

    restart: unless-stopped
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
#!/usr/bin/env bash
#
# SparkyUI container entrypoint.
#
# Prints the Python / Torch / CUDA versions, makes sure ComfyUI-Manager is
# present under custom_nodes, installs every custom node's requirements.txt,
# then replaces this shell with ComfyUI (main.py).
#
# Environment:
#   COMFYUI_PORT   Port used when building the default flags (default: 8188).
#   COMFYUI_FLAGS  Whitespace-separated arguments passed to main.py. When set
#                  and non-empty it replaces the defaults entirely, so
#                  COMFYUI_PORT is then not consulted.
set -euo pipefail

readonly COMFY_DIR="/opt/ComfyUI"
readonly MANAGER_DIR="${COMFY_DIR}/custom_nodes/ComfyUI-Manager"
PORT="${COMFYUI_PORT:-8188}"
FLAGS="${COMFYUI_FLAGS:---listen 0.0.0.0 --port ${PORT}}"

# Print a warning to stderr without aborting the script.
warn() {
  printf '[entrypoint] WARNING: %s\n' "$*" >&2
}

echo "[entrypoint] Python: $(python --version)"
echo "[entrypoint] Torch: $(python -c 'import torch; print(torch.__version__)')"
echo "[entrypoint] CUDA: $(python -c 'import torch; print(torch.version.cuda)')"
echo "[entrypoint] Flags: ${FLAGS}"

# Ensure ComfyUI-Manager exists in mounted custom_nodes
# Check for __init__.py to detect corrupted/partial installs
if [[ ! -f "${MANAGER_DIR}/__init__.py" ]]; then
  echo "[entrypoint] ComfyUI-Manager missing or corrupted, cloning latest..."
  # Best-effort removal of a partial checkout; git clone reports any real problem.
  rm -rf -- "${MANAGER_DIR}" 2>/dev/null || true
  # Cloning stays best-effort (ComfyUI is still launched without the manager),
  # but a failure is now reported instead of being silently swallowed.
  if ! git clone https://github.com/ltdrdata/ComfyUI-Manager.git "${MANAGER_DIR}"; then
    warn "could not clone ComfyUI-Manager; continuing without it"
  fi
fi

# Install any requirements from custom nodes
for req in "${COMFY_DIR}"/custom_nodes/*/requirements.txt; do
  # An unmatched glob stays literal, so skip anything that is not a real file.
  [[ -f "${req}" ]] || continue
  echo "[entrypoint] Installing deps from: ${req}"
  # One broken node must not keep ComfyUI from starting; warn and move on.
  if ! pip install -q -r "${req}"; then
    warn "failed to install deps from: ${req}"
  fi
done

# Split FLAGS on whitespace into an argument array. Unlike an unquoted
# ${FLAGS} expansion this never glob-expands an argument (e.g. one containing
# '*'). Newlines are folded to spaces first so a multi-line value still yields
# every word.
comfy_args=()
read -r -a comfy_args <<< "${FLAGS//$'\n'/ }"

exec python "${COMFY_DIR}/main.py" "${comfy_args[@]}"
|
||||||
Reference in New Issue
Block a user