From 1f5aeb5248dee45d9a749eb40869016daba8970a Mon Sep 17 00:00:00 2001
From: Evan Carmen
Date: Sat, 3 Jan 2026 20:13:46 -0600
Subject: [PATCH] Initial commit: SparkyUI - ComfyUI for DGX Spark (Blackwell GB10)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Docker-based ComfyUI setup for NVIDIA DGX Spark ARM64 + sm_121:
- CUDA 13.0.2 base (required for compute_121 support)
- PyTorch 2.9.1+cu130 ARM64 wheels
- SageAttention compiled with TORCH_CUDA_ARCH_LIST="12.1"
- Triton/torch.compile disabled (no sm_121 support yet)
- ComfyUI-Manager auto-installed at runtime
- Configurable model/data paths via .env

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5
---
 .dockerignore         |  30 +++++++++
 .env.example          |  16 +++++
 .gitignore            |  60 ++++++++++++++++++
 Dockerfile            |  58 +++++++++++++++++
 README.md             | 144 ++++++++++++++++++++++++++++++++++++++++++
 custom_nodes/.gitkeep |   0
 docker-compose.yml    |  53 ++++++++++++++++
 entrypoint.sh         |  30 +++++++++
 input/.gitkeep        |   0
 output/.gitkeep       |   0
 wheels/.gitkeep       |   0
 workflows/.gitkeep    |   0
 12 files changed, 391 insertions(+)
 create mode 100644 .dockerignore
 create mode 100644 .env.example
 create mode 100644 .gitignore
 create mode 100644 Dockerfile
 create mode 100644 README.md
 create mode 100644 custom_nodes/.gitkeep
 create mode 100644 docker-compose.yml
 create mode 100644 entrypoint.sh
 create mode 100644 input/.gitkeep
 create mode 100644 output/.gitkeep
 create mode 100644 wheels/.gitkeep
 create mode 100644 workflows/.gitkeep

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..b23d622
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,30 @@
+# Runtime data (bind-mounted by docker-compose.yml at run time, not needed in the build context)
+custom_nodes/
+output/
+input/
+workflows/
+
+# Git
+.git/
+.gitignore
+
+# Environment and secrets
+.env
+*.env.local
+
+# Documentation (not needed in image)
+*.md
+CLAUDE.md
+README.md
+LICENSE
+
+# IDE
+.vscode/
+.idea/
+
+# Python cache
+__pycache__/
+*.pyc
+
+# Prebuilt wheels (built separately; mounted at /opt/wheels by docker-compose.yml)
+wheels/
diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..c0d2924
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,16 @@
+# SparkyUI - ComfyUI for DGX Spark (Blackwell GB10)
+# Copy this to .env and customize paths as needed
+
+# Base path of your existing ComfyUI installation; its models/ subdirectory is mounted read-only
+COMFYUI_HOST_PATH=/path/to/your/ComfyUI
+
+# Base path for SparkyUI data (custom_nodes, outputs, inputs, etc.)
+SPARKYUI_DATA_PATH=/path/to/SparkyUI
+
+# ComfyUI settings (COMFYUI_PORT is the host port; docker-compose.yml maps it to container port 8188)
+COMFYUI_PORT=8188
+COMFYUI_FLAGS=--listen 0.0.0.0 --port 8188 --gpu-only
+
+# Build refs, passed to the Dockerfile as build args (pin to specific commits/tags for reproducibility)
+COMFYUI_REF=master
+SAGEATTN_REF=main
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..480ef5f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,60 @@
+# Project-specific internal docs
+CLAUDE.md
+
+# Environment (contains local paths)
+.env
+*.env.local
+
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+.venv/
+venv/
+
+# Docker
+.docker/
+
+# OS
+.DS_Store
+Thumbs.db
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# Runtime directories - ignore contents but keep .gitkeep
+custom_nodes/*
+!custom_nodes/.gitkeep
+
+output/*
+!output/.gitkeep
+
+input/*
+!input/.gitkeep
+
+workflows/*
+!workflows/.gitkeep
+
+# Wheels directory - for prebuilt ARM64/sm_121 binaries
+# Ignore contents except .gitkeep (add wheels explicitly if needed)
+wheels/*
+!wheels/.gitkeep
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..3f9bcd2
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,58 @@
+# CUDA 13.0 for Blackwell GB10 (sm_121 / compute_121)
+# CUDA 12.8 only supports up to sm_120, but GB10 is sm_121.
+# "devel" includes nvcc so we can compile CUDA extensions like SageAttention.
+FROM nvidia/cuda:13.0.2-devel-ubuntu24.04
+
+ARG DEBIAN_FRONTEND=noninteractive
+ARG COMFYUI_REF=master
+ARG SAGEATTN_REF=main
+
+# Base system deps
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git curl ca-certificates \
+    python3 python3-pip python3-venv python3-dev \
+    build-essential ninja-build cmake pkg-config \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create venv (keeps python deps isolated inside container)
+ENV VENV=/opt/venv
+RUN python3 -m venv "$VENV"
+ENV PATH="$VENV/bin:$PATH"
+
+# Upgrade packaging tools (--no-cache-dir keeps pip's download cache out of the image layers)
+RUN pip install --no-cache-dir -U pip setuptools wheel
+
+# ---- PyTorch (ARM64 + CUDA 13.0) ----
+# PyTorch cu130 wheels work with CUDA 13.0.x runtime.
+# ARM64 wheels available: torch-2.9.1+cu130, torchvision-0.24.1
+RUN pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cu130 \
+    torch torchvision
+
+# ---- ComfyUI ----
+# No "|| true" here: a failed clone or an unknown COMFYUI_REF must fail the build, not ship master silently
+RUN git clone https://github.com/comfyanonymous/ComfyUI.git /opt/ComfyUI && \
+    git -C /opt/ComfyUI checkout "${COMFYUI_REF}"
+
+RUN pip install --no-cache-dir -r /opt/ComfyUI/requirements.txt
+
+# ---- ComfyUI-Manager ----
+# Handled at runtime by entrypoint.sh (clones if missing in mounted volume)
+# An existing checkout is left untouched; it is not updated on container start
+
+# ---- SageAttention ----
+# GB10 is compute capability 12.1 (sm_121).
+# CUDA 13.0 NVCC supports sm_121, so we compile directly for it.
+ENV TORCH_CUDA_ARCH_LIST="12.1"
+ENV CUDA_HOME=/usr/local/cuda
+
+# Build/install SageAttention from repo with sm_121 support (best-effort: warn instead of aborting the build)
+RUN pip install --no-cache-dir --no-build-isolation "git+https://github.com/thu-ml/SageAttention@${SAGEATTN_REF}" || echo "WARNING: SageAttention build failed; image built without it" >&2
+
+# Expose ComfyUI
+EXPOSE 8188
+
+# Entry script handles runtime updates / flags
+COPY entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+
+ENTRYPOINT ["/entrypoint.sh"]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..571f0dc
--- /dev/null
+++ b/README.md
@@ -0,0 +1,144 @@
+# SparkyUI
+
+**ComfyUI + SageAttention for NVIDIA DGX Spark (Blackwell GB10)**
+
+A Docker-based ComfyUI setup specifically engineered for the DGX Spark's unique ARM64 + Blackwell architecture.
+
+## Why This Exists
+
+The NVIDIA DGX Spark uses the **GB10 GPU** with compute capability **12.1 (sm_121)** - Blackwell architecture. This creates challenges:
+
+| CUDA Version | Max Compute Capability | Can compile for GB10? |
+|--------------|------------------------|----------------------|
+| CUDA 12.8 | sm_120 | **No** |
+| CUDA 13.0+ | sm_121 | **Yes** |
+
+Standard ComfyUI containers and PyTorch wheels don't support sm_121. SparkyUI solves this by:
+
+1. Using **CUDA 13.0.2** base image (supports sm_121)
+2. Installing **PyTorch cu130** ARM64 wheels
+3. Compiling **SageAttention** with `TORCH_CUDA_ARCH_LIST="12.1"`
+4. Disabling **Triton/torch.compile** (doesn't support sm_121 yet)
+
+## Quick Start
+
+```bash
+# Clone
+git clone https://github.com/YOUR_USERNAME/SparkyUI.git
+cd SparkyUI
+
+# Configure paths
+cp .env.example .env
+# Edit .env with your paths
+
+# Build (compiles SageAttention for sm_121 - takes ~10 min)
+docker compose build
+
+# Start
+docker compose up -d
+
+# View logs
+docker compose logs -f
+```
+
+**Access:** http://localhost:8188 (or your DGX Spark's IP on LAN)
+
+## Requirements
+
+- **NVIDIA DGX Spark** (or other GB10-based system)
+- **Docker** with NVIDIA Container Toolkit
+- **NVIDIA Driver** 580+ (CUDA 13.0 requires the R580 driver branch or newer; tested with 580.95)
+- **~15GB** disk for Docker image
+- **Models** from existing ComfyUI install (mounted read-only)
+
+## Configuration
+
+Copy `.env.example` to `.env` and edit:
+
+```bash
+# Path to your existing ComfyUI install (its models/ directory is mounted read-only)
+COMFYUI_HOST_PATH=/path/to/your/ComfyUI
+
+# Path for SparkyUI data (custom_nodes, outputs, inputs)
+SPARKYUI_DATA_PATH=/path/to/SparkyUI
+
+# Optional: pin to specific versions
+COMFYUI_REF=master
+SAGEATTN_REF=main
+```
+
+## Architecture
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                       DGX Spark Host                        │
+│           Ubuntu 24.04 (DGX OS 7) / Driver 580.x            │
+│                                                             │
+│  ┌─────────────────────────────────────────────────────┐    │
+│  │  Docker Container (sparkyui:cu130)                  │    │
+│  │                                                     │    │
+│  │  CUDA 13.0.2 + PyTorch 2.9.1+cu130                  │    │
+│  │  SageAttention 2.2.0 (compiled for sm_121)          │    │
+│  │  ComfyUI 0.7.x + ComfyUI-Manager                    │    │
+│  │                                                     │    │
+│  │  Key env vars:                                      │    │
+│  │    TORCH_CUDA_ARCH_LIST="12.1"                      │    │
+│  │    TORCHDYNAMO_DISABLE="1"                          │    │
+│  └─────────────────────────────────────────────────────┘    │
+│                             │                               │
+│                      Port 8188 (LAN)                        │
+└─────────────────────────────────────────────────────────────┘
+```
+
+## Version Compatibility
+
+Tested combinations:
+
+| Component | Version | Notes |
+|-----------|---------|-------|
+| CUDA Base | 13.0.2 | Required for sm_121 |
+| PyTorch | 2.9.1+cu130 | ARM64 wheel from PyTorch index |
+| torchvision | 0.24.1+cu130 | ARM64 wheel |
+| SageAttention | 2.2.0 | Compiled with sm_121 |
+| ComfyUI | 0.7.0 | master branch |
+| Driver | 580.95 | DGX OS 7 default |
+
+## Known Limitations
+
+1. **PyTorch Warning**: You'll see a warning about compute capability 12.1 being "outside supported range (8.0-12.0)". This is harmless - PyTorch works, and SageAttention's custom kernels are compiled natively.
+
+2. **torch.compile Disabled**: Triton doesn't support sm_121 yet. `torch.compile()` is disabled via environment variables. Some nodes may run slower than on supported architectures.
+
+3. **No GitHub Actions CI**: Can't build for ARM64 + sm_121 in GitHub's hosted runners. Must build locally on DGX Spark.
+
+## Troubleshooting
+
+### "no kernel image is available for execution on the device"
+Your SageAttention wasn't compiled for sm_121. Rebuild:
+```bash
+docker compose build --no-cache
+```
+
+### PyTorch can't find CUDA
+Ensure NVIDIA Container Toolkit is installed:
+```bash
+nvidia-ctk --version
+docker run --rm --gpus all nvidia/cuda:13.0.2-base-ubuntu24.04 nvidia-smi
+```
+
+### ComfyUI-Manager missing
+The entrypoint auto-clones it. Check logs:
+```bash
+docker compose logs | grep -i manager
+```
+
+## Future
+
+When these land, SparkyUI can be simplified:
+- [ ] PyTorch native sm_121 support → remove explicit `TORCH_CUDA_ARCH_LIST`
+- [ ] Triton sm_121 support → remove `TORCHDYNAMO_DISABLE`
+- [ ] SageAttention prebuilt ARM64 wheels → remove source build
+
+## License
+
+MIT
diff --git a/custom_nodes/.gitkeep b/custom_nodes/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..9aa88d3
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,53 @@
+services:
+  comfyui:
+    build:
+      context: .
+      dockerfile: Dockerfile
+      args:
+        # Pin ComfyUI to a known-good commit/tag if desired
+        COMFYUI_REF: "${COMFYUI_REF:-master}"
+        # SageAttention ref (e.g., "main", "v2.2.0", or specific commit)
+        SAGEATTN_REF: "${SAGEATTN_REF:-main}"
+
+    image: sparkyui:cu130
+    container_name: comfyui
+
+    # GPU enablement
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+
+    # LAN exposure (host port is configurable; the container port is fixed at 8188)
+    ports:
+      - "${COMFYUI_PORT:-8188}:8188"
+
+    environment:
+      COMFYUI_PORT: "${COMFYUI_PORT:-8188}"
+      COMFYUI_FLAGS: "${COMFYUI_FLAGS:---listen 0.0.0.0 --port 8188 --gpu-only}"
+      NVIDIA_VISIBLE_DEVICES: "all"
+      NVIDIA_DRIVER_CAPABILITIES: "compute,utility"
+      # Disable torch.compile/inductor - Triton doesn't support Blackwell sm_121a yet
+      TORCH_COMPILE_DISABLE: "1"
+      TORCHDYNAMO_DISABLE: "1"
+
+    volumes:
+      # Models from existing ComfyUI install (read-only)
+      - ${COMFYUI_HOST_PATH}/models:/opt/ComfyUI/models:ro
+
+      # Custom nodes - comment out to use container-only (fresh) custom_nodes
+      # If mounted, ComfyUI-Manager installs persist across container restarts
+      - ${SPARKYUI_DATA_PATH}/custom_nodes:/opt/ComfyUI/custom_nodes
+
+      # Outputs/inputs/workflows - persistent across restarts
+      - ${SPARKYUI_DATA_PATH}/output:/opt/ComfyUI/output
+      - ${SPARKYUI_DATA_PATH}/input:/opt/ComfyUI/input
+      - ${SPARKYUI_DATA_PATH}/workflows:/opt/ComfyUI/workflows
+
+      # Wheel cache (optional - for prebuilt wheels)
+      - ${SPARKYUI_DATA_PATH}/wheels:/opt/wheels
+
+    restart: unless-stopped
diff --git a/entrypoint.sh b/entrypoint.sh
new file mode 100644
index 0000000..ca63532
--- /dev/null
+++ b/entrypoint.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+COMFY_DIR="/opt/ComfyUI"
+PORT="${COMFYUI_PORT:-8188}"
+FLAGS="${COMFYUI_FLAGS:---listen 0.0.0.0 --port ${PORT}}"
+
+echo "[entrypoint] Python: $(python --version)"
+echo "[entrypoint] Torch: $(python -c 'import torch; print(torch.__version__)')"
+echo "[entrypoint] CUDA: $(python -c 'import torch; print(torch.version.cuda)')"
+echo "[entrypoint] Flags: ${FLAGS}"
+
+# Ensure ComfyUI-Manager exists in mounted custom_nodes
+# Check for __init__.py to detect corrupted/partial installs
+if [[ ! -f "${COMFY_DIR}/custom_nodes/ComfyUI-Manager/__init__.py" ]]; then
+  echo "[entrypoint] ComfyUI-Manager missing or corrupted, cloning latest..."
+  rm -rf "${COMFY_DIR}/custom_nodes/ComfyUI-Manager" 2>/dev/null || true
+  git clone https://github.com/ltdrdata/ComfyUI-Manager.git \
+    "${COMFY_DIR}/custom_nodes/ComfyUI-Manager" || echo "[entrypoint] WARNING: could not clone ComfyUI-Manager; starting without it" >&2
+fi
+
+# Install any requirements from custom nodes (best-effort: a failing node must not block startup)
+for req in "${COMFY_DIR}"/custom_nodes/*/requirements.txt; do
+  if [[ -f "$req" ]]; then
+    echo "[entrypoint] Installing deps from: $req"
+    pip install -q -r "$req" || echo "[entrypoint] WARNING: failed to install deps from: $req" >&2
+  fi
+done
+# FLAGS is intentionally left unquoted so it word-splits into separate arguments for main.py
+exec python "${COMFY_DIR}/main.py" ${FLAGS}
diff --git a/input/.gitkeep b/input/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/output/.gitkeep b/output/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/wheels/.gitkeep b/wheels/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/workflows/.gitkeep b/workflows/.gitkeep
new file mode 100644
index 0000000..e69de29