From c803ea6146de03a71a578eefec5e39b2e1f5c766 Mon Sep 17 00:00:00 2001
From: Evan Carmen
Date: Thu, 21 May 2026 11:02:06 -0500
Subject: [PATCH] fix: intermediate_device() returns cuda on unified memory
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On Grace-Blackwell (GB10), CPU and GPU share the same physical RAM.
intermediate_device() was returning 'cpu', which means ComfyUI allocates
output buffers (like VAE decode) through the CPU allocator on the same
physical memory pool it thinks is free VRAM. This causes:

1. Memory accounting mismatch — ComfyUI thinks intermediates are
   'over there' on CPU and overestimates available VRAM
2. Unnecessary .to(device) copies through separate allocator heaps
3. Heap fragmentation across the unified memory pool

Now matches text_encoder_offload_device() and vae_offload_device()
which already return get_torch_device() on UNIFIED_MEMORY.
---
 patches/model_management.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/patches/model_management.py b/patches/model_management.py
index 43b95be..ec9edb5 100644
--- a/patches/model_management.py
+++ b/patches/model_management.py
@@ -1106,7 +1106,7 @@ def text_encoder_dtype(device=None):
 
 
 def intermediate_device():
-    if args.gpu_only:
+    if args.gpu_only or UNIFIED_MEMORY:
         return get_torch_device()
     else:
         return torch.device("cpu")