Fix code examples and improve clarity (#3)
* Fix code examples and improve clarity: fix one broken code example and improve clarity throughout
* Update flux2_dev_hf.md: simplify further
* Update docs/flux2_dev_hf.md
* Add a reference to the main README
README.md:

@@ -84,7 +84,7 @@ Additionally, we are recommending implementing a solution to mark the metadata o
 
 ## 🧨 Lower VRAM diffusers example
 
-The below example should run on a RTX 4090.
+The below example should run on an RTX 4090. For more examples, check the [diffusers quantization guide](docs/flux2_dev_hf.md).
 
 ```python
 import torch
docs/flux2_dev_hf.md:

@@ -8,7 +8,7 @@ Install diffusers from `main`
 pip install git+https://github.com/huggingface/diffusers.git
 ```
 
-After accepting the gating on this repository, login with Hugging Face on your terminal
+After accepting the gating on the [FLUX.2-dev repository](https://huggingface.co/black-forest-labs/FLUX.2-dev), log in with Hugging Face in your terminal:
 ```sh
 hf auth login
 ```
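As a quick aside to this hunk: the login can be verified before running the examples. This is a sketch assuming the same `hf` CLI from `huggingface_hub` shown above:

```sh
# Prints the currently authenticated Hugging Face account
hf auth whoami
```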
@@ -28,13 +28,13 @@ The text-embeddings are calculated in bf16 in the cloud and you only load the tr
 
 ```py
 import torch
-from diffusers import Flux2Pipeline, Flux2Transformer2DModel
+from diffusers import Flux2Pipeline
 from diffusers.utils import load_image
 from huggingface_hub import get_token
 import requests
 import io
 
-repo_id = "diffusers/FLUX.2-dev-bnb-4bit"
+repo_id = "diffusers/FLUX.2-dev-bnb-4bit"  # quantized text encoder and DiT; the VAE stays in bf16
 device = "cuda:0"
 torch_dtype = torch.bfloat16
 
@@ -52,14 +52,15 @@ def remote_text_encoder(prompts):
     return prompt_embeds.to(device)
 
 pipe = Flux2Pipeline.from_pretrained(
-    repo_id, transformer=transformer, text_encoder=None, torch_dtype=torch_dtype
+    repo_id, text_encoder=None, torch_dtype=torch_dtype
 ).to(device)
 
 prompt = "Realistic macro photograph of a hermit crab using a soda can as its shell, partially emerging from the can, captured with sharp detail and natural colors, on a sunlit beach with soft shadows and a shallow depth of field, with blurred ocean waves in the background. The can has the text `BFL Diffusers` on it and it has a color gradient that start with #FF5733 at the top and transitions to #33FF57 at the bottom."
 
+#cat_image = load_image("https://huggingface.co/spaces/zerogpu-aoti/FLUX.1-Kontext-Dev-fp8-dynamic/resolve/main/cat.png")
 image = pipe(
     prompt_embeds=remote_text_encoder(prompt),
-    #image=load_image("https://huggingface.co/spaces/zerogpu-aoti/FLUX.1-Kontext-Dev-fp8-dynamic/resolve/main/cat.png") #optional image input
+    #image=cat_image,  # optional image input
     generator=torch.Generator(device=device).manual_seed(42),
     num_inference_steps=50, #28 steps can be a good trade-off
     guidance_scale=4,
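For context, the `remote_text_encoder` helper named in this hunk is defined earlier in the guide; only its last line appears in the diff. A minimal sketch of such a helper, consistent with the `requests`, `io` and `get_token` imports above, could look like the following. The endpoint URL is a placeholder, not the real service address:

```py
import io

import requests
import torch
from huggingface_hub import get_token

device = "cuda:0"

# Placeholder endpoint; the actual URL is given in the full guide.
TEXT_ENCODER_URL = "https://example.com/flux2-remote-text-encoder"

def remote_text_encoder(prompts):
    # Ask a remote service to run the text encoder in bf16
    # and return the serialized prompt embeddings.
    response = requests.post(
        TEXT_ENCODER_URL,
        json={"prompt": prompts},
        headers={"Authorization": f"Bearer {get_token()}"},
    )
    response.raise_for_status()
    # Deserialize the tensor payload and move it to the local GPU.
    prompt_embeds = torch.load(io.BytesIO(response.content))
    return prompt_embeds.to(device)
```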
@@ -75,30 +76,23 @@ The text-encoder is offloaded from VRAM for the transformer to run with `pipe.en
 
 ```py
 import torch
-from transformers import Mistral3ForConditionalGeneration
-from diffusers import Flux2Pipeline, Flux2Transformer2DModel
+from diffusers import Flux2Pipeline
+from diffusers.utils import load_image
 
-repo_id = "diffusers/FLUX.2-dev-bnb-4bit"
+repo_id = "diffusers/FLUX.2-dev-bnb-4bit"  # quantized text encoder and DiT; the VAE stays in bf16
 device = "cuda:0"
 torch_dtype = torch.bfloat16
 
-transformer = Flux2Transformer2DModel.from_pretrained(
-    repo_id, subfolder="transformer", torch_dtype=torch_dtype, device_map="cpu"
-)
-text_encoder = Mistral3ForConditionalGeneration.from_pretrained(
-    repo_id, subfolder="text_encoder", dtype=torch_dtype, device_map="cpu"
-)
 
 pipe = Flux2Pipeline.from_pretrained(
-    repo_id, transformer=transformer, text_encoder=text_encoder, torch_dtype=torch_dtype
+    repo_id, torch_dtype=torch_dtype
 )
 pipe.enable_model_cpu_offload()
 
 prompt = "Realistic macro photograph of a hermit crab using a soda can as its shell, partially emerging from the can, captured with sharp detail and natural colors, on a sunlit beach with soft shadows and a shallow depth of field, with blurred ocean waves in the background. The can has the text `BFL + Diffusers` on it and it has a color gradient that start with #FF5733 at the top and transitions to #33FF57 at the bottom."
 
+#cat_image = load_image("https://huggingface.co/spaces/zerogpu-aoti/FLUX.1-Kontext-Dev-fp8-dynamic/resolve/main/cat.png")
 image = pipe(
     prompt=prompt,
-    #image=[load_image("https://huggingface.co/spaces/zerogpu-aoti/FLUX.1-Kontext-Dev-fp8-dynamic/resolve/main/cat.png")] #multi-image input
+    #image=[cat_image],  # optional multi-image input
     generator=torch.Generator(device=device).manual_seed(42),
     num_inference_steps=50,
     guidance_scale=4,
@@ -113,12 +107,13 @@ To understand how different quantizations affect the model's abilities and quali
 
 ## 💿 More VRAM (80G+)
 
-Even an H100 can't hold the text-encoder, transormer and VAE at the same time. However, here it is a matter of activating the `pipe.enable_model_cpu_offload()`
-And for H200, B200 or larger carts, everything fits.
+Even an H100 can't hold the text encoder, transformer and VAE at the same time. However, as each fits individually, it is just a matter of activating `pipe.enable_model_cpu_offload()`.
+For H200, B200 or larger cards, everything fits.
 
 ```py
 import torch
 from diffusers import Flux2Pipeline
+from diffusers.utils import load_image
 
 repo_id = "black-forest-labs/FLUX.2-dev"
 device = "cuda:0"
@@ -127,13 +122,14 @@ torch_dtype = torch.bfloat16
 pipe = Flux2Pipeline.from_pretrained(
     repo_id, torch_dtype=torch_dtype
 )
-pipe.enable_model_cpu_offload() #deactivate for >80G VRAM carts like H200, B200, etc. and do a `pipe.to(device)` instead
+pipe.enable_model_cpu_offload()  # not needed for >80G VRAM cards like H200, B200, etc.; do `pipe.to(device)` instead
 
 prompt = "Realistic macro photograph of a hermit crab using a soda can as its shell, partially emerging from the can, captured with sharp detail and natural colors, on a sunlit beach with soft shadows and a shallow depth of field, with blurred ocean waves in the background. The can has the text `BFL Diffusers` on it and it has a color gradient that start with #FF5733 at the top and transitions to #33FF57 at the bottom."
 
+#cat_image = load_image("https://huggingface.co/spaces/zerogpu-aoti/FLUX.1-Kontext-Dev-fp8-dynamic/resolve/main/cat.png")
 image = pipe(
     prompt=prompt,
-    #image=[load_image("https://huggingface.co/spaces/zerogpu-aoti/FLUX.1-Kontext-Dev-fp8-dynamic/resolve/main/cat.png")] #multi-image input
+    #image=[cat_image],  # optional multi-image input
     generator=torch.Generator(device=device).manual_seed(42),
     num_inference_steps=50,
     guidance_scale=4,
@@ -146,7 +142,8 @@ image.save("flux2_output.png")
 `pipe.enable_model_cpu_offload()` slows you down a bit. You can move as fast as possible on the H100 with the remote text-encoder
 ```py
 import torch
-from diffusers import Flux2Pipeline, Flux2Transformer2DModel
+from diffusers import Flux2Pipeline
+from diffusers.utils import load_image
 from huggingface_hub import get_token
 import requests
 import io
@@ -175,9 +172,10 @@ pipe = Flux2Pipeline.from_pretrained(
 
 prompt = "Realistic macro photograph of a hermit crab using a soda can as its shell, partially emerging from the can, captured with sharp detail and natural colors, on a sunlit beach with soft shadows and a shallow depth of field, with blurred ocean waves in the background. The can has the text `BFL + Diffusers` on it and it has a color gradient that start with #FF5733 at the top and transitions to #33FF57 at the bottom."
 
+#cat_image = load_image("https://huggingface.co/spaces/zerogpu-aoti/FLUX.1-Kontext-Dev-fp8-dynamic/resolve/main/cat.png")
 image = pipe(
     prompt_embeds=remote_text_encoder(prompt),
-    #image=[load_image("https://huggingface.co/spaces/zerogpu-aoti/FLUX.1-Kontext-Dev-fp8-dynamic/resolve/main/cat.png")] #optional multi-image input
+    #image=[cat_image],  # optional multi-image input
     generator=torch.Generator(device=device).manual_seed(42),
     num_inference_steps=50,
     guidance_scale=4,
@@ -188,4 +186,4 @@ image.save("flux2_output.png")
 
 ## 🧮 Other VRAM sizes
 
 If you have different GPU sizes, you can experiment with different quantizations, for example, for 40-48G VRAM GPUs, (8-bit) quantization instead of 4-bit can be a good trade-off. You can learn more on the [diffusers FLUX.2 release blog](https://huggingface.co/blog/flux2)
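To illustrate the 8-bit suggestion in that last hunk: diffusers' bitsandbytes integration can quantize the transformer at load time. This is a minimal sketch, assuming the base repository keeps the transformer under `subfolder="transformer"` as in the 4-bit examples above; it requires the `bitsandbytes` package and is not part of this commit:

```py
import torch
from diffusers import BitsAndBytesConfig, Flux2Pipeline, Flux2Transformer2DModel

repo_id = "black-forest-labs/FLUX.2-dev"
torch_dtype = torch.bfloat16

# Quantize the transformer to 8-bit on the fly (requires bitsandbytes).
transformer = Flux2Transformer2DModel.from_pretrained(
    repo_id,
    subfolder="transformer",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    torch_dtype=torch_dtype,
)

pipe = Flux2Pipeline.from_pretrained(
    repo_id, transformer=transformer, torch_dtype=torch_dtype
)
# Offload whatever does not fit; see the sections above for the trade-offs.
pipe.enable_model_cpu_offload()
```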