diff --git a/apps/multimodal/vision-based-pdf-multi-vector/leann_multi_vector.py b/apps/multimodal/vision-based-pdf-multi-vector/leann_multi_vector.py
index 7168ac2..9138261 100644
--- a/apps/multimodal/vision-based-pdf-multi-vector/leann_multi_vector.py
+++ b/apps/multimodal/vision-based-pdf-multi-vector/leann_multi_vector.py
@@ -1,6 +1,7 @@
 import concurrent.futures
 import glob
 import json
+import logging
 import os
 import re
 import sys
@@ -12,6 +13,8 @@ import numpy as np
 from PIL import Image
 from tqdm import tqdm
 
+logger = logging.getLogger(__name__)
+
 
 def _ensure_repo_paths_importable(current_file: str) -> None:
     """Make local leann packages importable without installing (mirrors multi-vector-leann.py)."""
@@ -203,6 +206,8 @@ def _select_device_and_dtype():
 
 
 def _load_colvision(model_choice: str):
+    import os
+
     import torch
     from colpali_engine.models import (
         ColPali,
@@ -214,6 +219,16 @@ def _load_colvision(model_choice: str):
     from colpali_engine.models.paligemma.colpali.processing_colpali import ColPaliProcessor
     from transformers.utils.import_utils import is_flash_attn_2_available
 
+    # Force HuggingFace Hub to use the HF endpoint, avoiding Google Drive
+    # Set environment variables to ensure models are downloaded from HuggingFace
+    os.environ.setdefault("HF_ENDPOINT", "https://huggingface.co")
+    os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
+
+    # Log model loading info
+    logger.info(f"Loading ColVision model: {model_choice}")
+    logger.info(f"HF_ENDPOINT: {os.environ.get('HF_ENDPOINT', 'not set')}")
+    logger.info("Models will be downloaded from HuggingFace Hub, not Google Drive")
+
     device_str, device, dtype = _select_device_and_dtype()
 
     # Determine model name and type
@@ -254,29 +269,36 @@ def _load_colvision(model_choice: str):
         "flash_attention_2" if (device_str == "cuda" and is_flash_attn_2_available()) else "eager"
     )
 
+    # Load model from HuggingFace Hub (not Google Drive)
+    # Use local_files_only=False to ensure download from HF if not cached
     if model_type == "colqwen2.5":
         model = ColQwen2_5.from_pretrained(
             model_name,
             torch_dtype=torch.bfloat16,
             device_map=device,
             attn_implementation=attn_implementation,
+            local_files_only=False,  # Ensure download from HuggingFace Hub
         ).eval()
-        processor = ColQwen2_5_Processor.from_pretrained(model_name)
+        processor = ColQwen2_5_Processor.from_pretrained(model_name, local_files_only=False)
     elif model_type == "colqwen2":
         model = ColQwen2.from_pretrained(
             model_name,
             torch_dtype=torch.bfloat16,
             device_map=device,
             attn_implementation=attn_implementation,
+            local_files_only=False,  # Ensure download from HuggingFace Hub
        ).eval()
-        processor = ColQwen2Processor.from_pretrained(model_name)
+        processor = ColQwen2Processor.from_pretrained(model_name, local_files_only=False)
     else:  # colpali
         model = ColPali.from_pretrained(
             model_name,
             torch_dtype=torch.bfloat16,
             device_map=device,
+            local_files_only=False,  # Ensure download from HuggingFace Hub
         ).eval()
-        processor = cast(ColPaliProcessor, ColPaliProcessor.from_pretrained(model_name))
+        processor = cast(
+            ColPaliProcessor, ColPaliProcessor.from_pretrained(model_name, local_files_only=False)
+        )
 
     return model_name, model, processor, device_str, device, dtype
 
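A note on the `os.environ.setdefault` calls in the hunk above: `setdefault` writes the value only when the variable is currently unset, so a user who already points `HF_ENDPOINT` at a private mirror keeps that setting and only clean environments fall back to the official hub. A minimal sketch of the precedence (the mirror URL is hypothetical):

```python
import os

# Simulate a user who already pointed HF_ENDPOINT at a private mirror.
os.environ["HF_ENDPOINT"] = "https://hf-mirror.example.com"  # hypothetical override

# The patch's setdefault is a no-op here because the variable is already set,
# so the user's mirror wins; on a clean environment it fills in huggingface.co.
os.environ.setdefault("HF_ENDPOINT", "https://huggingface.co")

assert os.environ["HF_ENDPOINT"] == "https://hf-mirror.example.com"
```

One thing worth double-checking: `huggingface_hub` appears to read `HF_ENDPOINT` (and `HF_HUB_ENABLE_HF_TRANSFER`) once at import time, and the `colpali_engine` imports above the `setdefault` calls pull in `transformers`, so the override may need to run before those imports to take effect.
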
diff --git a/packages/leann-mcp/README.md b/packages/leann-mcp/README.md
index 356065d..cde8d29 100644
--- a/packages/leann-mcp/README.md
+++ b/packages/leann-mcp/README.md
@@ -53,6 +53,11 @@ leann build my-project --docs $(git ls-files)
 # Start Claude Code
 claude
 ```
+**Performance tip**: For maximum speed when storage space is not a concern, add the `--no-recompute` flag to your build command. This materializes all embeddings and stores them on disk, so nothing has to be recomputed at search time:
+
+```bash
+leann build my-project --docs $(git ls-files) --no-recompute
+```
 
 ## 🚀 Advanced Usage
 Examples to build the index
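
For an end-to-end check of the download path this patch configures, the hub resolution can be exercised outside the full pipeline. A minimal sketch, assuming a recent `huggingface_hub`; the repo id `vidore/colpali-v1.2` stands in for whichever model `_load_colvision` selects:

```python
import os

# Set the endpoint before huggingface_hub is imported, since it is read at import time.
os.environ.setdefault("HF_ENDPOINT", "https://huggingface.co")

from huggingface_hub import snapshot_download

# Equivalent in spirit to local_files_only=False: cached files are reused and
# anything missing is fetched from the configured endpoint.
local_path = snapshot_download("vidore/colpali-v1.2")
print(f"Model files resolved to: {local_path}")
```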