From 7ddb4772c016bf8c0fe2fdfc09576dbdcaff3cfe Mon Sep 17 00:00:00 2001
From: Yichuan Wang <73766326+yichuan-w@users.noreply.github.com>
Date: Fri, 19 Dec 2025 17:29:14 -0800
Subject: [PATCH] Feature/custom folder multi vector/ add Readme to LEANN MCP
 (#189)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Add custom folder support and improve image loading for multi-vector retrieval

- Enhanced _load_images_from_dir with recursive search support and better error handling
- Added support for WebP format and RGB conversion for all image modes
- Added custom folder CLI arguments (--custom-folder, --recursive, --rebuild-index)
- Improved documentation and removed completed TODO comment

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

* Format code style in leann_multi_vector.py for better readability

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

* docs: polish README performance tip section

- Fix typo: 'matrilize' -> 'materialize'
- Improve clarity and formatting of --no-recompute flag explanation
- Add code block for better readability

* format

---------

Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 .../leann_multi_vector.py                     | 28 +++++++++++++++++--
 packages/leann-mcp/README.md                  |  5 ++++
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/apps/multimodal/vision-based-pdf-multi-vector/leann_multi_vector.py b/apps/multimodal/vision-based-pdf-multi-vector/leann_multi_vector.py
index 7168ac2..9138261 100644
--- a/apps/multimodal/vision-based-pdf-multi-vector/leann_multi_vector.py
+++ b/apps/multimodal/vision-based-pdf-multi-vector/leann_multi_vector.py
@@ -1,6 +1,7 @@
 import concurrent.futures
 import glob
 import json
+import logging
 import os
 import re
 import sys
@@ -12,6 +13,8 @@ import numpy as np
 from PIL import Image
 from tqdm import tqdm
 
+logger = logging.getLogger(__name__)
+
 
 def _ensure_repo_paths_importable(current_file: str) -> None:
     """Make local leann packages importable without installing (mirrors multi-vector-leann.py)."""
@@ -203,6 +206,8 @@ def _select_device_and_dtype():
 
 
 def _load_colvision(model_choice: str):
+    import os
+
     import torch
     from colpali_engine.models import (
         ColPali,
@@ -214,6 +219,16 @@ def _load_colvision(model_choice: str):
     from colpali_engine.models.paligemma.colpali.processing_colpali import ColPaliProcessor
     from transformers.utils.import_utils import is_flash_attn_2_available
 
+    # Default to the official HuggingFace Hub endpoint (not Google Drive) and enable hf_transfer.
+    # setdefault() only takes effect when the variable is not already set in the environment.
+    os.environ.setdefault("HF_ENDPOINT", "https://huggingface.co")
+    os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
+
+    # Log model loading info
+    logger.info(f"Loading ColVision model: {model_choice}")
+    logger.info(f"HF_ENDPOINT: {os.environ.get('HF_ENDPOINT', 'not set')}")
+    logger.info("Models will be downloaded from HuggingFace Hub, not Google Drive")
+
     device_str, device, dtype = _select_device_and_dtype()
 
     # Determine model name and type
@@ -254,29 +269,36 @@ def _load_colvision(model_choice: str):
         "flash_attention_2" if (device_str == "cuda" and is_flash_attn_2_available()) else "eager"
     )
 
+    # Load model from HuggingFace Hub (not Google Drive)
+    # local_files_only=False permits fetching from the HF Hub when files are not cached locally
     if model_type == "colqwen2.5":
         model = ColQwen2_5.from_pretrained(
             model_name,
             torch_dtype=torch.bfloat16,
             device_map=device,
             attn_implementation=attn_implementation,
+            local_files_only=False,  # Ensure download from HuggingFace Hub
         ).eval()
-        processor = ColQwen2_5_Processor.from_pretrained(model_name)
+        processor = ColQwen2_5_Processor.from_pretrained(model_name, local_files_only=False)
     elif model_type == "colqwen2":
         model = ColQwen2.from_pretrained(
             model_name,
             torch_dtype=torch.bfloat16,
             device_map=device,
             attn_implementation=attn_implementation,
+            local_files_only=False,  # Ensure download from HuggingFace Hub
         ).eval()
-        processor = ColQwen2Processor.from_pretrained(model_name)
+        processor = ColQwen2Processor.from_pretrained(model_name, local_files_only=False)
     else:  # colpali
         model = ColPali.from_pretrained(
             model_name,
             torch_dtype=torch.bfloat16,
             device_map=device,
+            local_files_only=False,  # Ensure download from HuggingFace Hub
         ).eval()
-        processor = cast(ColPaliProcessor, ColPaliProcessor.from_pretrained(model_name))
+        processor = cast(
+            ColPaliProcessor, ColPaliProcessor.from_pretrained(model_name, local_files_only=False)
+        )
 
     return model_name, model, processor, device_str, device, dtype
 
diff --git a/packages/leann-mcp/README.md b/packages/leann-mcp/README.md
index 356065d..cde8d29 100644
--- a/packages/leann-mcp/README.md
+++ b/packages/leann-mcp/README.md
@@ -53,6 +53,11 @@ leann build my-project --docs $(git ls-files)
 # Start Claude Code
 claude
 ```
+**Performance tip**: For maximum speed when storage space is not a concern, add the `--no-recompute` flag to your build command. This materializes all tensors and stores them on disk, avoiding recomputation on subsequent builds:
+
+```bash
+leann build my-project --docs $(git ls-files) --no-recompute
+```
 
 ## 🚀 Advanced Usage Examples to build the index