From 54df6310c5b202291cdfa8c9826acb8c144db0a8 Mon Sep 17 00:00:00 2001 From: Andy Lee Date: Sun, 3 Aug 2025 21:16:52 -0700 Subject: [PATCH] fix: diskann build and prevent termination from hanging - Fix OpenMP library linking in DiskANN CMake configuration - Add timeout protection for HuggingFace model loading to prevent hangs - Improve embedding server process termination with better timeouts - Make DiskANN backend default enabled alongside HNSW - Update documentation to reflect both backends included by default --- .../leann-backend-diskann/third_party/DiskANN | 2 +- packages/leann-core/src/leann/chat.py | 43 +++++++++++++++---- .../src/leann/embedding_server_manager.py | 12 +++++- packages/leann/README.md | 9 ++-- packages/leann/pyproject.toml | 7 ++- 5 files changed, 52 insertions(+), 21 deletions(-) diff --git a/packages/leann-backend-diskann/third_party/DiskANN b/packages/leann-backend-diskann/third_party/DiskANN index af2a264..67a2611 160000 --- a/packages/leann-backend-diskann/third_party/DiskANN +++ b/packages/leann-backend-diskann/third_party/DiskANN @@ -1 +1 @@ -Subproject commit af2a26481e65232b57b82d96e68833cdee9f7635 +Subproject commit 67a2611ad14bc11d84dfdb554c5567cfb78a2656 diff --git a/packages/leann-core/src/leann/chat.py b/packages/leann-core/src/leann/chat.py index d97cd74..621dab2 100644 --- a/packages/leann-core/src/leann/chat.py +++ b/packages/leann-core/src/leann/chat.py @@ -542,14 +542,41 @@ class HFChat(LLMInterface): self.device = "cpu" logger.info("No GPU detected. Using CPU.") - # Load tokenizer and model - self.tokenizer = AutoTokenizer.from_pretrained(model_name) - self.model = AutoModelForCausalLM.from_pretrained( - model_name, - torch_dtype=torch.float16 if self.device != "cpu" else torch.float32, - device_map="auto" if self.device != "cpu" else None, - trust_remote_code=True, - ) + # Load tokenizer and model with timeout protection + try: + import signal + + def timeout_handler(signum, frame): + raise TimeoutError("Model download/loading timed out") + + # Set timeout for model loading (60 seconds) + old_handler = signal.signal(signal.SIGALRM, timeout_handler) + signal.alarm(60) + + try: + logger.info(f"Loading tokenizer for {model_name}...") + self.tokenizer = AutoTokenizer.from_pretrained(model_name) + + logger.info(f"Loading model {model_name}...") + self.model = AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype=torch.float16 if self.device != "cpu" else torch.float32, + device_map="auto" if self.device != "cpu" else None, + trust_remote_code=True, + ) + logger.info(f"Successfully loaded {model_name}") + finally: + signal.alarm(0) # Cancel the alarm + signal.signal(signal.SIGALRM, old_handler) # Restore old handler + + except TimeoutError: + logger.error(f"Model loading timed out for {model_name}") + raise RuntimeError( + f"Model loading timed out for {model_name}. Please check your internet connection or try a smaller model." + ) + except Exception as e: + logger.error(f"Failed to load model {model_name}: {e}") + raise # Move model to device if not using device_map if self.device != "cpu" and "device_map" not in str(self.model): diff --git a/packages/leann-core/src/leann/embedding_server_manager.py b/packages/leann-core/src/leann/embedding_server_manager.py index 5a75ac7..2e1c12b 100644 --- a/packages/leann-core/src/leann/embedding_server_manager.py +++ b/packages/leann-core/src/leann/embedding_server_manager.py @@ -354,13 +354,21 @@ class EmbeddingServerManager: self.server_process.terminate() try: - self.server_process.wait(timeout=5) + self.server_process.wait(timeout=3) logger.info(f"Server process {self.server_process.pid} terminated.") except subprocess.TimeoutExpired: logger.warning( - f"Server process {self.server_process.pid} did not terminate gracefully, killing it." + f"Server process {self.server_process.pid} did not terminate gracefully within 3 seconds, killing it." ) self.server_process.kill() + try: + self.server_process.wait(timeout=2) + logger.info(f"Server process {self.server_process.pid} killed successfully.") + except subprocess.TimeoutExpired: + logger.error( + f"Failed to kill server process {self.server_process.pid} - it may be hung" + ) + # Don't hang indefinitely # Clean up process resources to prevent resource tracker warnings try: diff --git a/packages/leann/README.md b/packages/leann/README.md index 4281ef1..7a59958 100644 --- a/packages/leann/README.md +++ b/packages/leann/README.md @@ -5,11 +5,8 @@ LEANN is a revolutionary vector database that democratizes personal AI. Transfor ## Installation ```bash -# Default installation (HNSW backend, recommended) +# Default installation (includes both HNSW and DiskANN backends) uv pip install leann - -# With DiskANN backend (for large-scale deployments) -uv pip install leann[diskann] ``` ## Quick Start @@ -19,8 +16,8 @@ from leann import LeannBuilder, LeannSearcher, LeannChat from pathlib import Path INDEX_PATH = str(Path("./").resolve() / "demo.leann") -# Build an index -builder = LeannBuilder(backend_name="hnsw") +# Build an index (choose backend: "hnsw" or "diskann") +builder = LeannBuilder(backend_name="hnsw") # or "diskann" for large-scale deployments builder.add_text("LEANN saves 97% storage compared to traditional vector databases.") builder.add_text("Tung Tung Tung Sahur called—they need their banana‑crocodile hybrid back") builder.build_index(INDEX_PATH) diff --git a/packages/leann/pyproject.toml b/packages/leann/pyproject.toml index 9e94a4b..c72ea7f 100644 --- a/packages/leann/pyproject.toml +++ b/packages/leann/pyproject.toml @@ -24,16 +24,15 @@ classifiers = [ "Programming Language :: Python :: 3.12", ] -# Default installation: core + hnsw +# Default installation: core + hnsw + diskann dependencies = [ "leann-core>=0.1.0", "leann-backend-hnsw>=0.1.0", + "leann-backend-diskann>=0.1.0", ] [project.optional-dependencies] -diskann = [ - "leann-backend-diskann>=0.1.0", -] +# All backends now included by default [project.urls] Repository = "https://github.com/yichuan-w/LEANN"