Compare commits: refactor-a...v0.2.0 (5 commits)

| SHA1 |
|---|
| dd71ac8d71 |
| 8bee1d4100 |
| 33521d6d00 |
| 8899734952 |
| 54df6310c5 |
```diff
@@ -99,7 +99,9 @@ if __name__ == "__main__":
     print("- 'What are the main techniques LEANN uses?'")
     print("- 'What is the technique DLPM?'")
     print("- 'Who does Elizabeth Bennet marry?'")
-    print("- 'What is the problem of developing pan gu model? (盘古大模型开发中遇到什么问题?)'")
+    print(
+        "- 'What is the problem of developing pan gu model Huawei meets? (盘古大模型开发中遇到什么问题?)'"
+    )
     print("\nOr run without --query for interactive mode\n")
 
     rag = DocumentRAG()
```
```diff
@@ -7,6 +7,7 @@ from pathlib import Path
 from typing import Any, Literal
 
 import numpy as np
+import psutil
 from leann.interface import (
     LeannBackendBuilderInterface,
     LeannBackendFactoryInterface,
```
```diff
@@ -84,6 +85,43 @@ def _write_vectors_to_bin(data: np.ndarray, file_path: Path):
         f.write(data.tobytes())
 
 
+def _calculate_smart_memory_config(data: np.ndarray) -> tuple[float, float]:
+    """
+    Calculate smart memory configuration for DiskANN based on data size and system specs.
+
+    Args:
+        data: The embedding data array
+
+    Returns:
+        tuple: (search_memory_maximum, build_memory_maximum) in GB
+    """
+    num_vectors, dim = data.shape
+
+    # Calculate embedding storage size
+    embedding_size_bytes = num_vectors * dim * 4  # float32 = 4 bytes
+    embedding_size_gb = embedding_size_bytes / (1024**3)
+
+    # search_memory_maximum: 1/10 of embedding size for optimal PQ compression
+    # This controls Product Quantization size - smaller means more compression
+    search_memory_gb = max(0.1, embedding_size_gb / 10)  # At least 100MB
+
+    # build_memory_maximum: Based on available system RAM for sharding control
+    # This controls how much memory DiskANN uses during index construction
+    available_memory_gb = psutil.virtual_memory().available / (1024**3)
+    total_memory_gb = psutil.virtual_memory().total / (1024**3)
+
+    # Use 50% of available memory, but at least 2GB and at most 75% of total
+    build_memory_gb = max(2.0, min(available_memory_gb * 0.5, total_memory_gb * 0.75))
+
+    logger.info(
+        f"Smart memory config - Data: {embedding_size_gb:.2f}GB, "
+        f"Search mem: {search_memory_gb:.2f}GB (PQ control), "
+        f"Build mem: {build_memory_gb:.2f}GB (sharding control)"
+    )
+
+    return search_memory_gb, build_memory_gb
+
+
 @register_backend("diskann")
 class DiskannBackend(LeannBackendFactoryInterface):
     @staticmethod
```
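The new helper's two budgets follow from simple arithmetic: 1,000,000 vectors at 768 dimensions occupy 1,000,000 × 768 × 4 B ≈ 2.86 GB of float32, so search_memory_maximum comes out to max(0.1, 2.86 / 10) ≈ 0.29 GB. A standalone sketch of the same sizing rule (the corpus shape and machine figures are illustrative, not taken from this compare):

```python
import psutil

# Hypothetical corpus: 1M embeddings at 768 dims, stored as float32.
num_vectors, dim = 1_000_000, 768

embedding_size_gb = num_vectors * dim * 4 / (1024**3)  # ~2.86 GB
search_memory_gb = max(0.1, embedding_size_gb / 10)    # ~0.29 GB PQ budget

# Build budget tracks host RAM, clamped to [2 GB, 75% of total].
vm = psutil.virtual_memory()
build_memory_gb = max(2.0, min(vm.available / (1024**3) * 0.5, vm.total / (1024**3) * 0.75))

print(f"search_memory_maximum={search_memory_gb:.2f}GB")
print(f"build_memory_maximum={build_memory_gb:.2f}GB")
```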
```diff
@@ -121,6 +159,16 @@ class DiskannBuilder(LeannBackendBuilderInterface):
                 f"Unsupported distance_metric '{build_kwargs.get('distance_metric', 'unknown')}'."
             )
 
+        # Calculate smart memory configuration if not explicitly provided
+        if (
+            "search_memory_maximum" not in build_kwargs
+            or "build_memory_maximum" not in build_kwargs
+        ):
+            smart_search_mem, smart_build_mem = _calculate_smart_memory_config(data)
+        else:
+            smart_search_mem = build_kwargs.get("search_memory_maximum", 4.0)
+            smart_build_mem = build_kwargs.get("build_memory_maximum", 8.0)
+
         try:
             from . import _diskannpy as diskannpy  # type: ignore
```
```diff
@@ -131,8 +179,8 @@ class DiskannBuilder(LeannBackendBuilderInterface):
             index_prefix,
             build_kwargs.get("complexity", 64),
             build_kwargs.get("graph_degree", 32),
-            build_kwargs.get("search_memory_maximum", 4.0),
-            build_kwargs.get("build_memory_maximum", 8.0),
+            build_kwargs.get("search_memory_maximum", smart_search_mem),
+            build_kwargs.get("build_memory_maximum", smart_build_mem),
             build_kwargs.get("num_threads", 8),
             build_kwargs.get("pq_disk_bytes", 0),
             "",
```
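With this fallback the computed budgets apply only when the caller supplies neither limit. Assuming the builder forwards extra keyword arguments through as build_kwargs (only the backend side is shown in this compare), an explicit override might look like:

```python
from leann import LeannBuilder

# Hypothetical explicit override: when both limits appear in build_kwargs,
# _calculate_smart_memory_config is skipped entirely.
builder = LeannBuilder(
    backend_name="diskann",
    search_memory_maximum=4.0,  # GB, controls PQ size
    build_memory_maximum=8.0,   # GB, controls sharding during build
)
```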
```diff
@@ -4,8 +4,8 @@ build-backend = "scikit_build_core.build"
 
 [project]
 name = "leann-backend-diskann"
-version = "0.1.16"
-dependencies = ["leann-core==0.1.16", "numpy", "protobuf>=3.19.0"]
+version = "0.2.0"
+dependencies = ["leann-core==0.2.0", "numpy", "protobuf>=3.19.0"]
 
 [tool.scikit-build]
 # Key: simplified CMake path
```
```diff
@@ -6,10 +6,10 @@ build-backend = "scikit_build_core.build"
 
 [project]
 name = "leann-backend-hnsw"
-version = "0.1.16"
+version = "0.2.0"
 description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
 dependencies = [
-    "leann-core==0.1.16",
+    "leann-core==0.2.0",
     "numpy",
     "pyzmq>=23.0.0",
     "msgpack>=1.0.0",
```
```diff
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "leann-core"
-version = "0.1.16"
+version = "0.2.0"
 description = "Core API and plugin system for LEANN"
 readme = "README.md"
 requires-python = ">=3.9"
```
```diff
@@ -636,7 +636,10 @@ class LeannChat:
             "Please provide the best answer you can based on this context and your knowledge."
         )
 
+        ask_time = time.time()
         ans = self.llm.ask(prompt, **llm_kwargs)
+        ask_time = time.time() - ask_time
+        logger.info(f" Ask time: {ask_time} seconds")
         return ans
 
     def start_interactive(self):
```
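For reference, the same measurement as a reusable helper; this sketch uses time.perf_counter(), the monotonic clock intended for elapsed intervals, whereas the hunk uses wall-clock time.time():

```python
import logging
import time

logger = logging.getLogger(__name__)

def timed_ask(llm, prompt, **llm_kwargs):
    start = time.perf_counter()  # monotonic, unaffected by system clock changes
    ans = llm.ask(prompt, **llm_kwargs)
    logger.info(f" Ask time: {time.perf_counter() - start:.2f} seconds")
    return ans
```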
```diff
@@ -358,7 +358,11 @@ def validate_model_and_suggest(model_name: str, llm_type: str) -> str | None:
         error_msg += f"\n\nModel '{model_name}' was not found in Ollama's library."
 
         if suggestions:
-            error_msg += "\n\nDid you mean one of these installed models?\n"
+            error_msg += (
+                "\n\nDid you mean one of these installed models?\n"
+                + "\nTry to use ollama pull to install the model you need\n"
+            )
+
             for i, suggestion in enumerate(suggestions, 1):
                 error_msg += f" {i}. {suggestion}\n"
         else:
```
```diff
@@ -542,14 +546,41 @@ class HFChat(LLMInterface):
             self.device = "cpu"
             logger.info("No GPU detected. Using CPU.")
 
-        # Load tokenizer and model
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        self.model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            torch_dtype=torch.float16 if self.device != "cpu" else torch.float32,
-            device_map="auto" if self.device != "cpu" else None,
-            trust_remote_code=True,
-        )
+        # Load tokenizer and model with timeout protection
+        try:
+            import signal
+
+            def timeout_handler(signum, frame):
+                raise TimeoutError("Model download/loading timed out")
+
+            # Set timeout for model loading (60 seconds)
+            old_handler = signal.signal(signal.SIGALRM, timeout_handler)
+            signal.alarm(60)
+
+            try:
+                logger.info(f"Loading tokenizer for {model_name}...")
+                self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+                logger.info(f"Loading model {model_name}...")
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    model_name,
+                    torch_dtype=torch.float16 if self.device != "cpu" else torch.float32,
+                    device_map="auto" if self.device != "cpu" else None,
+                    trust_remote_code=True,
+                )
+                logger.info(f"Successfully loaded {model_name}")
+            finally:
+                signal.alarm(0)  # Cancel the alarm
+                signal.signal(signal.SIGALRM, old_handler)  # Restore old handler
+
+        except TimeoutError:
+            logger.error(f"Model loading timed out for {model_name}")
+            raise RuntimeError(
+                f"Model loading timed out for {model_name}. Please check your internet connection or try a smaller model."
+            )
+        except Exception as e:
+            logger.error(f"Failed to load model {model_name}: {e}")
+            raise
 
         # Move model to device if not using device_map
         if self.device != "cpu" and "device_map" not in str(self.model):
```
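The hunk above guards Hugging Face loading with a SIGALRM timer. Two constraints the code relies on implicitly: signal.alarm exists only on Unix, and the handler must be installed from the main thread. A minimal self-contained sketch of the pattern:

```python
import signal
import time

def timeout_handler(signum, frame):
    raise TimeoutError("operation timed out")

old_handler = signal.signal(signal.SIGALRM, timeout_handler)
signal.alarm(5)  # deliver SIGALRM in 5 seconds
try:
    time.sleep(10)  # stand-in for a slow model download
except TimeoutError:
    print("timed out, as expected")
finally:
    signal.alarm(0)                             # cancel any pending alarm
    signal.signal(signal.SIGALRM, old_handler)  # restore the previous handler
```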
```diff
@@ -354,13 +354,21 @@ class EmbeddingServerManager:
         self.server_process.terminate()
 
         try:
-            self.server_process.wait(timeout=5)
+            self.server_process.wait(timeout=3)
             logger.info(f"Server process {self.server_process.pid} terminated.")
         except subprocess.TimeoutExpired:
             logger.warning(
-                f"Server process {self.server_process.pid} did not terminate gracefully, killing it."
+                f"Server process {self.server_process.pid} did not terminate gracefully within 3 seconds, killing it."
            )
             self.server_process.kill()
+            try:
+                self.server_process.wait(timeout=2)
+                logger.info(f"Server process {self.server_process.pid} killed successfully.")
+            except subprocess.TimeoutExpired:
+                logger.error(
+                    f"Failed to kill server process {self.server_process.pid} - it may be hung"
+                )
+                # Don't hang indefinitely
 
         # Clean up process resources to prevent resource tracker warnings
         try:
```
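The shutdown path now escalates rather than trusting a single kill. A self-contained sketch of the terminate, wait, kill, wait sequence on a plain Popen (the sleep command is a stand-in for the embedding server):

```python
import subprocess

proc = subprocess.Popen(["sleep", "100"])  # stand-in for the embedding server
proc.terminate()                           # polite SIGTERM first
try:
    proc.wait(timeout=3)
except subprocess.TimeoutExpired:
    proc.kill()                            # escalate to SIGKILL
    try:
        proc.wait(timeout=2)
    except subprocess.TimeoutExpired:
        pass                               # give up rather than block forever
```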
````diff
@@ -5,11 +5,8 @@ LEANN is a revolutionary vector database that democratizes personal AI. Transfor
 ## Installation
 
 ```bash
-# Default installation (HNSW backend, recommended)
+# Default installation (includes both HNSW and DiskANN backends)
 uv pip install leann
-
-# With DiskANN backend (for large-scale deployments)
-uv pip install leann[diskann]
 ```
 
 ## Quick Start
````
```diff
@@ -19,8 +16,8 @@ from leann import LeannBuilder, LeannSearcher, LeannChat
 from pathlib import Path
 INDEX_PATH = str(Path("./").resolve() / "demo.leann")
 
-# Build an index
-builder = LeannBuilder(backend_name="hnsw")
+# Build an index (choose backend: "hnsw" or "diskann")
+builder = LeannBuilder(backend_name="hnsw")  # or "diskann" for large-scale deployments
 builder.add_text("LEANN saves 97% storage compared to traditional vector databases.")
 builder.add_text("Tung Tung Tung Sahur called—they need their banana‑crocodile hybrid back")
 builder.build_index(INDEX_PATH)
```
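The Quick Start imports LeannSearcher and LeannChat, but this hunk ends at build_index. A plausible continuation, with the caveat that the constructor and search() signature below are assumptions inferred from the import line, not shown anywhere in this compare:

```python
# Assumed API based on the imports above; not shown in this compare.
searcher = LeannSearcher(INDEX_PATH)
results = searcher.search("How much storage does LEANN save?", top_k=1)  # hypothetical signature
print(results)
```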
```diff
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "leann"
-version = "0.1.16"
+version = "0.2.0"
 description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
 readme = "README.md"
 requires-python = ">=3.9"
```
```diff
@@ -24,16 +24,15 @@ classifiers = [
     "Programming Language :: Python :: 3.12",
 ]
 
-# Default installation: core + hnsw
+# Default installation: core + hnsw + diskann
 dependencies = [
     "leann-core>=0.1.0",
     "leann-backend-hnsw>=0.1.0",
+    "leann-backend-diskann>=0.1.0",
 ]
 
 [project.optional-dependencies]
-diskann = [
-    "leann-backend-diskann>=0.1.0",
-]
+# All backends now included by default
 
 [project.urls]
 Repository = "https://github.com/yichuan-w/LEANN"
```