docs: update README to use proper module imports for apps

- Change from 'python apps/xxx.py' to 'python -m apps.xxx'
- More professional and pythonic module calling
- Ensures proper module resolution and imports
- Better separation between apps/ (production tools) and examples/ (demos)
merge
2025-08-03 23:05:48 -07:00 · 2025-08-03 23:02:45 -07:00 · 2025-08-03 23:02:12 -07:00 · 2025-08-03 23:02:06 -07:00 · 2025-08-03 22:42:16 -07:00 · 2025-08-03 22:41:20 -07:00
10 changed files with 30 additions and 118 deletions
--- a/apps/document_rag.py
+++ b/apps/document_rag.py
@@ -99,9 +99,7 @@ if __name__ == "__main__":
    print("- 'What are the main techniques LEANN uses?'")
    print("- 'What is the technique DLPM?'")
    print("- 'Who does Elizabeth Bennet marry?'")
-    print(
-        "- 'What is the problem of developing pan gu model Huawei meets? (盘古大模型开发中遇到什么问题?)'"
-    )
+    print("- 'What is the problem of developing pan gu model? (盘古大模型开发中遇到什么问题?)'")
    print("\nOr run without --query for interactive mode\n")

    rag = DocumentRAG()
--- a/packages/leann-backend-diskann/leann_backend_diskann/diskann_backend.py
+++ b/packages/leann-backend-diskann/leann_backend_diskann/diskann_backend.py
@@ -7,7 +7,6 @@ from pathlib import Path
 from typing import Any, Literal

 import numpy as np
-import psutil
 from leann.interface import (
    LeannBackendBuilderInterface,
    LeannBackendFactoryInterface,
@@ -85,43 +84,6 @@ def _write_vectors_to_bin(data: np.ndarray, file_path: Path):
        f.write(data.tobytes())


-def _calculate_smart_memory_config(data: np.ndarray) -> tuple[float, float]:
-    """
-    Calculate smart memory configuration for DiskANN based on data size and system specs.
-
-    Args:
-        data: The embedding data array
-
-    Returns:
-        tuple: (search_memory_maximum, build_memory_maximum) in GB
-    """
-    num_vectors, dim = data.shape
-
-    # Calculate embedding storage size
-    embedding_size_bytes = num_vectors * dim * 4  # float32 = 4 bytes
-    embedding_size_gb = embedding_size_bytes / (1024**3)
-
-    # search_memory_maximum: 1/10 of embedding size for optimal PQ compression
-    # This controls Product Quantization size - smaller means more compression
-    search_memory_gb = max(0.1, embedding_size_gb / 10)  # At least 100MB
-
-    # build_memory_maximum: Based on available system RAM for sharding control
-    # This controls how much memory DiskANN uses during index construction
-    available_memory_gb = psutil.virtual_memory().available / (1024**3)
-    total_memory_gb = psutil.virtual_memory().total / (1024**3)
-
-    # Use 50% of available memory, but at least 2GB and at most 75% of total
-    build_memory_gb = max(2.0, min(available_memory_gb * 0.5, total_memory_gb * 0.75))
-
-    logger.info(
-        f"Smart memory config - Data: {embedding_size_gb:.2f}GB, "
-        f"Search mem: {search_memory_gb:.2f}GB (PQ control), "
-        f"Build mem: {build_memory_gb:.2f}GB (sharding control)"
-    )
-
-    return search_memory_gb, build_memory_gb
-
-
@register_backend("diskann")
 class DiskannBackend(LeannBackendFactoryInterface):
    @staticmethod
@@ -159,16 +121,6 @@ class DiskannBuilder(LeannBackendBuilderInterface):
                f"Unsupported distance_metric '{build_kwargs.get('distance_metric', 'unknown')}'."
            )

-        # Calculate smart memory configuration if not explicitly provided
-        if (
-            "search_memory_maximum" not in build_kwargs
-            or "build_memory_maximum" not in build_kwargs
-        ):
-            smart_search_mem, smart_build_mem = _calculate_smart_memory_config(data)
-        else:
-            smart_search_mem = build_kwargs.get("search_memory_maximum", 4.0)
-            smart_build_mem = build_kwargs.get("build_memory_maximum", 8.0)
-
        try:
            from . import _diskannpy as diskannpy  # type: ignore

@@ -179,8 +131,8 @@ class DiskannBuilder(LeannBackendBuilderInterface):
                    index_prefix,
                    build_kwargs.get("complexity", 64),
                    build_kwargs.get("graph_degree", 32),
-                    build_kwargs.get("search_memory_maximum", smart_search_mem),
-                    build_kwargs.get("build_memory_maximum", smart_build_mem),
+                    build_kwargs.get("search_memory_maximum", 4.0),
+                    build_kwargs.get("build_memory_maximum", 8.0),
                    build_kwargs.get("num_threads", 8),
                    build_kwargs.get("pq_disk_bytes", 0),
                    "",
--- a/packages/leann-backend-diskann/pyproject.toml
+++ b/packages/leann-backend-diskann/pyproject.toml
@@ -4,8 +4,8 @@ build-backend = "scikit_build_core.build"

 [project]
 name = "leann-backend-diskann"
-version = "0.2.0"
-dependencies = ["leann-core==0.2.0", "numpy", "protobuf>=3.19.0"]
+version = "0.1.16"
+dependencies = ["leann-core==0.1.16", "numpy", "protobuf>=3.19.0"]

 [tool.scikit-build]
 # Key: simplified CMake path
--- a/packages/leann-backend-hnsw/pyproject.toml
+++ b/packages/leann-backend-hnsw/pyproject.toml
@@ -6,10 +6,10 @@ build-backend = "scikit_build_core.build"

 [project]
 name = "leann-backend-hnsw"
-version = "0.2.0"
+version = "0.1.16"
 description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
 dependencies = [
-    "leann-core==0.2.0",
+    "leann-core==0.1.16",
    "numpy",
    "pyzmq>=23.0.0",
    "msgpack>=1.0.0",
--- a/packages/leann-core/pyproject.toml
+++ b/packages/leann-core/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "leann-core"
-version = "0.2.0"
+version = "0.1.16"
 description = "Core API and plugin system for LEANN"
 readme = "README.md"
 requires-python = ">=3.9"
--- a/packages/leann-core/src/leann/api.py
+++ b/packages/leann-core/src/leann/api.py
@@ -636,10 +636,7 @@ class LeannChat:
            "Please provide the best answer you can based on this context and your knowledge."
        )

-        ask_time = time.time()
        ans = self.llm.ask(prompt, **llm_kwargs)
-        ask_time = time.time() - ask_time
-        logger.info(f"  Ask time: {ask_time} seconds")
        return ans

    def start_interactive(self):
--- a/packages/leann-core/src/leann/chat.py
+++ b/packages/leann-core/src/leann/chat.py
@@ -358,11 +358,7 @@ def validate_model_and_suggest(model_name: str, llm_type: str) -> str | None:
                error_msg += f"\n\nModel '{model_name}' was not found in Ollama's library."

                if suggestions:
-                    error_msg += (
-                        "\n\nDid you mean one of these installed models?\n"
-                        + "\nTry to use ollama pull to install the model you need\n"
-                    )
-
+                    error_msg += "\n\nDid you mean one of these installed models?\n"
                    for i, suggestion in enumerate(suggestions, 1):
                        error_msg += f"  {i}. {suggestion}\n"
                else:
@@ -546,41 +542,14 @@ class HFChat(LLMInterface):
            self.device = "cpu"
            logger.info("No GPU detected. Using CPU.")

-        # Load tokenizer and model with timeout protection
-        try:
-            import signal
-
-            def timeout_handler(signum, frame):
-                raise TimeoutError("Model download/loading timed out")
-
-            # Set timeout for model loading (60 seconds)
-            old_handler = signal.signal(signal.SIGALRM, timeout_handler)
-            signal.alarm(60)
-
-            try:
-                logger.info(f"Loading tokenizer for {model_name}...")
-                self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-                logger.info(f"Loading model {model_name}...")
-                self.model = AutoModelForCausalLM.from_pretrained(
-                    model_name,
-                    torch_dtype=torch.float16 if self.device != "cpu" else torch.float32,
-                    device_map="auto" if self.device != "cpu" else None,
-                    trust_remote_code=True,
-                )
-                logger.info(f"Successfully loaded {model_name}")
-            finally:
-                signal.alarm(0)  # Cancel the alarm
-                signal.signal(signal.SIGALRM, old_handler)  # Restore old handler
-
-        except TimeoutError:
-            logger.error(f"Model loading timed out for {model_name}")
-            raise RuntimeError(
-                f"Model loading timed out for {model_name}. Please check your internet connection or try a smaller model."
-            )
-        except Exception as e:
-            logger.error(f"Failed to load model {model_name}: {e}")
-            raise
+        # Load tokenizer and model
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch.float16 if self.device != "cpu" else torch.float32,
+            device_map="auto" if self.device != "cpu" else None,
+            trust_remote_code=True,
+        )

        # Move model to device if not using device_map
        if self.device != "cpu" and "device_map" not in str(self.model):
--- a/packages/leann-core/src/leann/embedding_server_manager.py
+++ b/packages/leann-core/src/leann/embedding_server_manager.py
@@ -354,21 +354,13 @@ class EmbeddingServerManager:
        self.server_process.terminate()

        try:
-            self.server_process.wait(timeout=3)
+            self.server_process.wait(timeout=5)
            logger.info(f"Server process {self.server_process.pid} terminated.")
        except subprocess.TimeoutExpired:
            logger.warning(
-                f"Server process {self.server_process.pid} did not terminate gracefully within 3 seconds, killing it."
+                f"Server process {self.server_process.pid} did not terminate gracefully, killing it."
            )
            self.server_process.kill()
-            try:
-                self.server_process.wait(timeout=2)
-                logger.info(f"Server process {self.server_process.pid} killed successfully.")
-            except subprocess.TimeoutExpired:
-                logger.error(
-                    f"Failed to kill server process {self.server_process.pid} - it may be hung"
-                )
-                # Don't hang indefinitely

        # Clean up process resources to prevent resource tracker warnings
        try:
--- a/packages/leann/README.md
+++ b/packages/leann/README.md
@@ -5,8 +5,11 @@ LEANN is a revolutionary vector database that democratizes personal AI. Transfor
 ## Installation

 ```bash
-# Default installation (includes both HNSW and DiskANN backends)
+# Default installation (HNSW backend, recommended)
 uv pip install leann
+
+# With DiskANN backend (for large-scale deployments)
+uv pip install leann[diskann]
 ```

 ## Quick Start
@@ -16,8 +19,8 @@ from leann import LeannBuilder, LeannSearcher, LeannChat
 from pathlib import Path
 INDEX_PATH = str(Path("./").resolve() / "demo.leann")

-# Build an index (choose backend: "hnsw" or "diskann")
-builder = LeannBuilder(backend_name="hnsw")  # or "diskann" for large-scale deployments
+# Build an index
+builder = LeannBuilder(backend_name="hnsw")
 builder.add_text("LEANN saves 97% storage compared to traditional vector databases.")
 builder.add_text("Tung Tung Tung Sahur called—they need their banana‑crocodile hybrid back")
 builder.build_index(INDEX_PATH)
--- a/packages/leann/pyproject.toml
+++ b/packages/leann/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "leann"
-version = "0.2.0"
+version = "0.1.16"
 description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
 readme = "README.md"
 requires-python = ">=3.9"
@@ -24,15 +24,16 @@ classifiers = [
    "Programming Language :: Python :: 3.12",
 ]

-# Default installation: core + hnsw + diskann
+# Default installation: core + hnsw
 dependencies = [
    "leann-core>=0.1.0",
    "leann-backend-hnsw>=0.1.0",
-    "leann-backend-diskann>=0.1.0",
 ]

 [project.optional-dependencies]
-# All backends now included by default
+diskann = [
+    "leann-backend-diskann>=0.1.0",
+]

 [project.urls]
 Repository = "https://github.com/yichuan-w/LEANN"
Author	SHA1	Message	Date
Andy Lee	0877960547	docs: update README to use proper module imports for apps - Change from 'python apps/xxx.py' to 'python -m apps.xxx' - More professional and pythonic module calling - Ensures proper module resolution and imports - Better separation between apps/ (production tools) and examples/ (demos)	2025-08-03 23:05:48 -07:00
yichuan520030910320	d68af63d05	merge	2025-08-03 23:02:45 -07:00
yichuan520030910320	b844aca968	Merge branch 'refactor-app' of https://github.com/yichuan-w/LEANN into refactor-app	2025-08-03 23:02:12 -07:00
yichuan520030910320	85277ba67a	fix wechat	2025-08-03 23:02:06 -07:00
Andy Lee	e9562acdc2	fix: handle certificate errors in link checker	2025-08-03 22:42:16 -07:00
Andy Lee	7fd3db1ddb	fix: add init.py	2025-08-03 22:41:20 -07:00
Andy Lee	c1ccc51a75	refactor: reorganize examples and add link checker	2025-08-03 22:40:15 -07:00
Andy Lee	b0239b6e4d	refactor: reorganize all examples/ and test/	2025-08-03 22:37:45 -07:00
yichuan520030910320	58556ef44c	merge	2025-08-03 22:29:30 -07:00
yichuan520030910320	87c930d705	fix email wrong -1 to process all file	2025-08-03 22:27:04 -07:00
Andy Lee	86f919a6da	fix: WeChat history reader bugs and refactor wechat_rag to use unified architecture	2025-08-03 21:54:40 -07:00
Andy Lee	f8d34663b4	feat: check if k is larger than #docs	2025-08-03 21:41:53 -07:00
yichuan520030910320	568cf597f4	fix some example	2025-08-03 21:19:05 -07:00
yichuan520030910320	baf70dc411	change rebuild logic	2025-08-03 20:54:52 -07:00
yichuan520030910320	7ad2ec39d6	add response highlight	2025-08-03 20:32:07 -07:00
Andy Lee	31fd3c816a	fix: update default embedding models for better performance - Change WeChat, Browser, and Email RAG examples to use all-MiniLM-L6-v2 - Previous Qwen/Qwen3-Embedding-0.6B was too slow for these use cases - all-MiniLM-L6-v2 is a fast 384-dim model, ideal for large-scale personal data	2025-08-02 19:04:59 -07:00
Andy Lee	1f6c7f2f5a	docs: Emphasize diverse data sources in examples/data description	2025-07-30 22:42:34 -07:00
Andy Lee	c1124eb349	feat: Update documentation based on review feedback - Add MLX embedding example to README - Clarify examples/data content description (two papers, Pride and Prejudice, Chinese README) - Move chunk parameters to common parameters section - Remove duplicate chunk parameters from document-specific section	2025-07-30 18:05:39 -07:00
Andy Lee	274bbb19ea	feat: Add chunk-size parameters and improve file type filtering - Add --chunk-size and --chunk-overlap parameters to all RAG examples - Preserve original default values for each data source: - Document: 256/128 (optimized for general documents) - Email: 256/25 (smaller overlap for email threads) - Browser: 256/128 (standard for web content) - WeChat: 192/64 (smaller chunks for chat messages) - Make --file-types optional filter instead of restriction in document_rag - Update README to clarify interactive mode and parameter usage - Fix LLM default model documentation (gpt-4o, not gpt-4o-mini)	2025-07-29 18:31:56 -07:00
Andy Lee	8c152c7a31	feat: Address review comments - Add complexity parameter to LeannChat initialization (default: search_complexity) - Fix chunk-size default in README documentation (256, not 2048) - Add more index building parameters as CLI arguments: - --backend-name (hnsw/diskann) - --graph-degree (default: 32) - --build-complexity (default: 64) - --no-compact (disable compact storage) - --no-recompute (disable embedding recomputation) - Update README to document all new parameters	2025-07-29 16:59:24 -07:00
Andy Lee	ce77eef13a	fix: Fix async/await and add_text issues in unified examples - Remove incorrect await from chat.ask() calls (not async) - Fix add_texts -> add_text method calls - Verify search-complexity correctly maps to efSearch parameter - All examples now run successfully	2025-07-29 16:00:58 -07:00
Andy Lee	9d77175ac8	fix: Fix issues in unified examples - Add smart path detection for data directory - Fix add_texts -> add_text method call - Handle both running from project root and examples directory	2025-07-29 15:55:46 -07:00
Andy Lee	7fbb6c98ef	docs: nit	2025-07-29 14:30:04 -07:00
Andy Lee	914a248c28	docs: Add introduction for Common Parameters section - Add 'Flexible Configuration' heading with descriptive sentence - Create parallel structure with 'Generation Model Setup' section - Improve document flow and readability	2025-07-29 14:16:33 -07:00
Andy Lee	55fc5862f9	docs: Fix collapsible sections - Make Common Parameters collapsible (as it's lengthy reference material) - Keep CLI Installation visible (important for users to see immediately) - Better information hierarchy	2025-07-29 14:14:26 -07:00
Andy Lee	fd97b8dfa8	style: format	2025-07-29 14:11:49 -07:00
Andy Lee	57959947a1	docs: Add collapsible section for CLI installation - Wrap CLI installation instructions in details/summary tags - Keep consistent with other collapsible sections in README - Improve document readability and navigation	2025-07-29 14:10:30 -07:00
Andy Lee	cc0c091ca5	docs: Clarify CLI global installation process - Explain the transition from venv to global installation - Add upgrade command for global installation - Make it clear that global install allows usage without venv activation	2025-07-29 14:06:16 -07:00
Andy Lee	ff389c7d8d	docs: Add CLI installation instructions - Add two installation options: venv and global uv tool - Clearly explain when to use each option - Make CLI more accessible for daily use	2025-07-29 14:05:33 -07:00
Andy Lee	6780a8eaba	docs: polish applications	2025-07-29 14:04:34 -07:00
Andy Lee	984056f126	docs: Reorganize parameter documentation structure - Move common parameters to a dedicated section before all examples - Rename sections to 'X-Specific Arguments' for clarity - Remove duplicate common parameters from individual examples - Better information architecture for users	2025-07-29 14:01:19 -07:00
Andy Lee	bd4451bf50	docs: Make example commands more representative - Add default values to parameter descriptions - Replace generic examples with real-world use cases - Focus on data-source-specific features in examples - Remove redundant demonstrations of common parameters	2025-07-29 13:59:29 -07:00
Andy Lee	34e313f64a	docs: Improve parameter categorization in README - Clearly separate core (shared) vs specific parameters - Move LLM and embedding examples to 'Example Commands' section - Add descriptive comments for all specific parameters - Keep only truly data-source-specific parameters in specific sections	2025-07-29 13:54:47 -07:00
Andy Lee	ddc789b231	fix: Restore embedding-mode parameter to all examples - All examples now have --embedding-mode parameter (unified interface benefit) - Default is 'sentence-transformers' (consistent with original behavior) - Users can now use OpenAI or MLX embeddings with any data source - Maintains functional equivalence with original scripts	2025-07-29 13:33:40 -07:00
Andy Lee	ff1b622bdd	refactor: Remove old example scripts and migration references - Delete old example scripts (mail_reader_leann.py, google_history_reader_leann.py, etc.) - Remove migration hints and backward compatibility - Update tests to use new unified examples directly - Clean up all references to old script names - Users now only see the new unified interface	2025-07-29 12:39:36 -07:00
Andy Lee	3cde4fc7b3	fix: Fix pre-commit issues and update tests - Fix import sorting and unused imports - Update type annotations to use built-in types (list, dict) instead of typing.List/Dict - Fix trailing whitespace and end-of-file issues - Fix Chinese fullwidth comma to regular comma - Update test_main_cli.py to test_document_rag.py - Add backward compatibility test for main_cli_example.py - Pass all pre-commit hooks (ruff, ruff-format, etc.)	2025-07-29 10:19:05 -07:00
Andy Lee	4e3bcda5fa	fix: Update CI tests for new unified examples interface - Rename test_main_cli.py to test_document_rag.py - Update all references from main_cli_example.py to document_rag.py - Update tests/README.md documentation The tests now properly test the new unified interface while maintaining the same test coverage and functionality.	2025-07-28 23:16:51 -07:00
Andy Lee	46f6f76fc3	refactor: Unify examples interface with BaseRAGExample - Create BaseRAGExample base class for all RAG examples - Refactor 4 examples to use unified interface: - document_rag.py (replaces main_cli_example.py) - email_rag.py (replaces mail_reader_leann.py) - browser_rag.py (replaces google_history_reader_leann.py) - wechat_rag.py (replaces wechat_history_reader_leann.py) - Maintain 100% parameter compatibility with original files - Add interactive mode support for all examples - Unify parameter names (--max-items replaces --max-emails/--max-entries) - Update README.md with new examples usage - Add PARAMETER_CONSISTENCY.md documenting all parameter mappings - Keep main_cli_example.py for backward compatibility with migration notice All default values, LeannBuilder parameters, and chunking settings remain identical to ensure full compatibility with existing indexes.	2025-07-28 23:11:16 -07:00