Compare commits

...

4 Commits

Author SHA1 Message Date
Andy Lee
fcbcde1ea8 feat: implement smart memory configuration for DiskANN
- Add intelligent memory calculation based on data size and system specs
- search_memory_maximum: 1/10 of embedding size (controls PQ compression)
- build_memory_maximum: 50% of available RAM (controls sharding)
- Provides optimal balance between performance and memory usage
- Automatic fallback to default values if parameters are explicitly provided
2025-08-03 22:54:08 -07:00
Andy Lee
54df6310c5 fix: diskann build and prevent termination from hanging
- Fix OpenMP library linking in DiskANN CMake configuration
- Add timeout protection for HuggingFace model loading to prevent hangs
- Improve embedding server process termination with better timeouts
- Make DiskANN backend default enabled alongside HNSW
- Update documentation to reflect both backends included by default
2025-08-03 21:16:52 -07:00
yichuan520030910320
19bcc07814 change readme description 2025-07-28 20:52:45 -07:00
yichuan520030910320
8356e3c668 change to openai main cli 2025-07-28 17:39:14 -07:00
8 changed files with 125 additions and 33 deletions

View File

@@ -174,15 +174,28 @@ Ask questions directly about your personal PDFs, documents, and any directory co
<img src="videos/paper_clear.gif" alt="LEANN Document Search Demo" width="600">
</p>
The example below asks a question about summarizing two papers (uses default data in `examples/data`):
The example below asks a question about summarizing two papers (it uses the default data in `examples/data`) and is the easiest example to run:
```
# Or use python directly
```bash
source .venv/bin/activate
python ./examples/main_cli_example.py
```
<details>
<summary><strong>📋 Click to expand: User Configurable Arguments</strong></summary>
```bash
# Use custom index directory
python examples/main_cli_example.py --index-dir "./my_custom_index"
# Use custom data directory
python examples/main_cli_example.py --data-dir "./my_documents"
# Ask a specific question
python examples/main_cli_example.py --query "What are the main findings in these papers?"
```
</details>
### 📧 Your Personal Email Secretary: RAG on Apple Mail!
@@ -195,12 +208,12 @@ python ./examples/main_cli_example.py
**Note:** You need to grant full disk access to your terminal/VS Code in System Preferences → Privacy & Security → Full Disk Access.
```bash
python examples/mail_reader_leann.py --query "What's the food I ordered by doordash or Uber eat mostly?"
python examples/mail_reader_leann.py --query "What's the food I ordered by DoorDash or Uber Eats mostly?"
```
**780K email chunks → 78MB storage** Finally, search your email like you search Google.
**780K email chunks → 78MB storage.** Finally, search your email like you search Google.
<details>
<summary><strong>📋 Click to expand: Command Examples</strong></summary>
<summary><strong>📋 Click to expand: User Configurable Arguments</strong></summary>
```bash
# Use default mail path (works for most macOS setups)
@@ -242,7 +255,7 @@ python examples/google_history_reader_leann.py --query "Tell me my browser histo
**38K browser entries → 6MB storage.** Your browser history becomes your personal search engine.
<details>
<summary><strong>📋 Click to expand: Command Examples</strong></summary>
<summary><strong>📋 Click to expand: User Configurable Arguments</strong></summary>
```bash
# Use default Chrome profile (auto-finds all profiles)
@@ -319,7 +332,7 @@ Failed to find or export WeChat data. Exiting.
</details>
<details>
<summary><strong>📋 Click to expand: Command Examples</strong></summary>
<summary><strong>📋 Click to expand: User Configurable Arguments</strong></summary>
```bash
# Use default settings (recommended for first run)

View File

@@ -94,14 +94,14 @@ if __name__ == "__main__":
parser.add_argument(
"--llm",
type=str,
default="hf",
default="openai",
choices=["simulated", "ollama", "hf", "openai"],
help="The LLM backend to use.",
)
parser.add_argument(
"--model",
type=str,
default="Qwen/Qwen3-0.6B",
default="gpt-4o",
help="The model name to use (e.g., 'llama3:8b' for ollama, 'deepseek-ai/deepseek-llm-7b-chat' for hf, 'gpt-4o' for openai).",
)
parser.add_argument(

View File

@@ -7,6 +7,7 @@ from pathlib import Path
from typing import Any, Literal
import numpy as np
import psutil
from leann.interface import (
LeannBackendBuilderInterface,
LeannBackendFactoryInterface,
@@ -84,6 +85,43 @@ def _write_vectors_to_bin(data: np.ndarray, file_path: Path):
f.write(data.tobytes())
def _calculate_smart_memory_config(data: np.ndarray) -> tuple[float, float]:
    """
    Calculate smart memory configuration for DiskANN based on data size and system specs.

    Args:
        data: The embedding data array. Its shape is unpacked as
            ``(num_vectors, dim)``, so it must be 2-D. The size estimate
            assumes 4 bytes per element (float32) regardless of ``data.dtype``.

    Returns:
        tuple: (search_memory_maximum, build_memory_maximum) in GB

    Raises:
        ValueError: If ``data.shape`` does not have exactly two dimensions.
    """
    bytes_per_gb = 1024**3
    num_vectors, dim = data.shape

    # Calculate embedding storage size
    embedding_size_bytes = num_vectors * dim * 4  # float32 = 4 bytes
    embedding_size_gb = embedding_size_bytes / bytes_per_gb

    # search_memory_maximum: 1/10 of embedding size for optimal PQ compression
    # This controls Product Quantization size - smaller means more compression
    search_memory_gb = max(0.1, embedding_size_gb / 10)  # Floor of 0.1 GB

    # build_memory_maximum: Based on available system RAM for sharding control
    # This controls how much memory DiskANN uses during index construction
    # Take a single snapshot so `available` and `total` describe the same
    # instant (and the system is only queried once).
    memory = psutil.virtual_memory()
    available_memory_gb = memory.available / bytes_per_gb
    total_memory_gb = memory.total / bytes_per_gb

    # Use 50% of available memory, but at least 2GB and at most 75% of total.
    # NOTE(review): the 75%-of-total cap is a safety net only; it should never
    # bind as long as available <= total. The 2GB floor is applied without
    # checking physical RAM, so on very small machines the result can exceed
    # what is actually available - confirm this is intended.
    build_memory_gb = max(2.0, min(available_memory_gb * 0.5, total_memory_gb * 0.75))

    logger.info(
        f"Smart memory config - Data: {embedding_size_gb:.2f}GB, "
        f"Search mem: {search_memory_gb:.2f}GB (PQ control), "
        f"Build mem: {build_memory_gb:.2f}GB (sharding control)"
    )

    return search_memory_gb, build_memory_gb
@register_backend("diskann")
class DiskannBackend(LeannBackendFactoryInterface):
@staticmethod
@@ -121,6 +159,16 @@ class DiskannBuilder(LeannBackendBuilderInterface):
f"Unsupported distance_metric '{build_kwargs.get('distance_metric', 'unknown')}'."
)
# Calculate smart memory configuration if not explicitly provided
if (
"search_memory_maximum" not in build_kwargs
or "build_memory_maximum" not in build_kwargs
):
smart_search_mem, smart_build_mem = _calculate_smart_memory_config(data)
else:
smart_search_mem = build_kwargs.get("search_memory_maximum", 4.0)
smart_build_mem = build_kwargs.get("build_memory_maximum", 8.0)
try:
from . import _diskannpy as diskannpy # type: ignore
@@ -131,8 +179,8 @@ class DiskannBuilder(LeannBackendBuilderInterface):
index_prefix,
build_kwargs.get("complexity", 64),
build_kwargs.get("graph_degree", 32),
build_kwargs.get("search_memory_maximum", 4.0),
build_kwargs.get("build_memory_maximum", 8.0),
build_kwargs.get("search_memory_maximum", smart_search_mem),
build_kwargs.get("build_memory_maximum", smart_build_mem),
build_kwargs.get("num_threads", 8),
build_kwargs.get("pq_disk_bytes", 0),
"",

View File

@@ -542,14 +542,41 @@ class HFChat(LLMInterface):
self.device = "cpu"
logger.info("No GPU detected. Using CPU.")
# Load tokenizer and model
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
self.model = AutoModelForCausalLM.from_pretrained(
model_name,
torch_dtype=torch.float16 if self.device != "cpu" else torch.float32,
device_map="auto" if self.device != "cpu" else None,
trust_remote_code=True,
)
# Load tokenizer and model with timeout protection
try:
import signal
def timeout_handler(signum, frame):
raise TimeoutError("Model download/loading timed out")
# Set timeout for model loading (60 seconds)
old_handler = signal.signal(signal.SIGALRM, timeout_handler)
signal.alarm(60)
try:
logger.info(f"Loading tokenizer for {model_name}...")
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
logger.info(f"Loading model {model_name}...")
self.model = AutoModelForCausalLM.from_pretrained(
model_name,
torch_dtype=torch.float16 if self.device != "cpu" else torch.float32,
device_map="auto" if self.device != "cpu" else None,
trust_remote_code=True,
)
logger.info(f"Successfully loaded {model_name}")
finally:
signal.alarm(0) # Cancel the alarm
signal.signal(signal.SIGALRM, old_handler) # Restore old handler
except TimeoutError:
logger.error(f"Model loading timed out for {model_name}")
raise RuntimeError(
f"Model loading timed out for {model_name}. Please check your internet connection or try a smaller model."
)
except Exception as e:
logger.error(f"Failed to load model {model_name}: {e}")
raise
# Move model to device if not using device_map
if self.device != "cpu" and "device_map" not in str(self.model):

View File

@@ -354,13 +354,21 @@ class EmbeddingServerManager:
self.server_process.terminate()
try:
self.server_process.wait(timeout=5)
self.server_process.wait(timeout=3)
logger.info(f"Server process {self.server_process.pid} terminated.")
except subprocess.TimeoutExpired:
logger.warning(
f"Server process {self.server_process.pid} did not terminate gracefully, killing it."
f"Server process {self.server_process.pid} did not terminate gracefully within 3 seconds, killing it."
)
self.server_process.kill()
try:
self.server_process.wait(timeout=2)
logger.info(f"Server process {self.server_process.pid} killed successfully.")
except subprocess.TimeoutExpired:
logger.error(
f"Failed to kill server process {self.server_process.pid} - it may be hung"
)
# Don't hang indefinitely
# Clean up process resources to prevent resource tracker warnings
try:

View File

@@ -5,11 +5,8 @@ LEANN is a revolutionary vector database that democratizes personal AI. Transfor
## Installation
```bash
# Default installation (HNSW backend, recommended)
# Default installation (includes both HNSW and DiskANN backends)
uv pip install leann
# With DiskANN backend (for large-scale deployments)
uv pip install leann[diskann]
```
## Quick Start
@@ -19,8 +16,8 @@ from leann import LeannBuilder, LeannSearcher, LeannChat
from pathlib import Path
INDEX_PATH = str(Path("./").resolve() / "demo.leann")
# Build an index
builder = LeannBuilder(backend_name="hnsw")
# Build an index (choose backend: "hnsw" or "diskann")
builder = LeannBuilder(backend_name="hnsw") # or "diskann" for large-scale deployments
builder.add_text("LEANN saves 97% storage compared to traditional vector databases.")
builder.add_text("Tung Tung Tung Sahur called—they need their bananacrocodile hybrid back")
builder.build_index(INDEX_PATH)

View File

@@ -24,16 +24,15 @@ classifiers = [
"Programming Language :: Python :: 3.12",
]
# Default installation: core + hnsw
# Default installation: core + hnsw + diskann
dependencies = [
"leann-core>=0.1.0",
"leann-backend-hnsw>=0.1.0",
"leann-backend-diskann>=0.1.0",
]
[project.optional-dependencies]
diskann = [
"leann-backend-diskann>=0.1.0",
]
# All backends now included by default
[project.urls]
Repository = "https://github.com/yichuan-w/LEANN"