Compare commits
21 Commits
feat/diska
...
fix-macos-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dc4987591b | ||
|
|
d8b6ae8d1a | ||
|
|
f2ffcf5665 | ||
|
|
27d0d73f99 | ||
|
|
b124709bcd | ||
|
|
78251a6d4c | ||
|
|
16c833da86 | ||
|
|
c246cb4a01 | ||
|
|
0f34aee5db | ||
|
|
3e53d3d264 | ||
|
|
22c8f861bc | ||
|
|
a52e3c583a | ||
|
|
ab339886dd | ||
|
|
8c988cf98b | ||
|
|
ac5fd844a5 | ||
|
|
4b4b825fec | ||
|
|
34ef0db42f | ||
|
|
41812c7d22 | ||
|
|
2047a1a128 | ||
|
|
402e8f97ad | ||
|
|
9a5c197acd |
29
README.md
29
README.md
@@ -174,28 +174,15 @@ Ask questions directly about your personal PDFs, documents, and any directory co
|
||||
<img src="videos/paper_clear.gif" alt="LEANN Document Search Demo" width="600">
|
||||
</p>
|
||||
|
||||
The example below asks a question about summarizing two papers (uses default data in `examples/data`) and this is the easiest example to run here:
|
||||
The example below asks a question about summarizing two papers (uses default data in `examples/data`):
|
||||
|
||||
```bash
|
||||
```
|
||||
# Or use python directly
|
||||
source .venv/bin/activate
|
||||
python ./examples/main_cli_example.py
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary><strong>📋 Click to expand: User Configurable Arguments</strong></summary>
|
||||
|
||||
```bash
|
||||
# Use custom index directory
|
||||
python examples/main_cli_example.py --index-dir "./my_custom_index"
|
||||
|
||||
# Use custom data directory
|
||||
python examples/main_cli_example.py --data-dir "./my_documents"
|
||||
|
||||
# Ask a specific question
|
||||
python examples/main_cli_example.py --query "What are the main findings in these papers?"
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
### 📧 Your Personal Email Secretary: RAG on Apple Mail!
|
||||
|
||||
@@ -208,12 +195,12 @@ python examples/main_cli_example.py --query "What are the main findings in these
|
||||
|
||||
**Note:** You need to grant full disk access to your terminal/VS Code in System Preferences → Privacy & Security → Full Disk Access.
|
||||
```bash
|
||||
python examples/mail_reader_leann.py --query "What's the food I ordered by DoorDash or Uber Eats mostly?"
|
||||
python examples/mail_reader_leann.py --query "What's the food I ordered by doordash or Uber eat mostly?"
|
||||
```
|
||||
**780K email chunks → 78MB storage.** Finally, search your email like you search Google.
|
||||
**780K email chunks → 78MB storage** Finally, search your email like you search Google.
|
||||
|
||||
<details>
|
||||
<summary><strong>📋 Click to expand: User Configurable Arguments</strong></summary>
|
||||
<summary><strong>📋 Click to expand: Command Examples</strong></summary>
|
||||
|
||||
```bash
|
||||
# Use default mail path (works for most macOS setups)
|
||||
@@ -255,7 +242,7 @@ python examples/google_history_reader_leann.py --query "Tell me my browser histo
|
||||
**38K browser entries → 6MB storage.** Your browser history becomes your personal search engine.
|
||||
|
||||
<details>
|
||||
<summary><strong>📋 Click to expand: User Configurable Arguments</strong></summary>
|
||||
<summary><strong>📋 Click to expand: Command Examples</strong></summary>
|
||||
|
||||
```bash
|
||||
# Use default Chrome profile (auto-finds all profiles)
|
||||
@@ -332,7 +319,7 @@ Failed to find or export WeChat data. Exiting.
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>📋 Click to expand: User Configurable Arguments</strong></summary>
|
||||
<summary><strong>📋 Click to expand: Command Examples</strong></summary>
|
||||
|
||||
```bash
|
||||
# Use default settings (recommended for first run)
|
||||
|
||||
@@ -94,14 +94,14 @@ if __name__ == "__main__":
|
||||
parser.add_argument(
|
||||
"--llm",
|
||||
type=str,
|
||||
default="openai",
|
||||
default="hf",
|
||||
choices=["simulated", "ollama", "hf", "openai"],
|
||||
help="The LLM backend to use.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--model",
|
||||
type=str,
|
||||
default="gpt-4o",
|
||||
default="Qwen/Qwen3-0.6B",
|
||||
help="The model name to use (e.g., 'llama3:8b' for ollama, 'deepseek-ai/deepseek-llm-7b-chat' for hf, 'gpt-4o' for openai).",
|
||||
)
|
||||
parser.add_argument(
|
||||
|
||||
@@ -7,7 +7,6 @@ from pathlib import Path
|
||||
from typing import Any, Literal
|
||||
|
||||
import numpy as np
|
||||
import psutil
|
||||
from leann.interface import (
|
||||
LeannBackendBuilderInterface,
|
||||
LeannBackendFactoryInterface,
|
||||
@@ -85,43 +84,6 @@ def _write_vectors_to_bin(data: np.ndarray, file_path: Path):
|
||||
f.write(data.tobytes())
|
||||
|
||||
|
||||
def _calculate_smart_memory_config(data: np.ndarray) -> tuple[float, float]:
|
||||
"""
|
||||
Calculate smart memory configuration for DiskANN based on data size and system specs.
|
||||
|
||||
Args:
|
||||
data: The embedding data array
|
||||
|
||||
Returns:
|
||||
tuple: (search_memory_maximum, build_memory_maximum) in GB
|
||||
"""
|
||||
num_vectors, dim = data.shape
|
||||
|
||||
# Calculate embedding storage size
|
||||
embedding_size_bytes = num_vectors * dim * 4 # float32 = 4 bytes
|
||||
embedding_size_gb = embedding_size_bytes / (1024**3)
|
||||
|
||||
# search_memory_maximum: 1/10 of embedding size for optimal PQ compression
|
||||
# This controls Product Quantization size - smaller means more compression
|
||||
search_memory_gb = max(0.1, embedding_size_gb / 10) # At least 100MB
|
||||
|
||||
# build_memory_maximum: Based on available system RAM for sharding control
|
||||
# This controls how much memory DiskANN uses during index construction
|
||||
available_memory_gb = psutil.virtual_memory().available / (1024**3)
|
||||
total_memory_gb = psutil.virtual_memory().total / (1024**3)
|
||||
|
||||
# Use 50% of available memory, but at least 2GB and at most 75% of total
|
||||
build_memory_gb = max(2.0, min(available_memory_gb * 0.5, total_memory_gb * 0.75))
|
||||
|
||||
logger.info(
|
||||
f"Smart memory config - Data: {embedding_size_gb:.2f}GB, "
|
||||
f"Search mem: {search_memory_gb:.2f}GB (PQ control), "
|
||||
f"Build mem: {build_memory_gb:.2f}GB (sharding control)"
|
||||
)
|
||||
|
||||
return search_memory_gb, build_memory_gb
|
||||
|
||||
|
||||
@register_backend("diskann")
|
||||
class DiskannBackend(LeannBackendFactoryInterface):
|
||||
@staticmethod
|
||||
@@ -159,16 +121,6 @@ class DiskannBuilder(LeannBackendBuilderInterface):
|
||||
f"Unsupported distance_metric '{build_kwargs.get('distance_metric', 'unknown')}'."
|
||||
)
|
||||
|
||||
# Calculate smart memory configuration if not explicitly provided
|
||||
if (
|
||||
"search_memory_maximum" not in build_kwargs
|
||||
or "build_memory_maximum" not in build_kwargs
|
||||
):
|
||||
smart_search_mem, smart_build_mem = _calculate_smart_memory_config(data)
|
||||
else:
|
||||
smart_search_mem = build_kwargs.get("search_memory_maximum", 4.0)
|
||||
smart_build_mem = build_kwargs.get("build_memory_maximum", 8.0)
|
||||
|
||||
try:
|
||||
from . import _diskannpy as diskannpy # type: ignore
|
||||
|
||||
@@ -179,8 +131,8 @@ class DiskannBuilder(LeannBackendBuilderInterface):
|
||||
index_prefix,
|
||||
build_kwargs.get("complexity", 64),
|
||||
build_kwargs.get("graph_degree", 32),
|
||||
build_kwargs.get("search_memory_maximum", smart_search_mem),
|
||||
build_kwargs.get("build_memory_maximum", smart_build_mem),
|
||||
build_kwargs.get("search_memory_maximum", 4.0),
|
||||
build_kwargs.get("build_memory_maximum", 8.0),
|
||||
build_kwargs.get("num_threads", 8),
|
||||
build_kwargs.get("pq_disk_bytes", 0),
|
||||
"",
|
||||
|
||||
@@ -4,8 +4,8 @@ build-backend = "scikit_build_core.build"
|
||||
|
||||
[project]
|
||||
name = "leann-backend-diskann"
|
||||
version = "0.1.16"
|
||||
dependencies = ["leann-core==0.1.16", "numpy", "protobuf>=3.19.0"]
|
||||
version = "0.1.15"
|
||||
dependencies = ["leann-core==0.1.15", "numpy", "protobuf>=3.19.0"]
|
||||
|
||||
[tool.scikit-build]
|
||||
# Key: simplified CMake path
|
||||
|
||||
Submodule packages/leann-backend-diskann/third_party/DiskANN updated: 67a2611ad1...af2a26481e
@@ -6,10 +6,10 @@ build-backend = "scikit_build_core.build"
|
||||
|
||||
[project]
|
||||
name = "leann-backend-hnsw"
|
||||
version = "0.1.16"
|
||||
version = "0.1.15"
|
||||
description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
|
||||
dependencies = [
|
||||
"leann-core==0.1.16",
|
||||
"leann-core==0.1.15",
|
||||
"numpy",
|
||||
"pyzmq>=23.0.0",
|
||||
"msgpack>=1.0.0",
|
||||
|
||||
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "leann-core"
|
||||
version = "0.1.16"
|
||||
version = "0.1.15"
|
||||
description = "Core API and plugin system for LEANN"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.9"
|
||||
|
||||
@@ -542,41 +542,14 @@ class HFChat(LLMInterface):
|
||||
self.device = "cpu"
|
||||
logger.info("No GPU detected. Using CPU.")
|
||||
|
||||
# Load tokenizer and model with timeout protection
|
||||
try:
|
||||
import signal
|
||||
|
||||
def timeout_handler(signum, frame):
|
||||
raise TimeoutError("Model download/loading timed out")
|
||||
|
||||
# Set timeout for model loading (60 seconds)
|
||||
old_handler = signal.signal(signal.SIGALRM, timeout_handler)
|
||||
signal.alarm(60)
|
||||
|
||||
try:
|
||||
logger.info(f"Loading tokenizer for {model_name}...")
|
||||
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
|
||||
logger.info(f"Loading model {model_name}...")
|
||||
self.model = AutoModelForCausalLM.from_pretrained(
|
||||
model_name,
|
||||
torch_dtype=torch.float16 if self.device != "cpu" else torch.float32,
|
||||
device_map="auto" if self.device != "cpu" else None,
|
||||
trust_remote_code=True,
|
||||
)
|
||||
logger.info(f"Successfully loaded {model_name}")
|
||||
finally:
|
||||
signal.alarm(0) # Cancel the alarm
|
||||
signal.signal(signal.SIGALRM, old_handler) # Restore old handler
|
||||
|
||||
except TimeoutError:
|
||||
logger.error(f"Model loading timed out for {model_name}")
|
||||
raise RuntimeError(
|
||||
f"Model loading timed out for {model_name}. Please check your internet connection or try a smaller model."
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load model {model_name}: {e}")
|
||||
raise
|
||||
# Load tokenizer and model
|
||||
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
self.model = AutoModelForCausalLM.from_pretrained(
|
||||
model_name,
|
||||
torch_dtype=torch.float16 if self.device != "cpu" else torch.float32,
|
||||
device_map="auto" if self.device != "cpu" else None,
|
||||
trust_remote_code=True,
|
||||
)
|
||||
|
||||
# Move model to device if not using device_map
|
||||
if self.device != "cpu" and "device_map" not in str(self.model):
|
||||
|
||||
@@ -354,21 +354,13 @@ class EmbeddingServerManager:
|
||||
self.server_process.terminate()
|
||||
|
||||
try:
|
||||
self.server_process.wait(timeout=3)
|
||||
self.server_process.wait(timeout=5)
|
||||
logger.info(f"Server process {self.server_process.pid} terminated.")
|
||||
except subprocess.TimeoutExpired:
|
||||
logger.warning(
|
||||
f"Server process {self.server_process.pid} did not terminate gracefully within 3 seconds, killing it."
|
||||
f"Server process {self.server_process.pid} did not terminate gracefully, killing it."
|
||||
)
|
||||
self.server_process.kill()
|
||||
try:
|
||||
self.server_process.wait(timeout=2)
|
||||
logger.info(f"Server process {self.server_process.pid} killed successfully.")
|
||||
except subprocess.TimeoutExpired:
|
||||
logger.error(
|
||||
f"Failed to kill server process {self.server_process.pid} - it may be hung"
|
||||
)
|
||||
# Don't hang indefinitely
|
||||
|
||||
# Clean up process resources to prevent resource tracker warnings
|
||||
try:
|
||||
|
||||
@@ -5,8 +5,11 @@ LEANN is a revolutionary vector database that democratizes personal AI. Transfor
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
# Default installation (includes both HNSW and DiskANN backends)
|
||||
# Default installation (HNSW backend, recommended)
|
||||
uv pip install leann
|
||||
|
||||
# With DiskANN backend (for large-scale deployments)
|
||||
uv pip install leann[diskann]
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
@@ -16,8 +19,8 @@ from leann import LeannBuilder, LeannSearcher, LeannChat
|
||||
from pathlib import Path
|
||||
INDEX_PATH = str(Path("./").resolve() / "demo.leann")
|
||||
|
||||
# Build an index (choose backend: "hnsw" or "diskann")
|
||||
builder = LeannBuilder(backend_name="hnsw") # or "diskann" for large-scale deployments
|
||||
# Build an index
|
||||
builder = LeannBuilder(backend_name="hnsw")
|
||||
builder.add_text("LEANN saves 97% storage compared to traditional vector databases.")
|
||||
builder.add_text("Tung Tung Tung Sahur called—they need their banana‑crocodile hybrid back")
|
||||
builder.build_index(INDEX_PATH)
|
||||
|
||||
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "leann"
|
||||
version = "0.1.16"
|
||||
version = "0.1.15"
|
||||
description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.9"
|
||||
@@ -24,15 +24,16 @@ classifiers = [
|
||||
"Programming Language :: Python :: 3.12",
|
||||
]
|
||||
|
||||
# Default installation: core + hnsw + diskann
|
||||
# Default installation: core + hnsw
|
||||
dependencies = [
|
||||
"leann-core>=0.1.0",
|
||||
"leann-backend-hnsw>=0.1.0",
|
||||
"leann-backend-diskann>=0.1.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
# All backends now included by default
|
||||
diskann = [
|
||||
"leann-backend-diskann>=0.1.0",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
Repository = "https://github.com/yichuan-w/LEANN"
|
||||
|
||||
Reference in New Issue
Block a user