diff --git a/README.md b/README.md
index 702891a..659484c 100755
--- a/README.md
+++ b/README.md
@@ -71,6 +71,8 @@ source .venv/bin/activate
uv pip install leann
```
+> Low-resource? See “Low-resource setups” in the [Configuration Guide](docs/configuration-guide.md#low-resource-setups).
+
🔧 Build from Source (Recommended for development)
@@ -184,34 +186,34 @@ All RAG examples share these common parameters. **Interactive mode** is availabl
```bash
# Core Parameters (General preprocessing for all examples)
---index-dir DIR # Directory to store the index (default: current directory)
---query "YOUR QUESTION" # Single query mode. Omit for interactive chat (type 'quit' to exit), and now you can play with your index interactively
---max-items N # Limit data preprocessing (default: -1, process all data)
---force-rebuild # Force rebuild index even if it exists
+--index-dir DIR # Directory to store the index (default: current directory)
+--query "YOUR QUESTION"  # Single-query mode. Omit to chat with your index interactively (type 'quit' to exit)
+--max-items N # Limit data preprocessing (default: -1, process all data)
+--force-rebuild # Force rebuild index even if it exists
# Embedding Parameters
---embedding-model MODEL # e.g., facebook/contriever, text-embedding-3-small, mlx-community/Qwen3-Embedding-0.6B-8bit or nomic-embed-text
---embedding-mode MODE # sentence-transformers, openai, mlx, or ollama
+--embedding-model MODEL # e.g., facebook/contriever, text-embedding-3-small, mlx-community/Qwen3-Embedding-0.6B-8bit or nomic-embed-text
+--embedding-mode MODE # sentence-transformers, openai, mlx, or ollama
# LLM Parameters (Text generation models)
---llm TYPE # LLM backend: openai, ollama, or hf (default: openai)
---llm-model MODEL # Model name (default: gpt-4o) e.g., gpt-4o-mini, llama3.2:1b, Qwen/Qwen2.5-1.5B-Instruct
---thinking-budget LEVEL # Thinking budget for reasoning models: low/medium/high (supported by o3, o3-mini, GPT-Oss:20b, and other reasoning models)
+--llm TYPE # LLM backend: openai, ollama, or hf (default: openai)
+--llm-model MODEL # Model name (default: gpt-4o) e.g., gpt-4o-mini, llama3.2:1b, Qwen/Qwen2.5-1.5B-Instruct
+--thinking-budget LEVEL  # Thinking budget for reasoning models: low/medium/high (supported by o3, o3-mini, gpt-oss:20b, and similar)
# Search Parameters
---top-k N # Number of results to retrieve (default: 20)
---search-complexity N # Search complexity for graph traversal (default: 32)
+--top-k N # Number of results to retrieve (default: 20)
+--search-complexity N # Search complexity for graph traversal (default: 32)
# Chunking Parameters
---chunk-size N # Size of text chunks (default varies by source: 256 for most, 192 for WeChat)
---chunk-overlap N # Overlap between chunks (default varies: 25-128 depending on source)
+--chunk-size N # Size of text chunks (default varies by source: 256 for most, 192 for WeChat)
+--chunk-overlap N # Overlap between chunks (default varies: 25-128 depending on source)
# Index Building Parameters
---backend-name NAME # Backend to use: hnsw or diskann (default: hnsw)
---graph-degree N # Graph degree for index construction (default: 32)
---build-complexity N # Build complexity for index construction (default: 64)
---no-compact # Disable compact index storage (compact storage IS enabled to save storage by default)
---no-recompute # Disable embedding recomputation (recomputation IS enabled to save storage by default)
+--backend-name NAME # Backend to use: hnsw or diskann (default: hnsw)
+--graph-degree N # Graph degree for index construction (default: 32)
+--build-complexity N # Build complexity for index construction (default: 64)
+--compact / --no-compact     # Use compact storage (default: true). Pass --no-compact when building with --no-recompute.
+--recompute / --no-recompute # Enable/disable embedding recomputation (default: enabled). Avoid --no-recompute searches on an index built with recomputation.
```
@@ -482,27 +484,29 @@ leann list
```
**Key CLI features:**
-- Auto-detects document formats (PDF, TXT, MD, DOCX)
+- Auto-detects document formats (PDF, TXT, MD, DOCX, PPTX, and code files)
- Smart text chunking with overlap
- Multiple LLM providers (Ollama, OpenAI, HuggingFace)
-- Organized index storage in `~/.leann/indexes/`
+- Organized index storage in `.leann/indexes/` (project-local)
- Support for advanced search parameters
📋 Click to expand: Complete CLI Reference
+Run `leann --help` for an overview, or `leann build --help`, `leann search --help`, and `leann ask --help` for the complete reference of each command.
+
**Build Command:**
```bash
-leann build INDEX_NAME --docs DIRECTORY [OPTIONS]
+leann build INDEX_NAME --docs DIRECTORY|FILE [DIRECTORY|FILE ...] [OPTIONS]
Options:
--backend {hnsw,diskann} Backend to use (default: hnsw)
--embedding-model MODEL Embedding model (default: facebook/contriever)
- --graph-degree N Graph degree (default: 32)
- --complexity N Build complexity (default: 64)
- --force Force rebuild existing index
- --compact Use compact storage (default: true)
- --recompute Enable recomputation (default: true)
+ --graph-degree N Graph degree (default: 32)
+ --complexity N Build complexity (default: 64)
+ --force Force rebuild existing index
+ --compact / --no-compact Use compact storage (default: true). Pass --no-compact when building with --no-recompute.
+ --recompute / --no-recompute Enable recomputation (default: true)
```
**Search Command:**
@@ -510,9 +514,9 @@ Options:
leann search INDEX_NAME QUERY [OPTIONS]
Options:
- --top-k N Number of results (default: 5)
- --complexity N Search complexity (default: 64)
- --recompute-embeddings Use recomputation for highest accuracy
+ --top-k N Number of results (default: 5)
+ --complexity N Search complexity (default: 64)
+ --recompute / --no-recompute Enable/disable embedding recomputation (default: enabled). Avoid --no-recompute searches on an index built with recomputation.
--pruning-strategy {global,local,proportional}
```
diff --git a/benchmarks/benchmark_no_recompute.py b/benchmarks/benchmark_no_recompute.py
new file mode 100644
index 0000000..1c402c0
--- /dev/null
+++ b/benchmarks/benchmark_no_recompute.py
@@ -0,0 +1,148 @@
+import argparse
+import os
+import time
+from pathlib import Path
+
+from leann import LeannBuilder, LeannSearcher
+
+
+def _meta_exists(index_path: str) -> bool:
+ p = Path(index_path)
+ return (p.parent / f"{p.stem}.meta.json").exists()
+
+
+def ensure_index(index_path: str, backend_name: str, num_docs: int, is_recompute: bool) -> None:
+ # Reuse an existing index build; remove this check to force a rebuild
+ if _meta_exists(index_path):
+     return
+ kwargs = {}
+ if backend_name == "hnsw":
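+ # HNSW compact storage requires recompute, so couple is_compact to is_recompute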
+ kwargs["is_compact"] = is_recompute
+ builder = LeannBuilder(
+ backend_name=backend_name,
+ embedding_model=os.getenv("LEANN_EMBED_MODEL", "facebook/contriever"),
+ embedding_mode=os.getenv("LEANN_EMBED_MODE", "sentence-transformers"),
+ graph_degree=32,
+ complexity=64,
+ is_recompute=is_recompute,
+ num_threads=4,
+ **kwargs,
+ )
+ for i in range(num_docs):
+ builder.add_text(
+ f"This is a test document number {i}. It contains some repeated text for benchmarking."
+ )
+ builder.build_index(index_path)
+
+
+def _bench_group(
+ index_path: str,
+ recompute: bool,
+ query: str,
+ repeats: int,
+ complexity: int = 32,
+ top_k: int = 10,
+) -> float:
+ # Independent searcher per group so each run manages its own embedding server
+ searcher = LeannSearcher(index_path=index_path)
+
+ # Warm-up once
+ _ = searcher.search(
+ query,
+ top_k=top_k,
+ complexity=complexity,
+ recompute_embeddings=recompute,
+ )
+
+ def _once() -> float:
+ t0 = time.time()
+ _ = searcher.search(
+ query,
+ top_k=top_k,
+ complexity=complexity,
+ recompute_embeddings=recompute,
+ )
+ return time.time() - t0
+
+ if repeats <= 1:
+ t = _once()
+ else:
+ vals = [_once() for _ in range(repeats)]
+ vals.sort()
+ t = vals[len(vals) // 2]
+
+ searcher.cleanup()
+ return t
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--num-docs", type=int, default=5000)
+ parser.add_argument("--repeats", type=int, default=3)
+ parser.add_argument("--complexity", type=int, default=32)
+ args = parser.parse_args()
+
+ base = Path.cwd() / ".leann" / "indexes" / f"bench_n{args.num_docs}"
+ base.mkdir(parents=True, exist_ok=True)
+ # ---------- Build HNSW variants ----------
+ hnsw_r = str(base / f"hnsw_recompute_n{args.num_docs}.leann")
+ hnsw_nr = str(base / f"hnsw_norecompute_n{args.num_docs}.leann")
+ ensure_index(hnsw_r, "hnsw", args.num_docs, True)
+ ensure_index(hnsw_nr, "hnsw", args.num_docs, False)
+
+ # ---------- Build DiskANN variants ----------
+ diskann_r = str(base / "diskann_r.leann")
+ diskann_nr = str(base / "diskann_nr.leann")
+ ensure_index(diskann_r, "diskann", args.num_docs, True)
+ ensure_index(diskann_nr, "diskann", args.num_docs, False)
+
+ # ---------- Helpers ----------
+ def _size_for(prefix: str) -> int:
+ p = Path(prefix)
+ base_dir = p.parent
+ stem = p.stem
+ total = 0
+ for f in base_dir.iterdir():
+ if f.is_file() and f.name.startswith(stem):
+ total += f.stat().st_size
+ return total
+
+ # ---------- HNSW benchmark ----------
+ t_hnsw_r = _bench_group(
+ hnsw_r, True, "test document number 42", repeats=args.repeats, complexity=args.complexity
+ )
+ t_hnsw_nr = _bench_group(
+ hnsw_nr, False, "test document number 42", repeats=args.repeats, complexity=args.complexity
+ )
+ size_hnsw_r = _size_for(hnsw_r)
+ size_hnsw_nr = _size_for(hnsw_nr)
+
+ print("Benchmark results (HNSW):")
+ print(f" recompute=True: search_time={t_hnsw_r:.3f}s, size={size_hnsw_r / 1024 / 1024:.1f}MB")
+ print(
+ f" recompute=False: search_time={t_hnsw_nr:.3f}s, size={size_hnsw_nr / 1024 / 1024:.1f}MB"
+ )
+ print(" Expectation: no-recompute should be faster but larger on disk.")
+
+ # ---------- DiskANN benchmark ----------
+ t_diskann_r = _bench_group(
+ diskann_r, True, "DiskANN R test doc 123", repeats=args.repeats, complexity=args.complexity
+ )
+ t_diskann_nr = _bench_group(
+ diskann_nr,
+ False,
+ "DiskANN NR test doc 123",
+ repeats=args.repeats,
+ complexity=args.complexity,
+ )
+ size_diskann_r = _size_for(diskann_r)
+ size_diskann_nr = _size_for(diskann_nr)
+
+ print("\nBenchmark results (DiskANN):")
+ print(f" build(recompute=True, partition): size={size_diskann_r / 1024 / 1024:.1f}MB")
+ print(f" build(recompute=False): size={size_diskann_nr / 1024 / 1024:.1f}MB")
+ print(f" search recompute=True (final rerank): {t_diskann_r:.3f}s")
+ print(f" search recompute=False (PQ only): {t_diskann_nr:.3f}s")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/benchmarks/diskann_vs_hnsw_speed_comparison.py b/benchmarks/diskann_vs_hnsw_speed_comparison.py
index bb89692..b4e6159 100644
--- a/benchmarks/diskann_vs_hnsw_speed_comparison.py
+++ b/benchmarks/diskann_vs_hnsw_speed_comparison.py
@@ -10,6 +10,7 @@ This benchmark compares search performance between DiskANN and HNSW backends:
"""
import gc
+import multiprocessing as mp
import tempfile
import time
from pathlib import Path
@@ -17,6 +18,12 @@ from typing import Any
import numpy as np
+# Prefer 'fork' start method to avoid POSIX semaphore leaks on macOS
+try:
+ mp.set_start_method("fork", force=True)
+except Exception:
+ pass
+
def create_test_texts(n_docs: int) -> list[str]:
"""Create synthetic test documents for benchmarking."""
@@ -113,10 +120,10 @@ def benchmark_backend(
]
score_validity_rate = len(valid_scores) / len(all_scores) if all_scores else 0
- # Clean up
+ # Clean up (ensure embedding server shutdown and object GC)
try:
- if hasattr(searcher, "__del__"):
- searcher.__del__()
+ if hasattr(searcher, "cleanup"):
+ searcher.cleanup()
del searcher
del builder
gc.collect()
@@ -259,10 +266,21 @@ if __name__ == "__main__":
print(f"\n❌ Benchmark failed: {e}")
sys.exit(1)
finally:
- # Ensure clean exit
+ # Ensure clean exit (forceful to prevent rare hangs from atexit/threads)
try:
gc.collect()
print("\n🧹 Cleanup completed")
+ # Flush stdio to ensure message is visible before hard-exit
+ try:
+ import sys as _sys
+
+ _sys.stdout.flush()
+ _sys.stderr.flush()
+ except Exception:
+ pass
except Exception:
pass
- sys.exit(0)
+ # Use os._exit to bypass atexit handlers that may hang in rare cases
+ import os as _os
+
+ _os._exit(0)
diff --git a/docs/configuration-guide.md b/docs/configuration-guide.md
index badfc58..c1402a1 100644
--- a/docs/configuration-guide.md
+++ b/docs/configuration-guide.md
@@ -97,29 +97,23 @@ ollama pull nomic-embed-text
```
### DiskANN
-**Best for**: Performance-critical applications and large datasets - **Production-ready with automatic graph partitioning**
+**Best for**: Large datasets, especially when you want `recompute=True`.
-**How it works:**
-- **Product Quantization (PQ) + Real-time Reranking**: Uses compressed PQ codes for fast graph traversal, then recomputes exact embeddings for final candidates
-- **Automatic Graph Partitioning**: When `is_recompute=True`, automatically partitions large indices and safely removes redundant files to save storage
-- **Superior Speed-Accuracy Trade-off**: Faster search than HNSW while maintaining high accuracy
+**Key advantages:**
+- **Faster search** on large datasets (3x+ speedup vs HNSW in many cases)
+- **Smart storage**: `recompute=True` enables automatic graph partitioning for smaller indexes
+- **Better scaling**: Designed for 100k+ documents
-**Trade-offs compared to HNSW:**
-- ✅ **Faster search latency** (typically 2-8x speedup)
-- ✅ **Better scaling** for large datasets
-- ✅ **Smart storage management** with automatic partitioning
-- ✅ **Better graph locality** with `--ldg-times` parameter for SSD optimization
-- ⚠️ **Slightly larger index size** due to PQ tables and graph metadata
+**Recompute behavior:**
+- `recompute=True` (recommended): pure PQ traversal with a single final rerank; faster, and enables partitioning
+- `recompute=False`: PQ plus partial exact distances during traversal; slower but higher accuracy
```bash
# Recommended for most use cases
--backend-name diskann --graph-degree 32 --build-complexity 64
-
-# For large-scale deployments
---backend-name diskann --graph-degree 64 --build-complexity 128
```
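+
+The same toggle is available from the Python API. A minimal sketch, mirroring the `LeannSearcher` usage in `benchmarks/benchmark_no_recompute.py` (the index path is a placeholder):
+
+```python
+from leann import LeannSearcher
+
+searcher = LeannSearcher(index_path=".leann/indexes/my-index.leann")
+
+# recompute=True: pure PQ traversal, then a single exact rerank of the final candidates
+results = searcher.search("your query", top_k=10, recompute_embeddings=True)
+
+# recompute=False: PQ plus partial exact distances during traversal, no final rerank
+results = searcher.search("your query", top_k=10, recompute_embeddings=False)
+
+searcher.cleanup()
+```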
-**Performance Benchmark**: Run `python benchmarks/diskann_vs_hnsw_speed_comparison.py` to compare DiskANN and HNSW on your system.
+**Performance Benchmark**: Run `uv run benchmarks/diskann_vs_hnsw_speed_comparison.py` to compare DiskANN and HNSW on your system.
## LLM Selection: Engine and Model Comparison
@@ -273,24 +267,114 @@ Every configuration choice involves trade-offs:
The key is finding the right balance for your specific use case. Start small and simple, measure performance, then scale up only where needed.
-## Deep Dive: Critical Configuration Decisions
+## Low-resource setups
-### When to Disable Recomputation
+If you don’t have a local GPU or builds/searches are too slow, use one or more of the options below.
-LEANN's recomputation feature provides exact distance calculations but can be disabled for extreme QPS requirements:
+### 1) Use OpenAI embeddings (no local compute)
+
+Fastest path with zero local GPU requirements. Set your API key and use OpenAI embeddings during build and search:
```bash
---no-recompute # Disable selective recomputation
+export OPENAI_API_KEY=sk-...
+
+# Build with OpenAI embeddings
+leann build my-index \
+ --embedding-mode openai \
+ --embedding-model text-embedding-3-small
+
+# Search with OpenAI embeddings (recompute at query time)
+leann search my-index "your query" \
+ --recompute
```
-**Trade-offs**:
-- **With recomputation** (default): Exact distances, best quality, higher latency, minimal storage (only stores metadata, recomputes embeddings on-demand)
-- **Without recomputation**: Must store full embeddings, significantly higher memory and storage usage (10-100x more), but faster search
+### 2) Run remote builds with SkyPilot (cloud GPU)
+
+Offload embedding generation and index building to a GPU VM using [SkyPilot](https://skypilot.readthedocs.io/en/latest/). A template is provided at `sky/leann-build.yaml`.
+
+```bash
+# One-time: install and configure SkyPilot
+pip install skypilot
+
+# Launch with defaults (L4:1) and mount ./data to ~/leann-data; the build runs automatically
+sky launch -c leann-gpu sky/leann-build.yaml
+
+# Override parameters via -e key=value (optional)
+sky launch -c leann-gpu sky/leann-build.yaml \
+ -e index_name=my-index \
+ -e backend=hnsw \
+ -e embedding_mode=sentence-transformers \
+ -e embedding_model=Qwen/Qwen3-Embedding-0.6B
+
+# Copy the built index back to your local .leann (use rsync)
+rsync -Pavz leann-gpu:~/.leann/indexes/my-index ./.leann/indexes/
+```
+
+### 3) Disable recomputation to trade storage for speed
+
+If you need lower latency and have more storage/memory, disable recomputation. This stores full embeddings and avoids recomputing at search time.
+
+```bash
+# Build without recomputation (HNSW requires non-compact in this mode)
+leann build my-index --no-recompute --no-compact
+
+# Search without recomputation
+leann search my-index "your query" --no-recompute
+```
+
+When to use:
+- Extreme low latency requirements (high QPS, interactive assistants)
+- Read-heavy workloads where storage is cheaper than latency
+- No always-available GPU
+
+Constraints:
+- HNSW: when `--no-recompute` is set, LEANN automatically disables compact mode during build
+- DiskANN: supported; `--no-recompute` skips selective recompute during search
+
+Storage impact:
+- Storing N embeddings of dimension D with float32 requires approximately N × D × 4 bytes
+- Example: 1,000,000 chunks × 768 dims × 4 bytes ≈ 2.86 GiB (plus graph/metadata)
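+
+A quick back-of-the-envelope check of that example:
+
+```python
+n_chunks, dims, bytes_per_float = 1_000_000, 768, 4
+print(f"{n_chunks * dims * bytes_per_float / 1024**3:.2f} GiB")  # -> 2.86 GiB
+```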
+
+Converting an existing index (rebuild required):
+```bash
+# Rebuild in-place (ensure you still have original docs or can regenerate chunks)
+leann build my-index --force --no-recompute --no-compact
+```
+
+Python API usage:
+```python
+from leann import LeannSearcher
+
+searcher = LeannSearcher("/path/to/my-index.leann")
+results = searcher.search("your query", top_k=10, recompute_embeddings=False)
+```
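+
+`LeannSearcher` also supports use as a context manager (see `__enter__`/`__exit__` in `leann/api.py`), so cleanup can run automatically:
+
+```python
+from leann import LeannSearcher
+
+with LeannSearcher("/path/to/my-index.leann") as searcher:
+    results = searcher.search("your query", top_k=10, recompute_embeddings=False)
+# searcher.cleanup() is invoked automatically on block exit
+```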
+
+Trade-offs:
+- Lower latency and fewer network hops at query time
+- Significantly higher storage (10–100× vs selective recomputation)
+- Slightly larger memory footprint during build and search
+
+Quick benchmark results (`benchmarks/benchmark_no_recompute.py` with 5k texts, complexity=32):
+
+- HNSW
+
+ ```text
+ recompute=True: search_time=0.818s, size=1.1MB
+ recompute=False: search_time=0.012s, size=16.6MB
+ ```
+
+- DiskANN
+
+ ```text
+ recompute=True: search_time=0.041s, size=5.9MB
+ recompute=False: search_time=0.013s, size=24.6MB
+ ```
+
+Conclusion:
+- **HNSW**: `no-recompute` is significantly faster (no embedding recomputation) but requires much more storage (stores all embeddings)
+- **DiskANN**: `no-recompute` uses PQ + partial real distances during traversal (slower but higher accuracy), while `recompute=True` uses pure PQ traversal + final reranking (faster traversal, enables build-time partitioning for smaller storage)
+
-**Disable when**:
-- You have abundant storage and memory
-- Need extremely low latency (< 100ms)
-- Running a read-heavy workload where storage cost is acceptable
## Further Reading
diff --git a/packages/leann-backend-diskann/leann_backend_diskann/diskann_backend.py b/packages/leann-backend-diskann/leann_backend_diskann/diskann_backend.py
index 6ef84cc..96fb9ee 100644
--- a/packages/leann-backend-diskann/leann_backend_diskann/diskann_backend.py
+++ b/packages/leann-backend-diskann/leann_backend_diskann/diskann_backend.py
@@ -441,9 +441,14 @@ class DiskannSearcher(BaseSearcher):
else: # "global"
use_global_pruning = True
- # Perform search with suppressed C++ output based on log level
- use_deferred_fetch = kwargs.get("USE_DEFERRED_FETCH", True)
- recompute_neighors = False
+ # Strategy:
+ # - Traversal always uses PQ distances
+ # - If recompute_embeddings=True, do a single final rerank via deferred fetch
+ # (fetch embeddings for the final candidate set only)
+ # - Do not recompute neighbor distances along the path
+ use_deferred_fetch = bool(recompute_embeddings)
+ recompute_neighors = False  # Misspelling is expected by the binding; kept for backward compatibility.
+
with suppress_cpp_output_if_needed():
labels, distances = self._index.batch_search(
query,
diff --git a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py
index 1d5f635..2ec6e39 100644
--- a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py
+++ b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py
@@ -54,12 +54,13 @@ class HNSWBuilder(LeannBackendBuilderInterface):
self.efConstruction = self.build_params.setdefault("efConstruction", 200)
self.distance_metric = self.build_params.setdefault("distance_metric", "mips")
self.dimensions = self.build_params.get("dimensions")
- if not self.is_recompute:
- if self.is_compact:
- # TODO: support this case @andy
- raise ValueError(
- "is_recompute is False, but is_compact is True. This is not compatible now. change is compact to False and you can use the original HNSW index."
- )
+ if not self.is_recompute and self.is_compact:
+ # Auto-correct: non-recompute requires non-compact storage for HNSW
+ logger.warning(
+ "is_recompute=False requires non-compact HNSW. Forcing is_compact=False."
+ )
+ self.is_compact = False
+ self.build_params["is_compact"] = False
def build(self, data: np.ndarray, ids: list[str], index_path: str, **kwargs):
from . import faiss # type: ignore
@@ -184,9 +185,11 @@ class HNSWSearcher(BaseSearcher):
"""
from . import faiss # type: ignore
- if not recompute_embeddings:
- if self.is_pruned:
- raise RuntimeError("Recompute is required for pruned index.")
+ if not recompute_embeddings and self.is_pruned:
+ raise RuntimeError(
+ "Recompute is required for pruned/compact HNSW index. "
+ "Re-run search with --recompute, or rebuild with --no-recompute and --no-compact."
+ )
if recompute_embeddings:
if zmq_port is None:
raise ValueError("zmq_port must be provided if recompute_embeddings is True")
diff --git a/packages/leann-core/src/leann/api.py b/packages/leann-core/src/leann/api.py
index bff3709..144e858 100644
--- a/packages/leann-core/src/leann/api.py
+++ b/packages/leann-core/src/leann/api.py
@@ -204,6 +204,18 @@ class LeannBuilder:
**backend_kwargs,
):
self.backend_name = backend_name
+ # Normalize incompatible combinations early (for consistent metadata)
+ if backend_name == "hnsw":
+ is_recompute = backend_kwargs.get("is_recompute", True)
+ is_compact = backend_kwargs.get("is_compact", True)
+ if is_recompute is False and is_compact is True:
+ warnings.warn(
+ "HNSW with is_recompute=False requires non-compact storage. Forcing is_compact=False.",
+ UserWarning,
+ stacklevel=2,
+ )
+ backend_kwargs["is_compact"] = False
+
backend_factory: Optional[LeannBackendFactoryInterface] = BACKEND_REGISTRY.get(backend_name)
if backend_factory is None:
raise ValueError(f"Backend '{backend_name}' not found or not registered.")
@@ -523,6 +535,7 @@ class LeannSearcher:
self.embedding_model = self.meta_data["embedding_model"]
# Support both old and new format
self.embedding_mode = self.meta_data.get("embedding_mode", "sentence-transformers")
+ # Delegate portability handling to PassageManager
self.passage_manager = PassageManager(
self.meta_data.get("passage_sources", []), metadata_file_path=self.meta_path_str
)
@@ -652,6 +665,23 @@ class LeannSearcher:
if hasattr(self.backend_impl, "embedding_server_manager"):
self.backend_impl.embedding_server_manager.stop_server()
+ # Enable automatic cleanup patterns
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc, tb):
+ try:
+ self.cleanup()
+ except Exception:
+ pass
+
+ def __del__(self):
+ try:
+ self.cleanup()
+ except Exception:
+ # Avoid noisy errors during interpreter shutdown
+ pass
+
class LeannChat:
def __init__(
@@ -730,3 +760,19 @@ class LeannChat:
"""
if hasattr(self.searcher, "cleanup"):
self.searcher.cleanup()
+
+ # Enable automatic cleanup patterns
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc, tb):
+ try:
+ self.cleanup()
+ except Exception:
+ pass
+
+ def __del__(self):
+ try:
+ self.cleanup()
+ except Exception:
+ pass
diff --git a/packages/leann-core/src/leann/chat.py b/packages/leann-core/src/leann/chat.py
index 665e1bd..11bbcee 100644
--- a/packages/leann-core/src/leann/chat.py
+++ b/packages/leann-core/src/leann/chat.py
@@ -422,7 +422,6 @@ class LLMInterface(ABC):
top_k=10,
complexity=64,
beam_width=8,
- USE_DEFERRED_FETCH=True,
skip_search_reorder=True,
recompute_beighbor_embeddings=True,
dedup_node_dis=True,
@@ -434,7 +433,6 @@ class LLMInterface(ABC):
Supported kwargs:
- complexity (int): Search complexity parameter (default: 32)
- beam_width (int): Beam width for search (default: 4)
- - USE_DEFERRED_FETCH (bool): Enable deferred fetch mode (default: False)
- skip_search_reorder (bool): Skip search reorder step (default: False)
- recompute_beighbor_embeddings (bool): Enable ZMQ embedding server for neighbor recomputation (default: False)
- dedup_node_dis (bool): Deduplicate nodes by distance (default: False)
diff --git a/packages/leann-core/src/leann/cli.py b/packages/leann-core/src/leann/cli.py
index 6de52c2..62f4992 100644
--- a/packages/leann-core/src/leann/cli.py
+++ b/packages/leann-core/src/leann/cli.py
@@ -72,7 +72,7 @@ class LeannCLI:
def create_parser(self) -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(
prog="leann",
- description="LEANN - Local Enhanced AI Navigation",
+ description="The smallest vector index in the world. RAG Everything with LEANN!",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
@@ -102,9 +102,18 @@ Examples:
help="Documents directories and/or files (default: current directory)",
)
build_parser.add_argument(
- "--backend", type=str, default="hnsw", choices=["hnsw", "diskann"]
+ "--backend",
+ type=str,
+ default="hnsw",
+ choices=["hnsw", "diskann"],
+ help="Backend to use (default: hnsw)",
+ )
+ build_parser.add_argument(
+ "--embedding-model",
+ type=str,
+ default="facebook/contriever",
+ help="Embedding model (default: facebook/contriever)",
)
- build_parser.add_argument("--embedding-model", type=str, default="facebook/contriever")
build_parser.add_argument(
"--embedding-mode",
type=str,
@@ -112,12 +121,28 @@ Examples:
choices=["sentence-transformers", "openai", "mlx", "ollama"],
help="Embedding backend mode (default: sentence-transformers)",
)
- build_parser.add_argument("--force", "-f", action="store_true", help="Force rebuild")
- build_parser.add_argument("--graph-degree", type=int, default=32)
- build_parser.add_argument("--complexity", type=int, default=64)
+ build_parser.add_argument(
+ "--force", "-f", action="store_true", help="Force rebuild existing index"
+ )
+ build_parser.add_argument(
+ "--graph-degree", type=int, default=32, help="Graph degree (default: 32)"
+ )
+ build_parser.add_argument(
+ "--complexity", type=int, default=64, help="Build complexity (default: 64)"
+ )
build_parser.add_argument("--num-threads", type=int, default=1)
- build_parser.add_argument("--compact", action="store_true", default=True)
- build_parser.add_argument("--recompute", action="store_true", default=True)
+ build_parser.add_argument(
+ "--compact",
+ action=argparse.BooleanOptionalAction,
+ default=True,
+ help="Use compact storage (default: true). Must be `no-compact` for `no-recompute` build.",
+ )
+ build_parser.add_argument(
+ "--recompute",
+ action=argparse.BooleanOptionalAction,
+ default=True,
+ help="Enable recomputation (default: true)",
+ )
build_parser.add_argument(
"--file-types",
type=str,
@@ -152,20 +177,26 @@ Examples:
search_parser = subparsers.add_parser("search", help="Search documents")
search_parser.add_argument("index_name", help="Index name")
search_parser.add_argument("query", help="Search query")
- search_parser.add_argument("--top-k", type=int, default=5)
- search_parser.add_argument("--complexity", type=int, default=64)
+ search_parser.add_argument(
+ "--top-k", type=int, default=5, help="Number of results (default: 5)"
+ )
+ search_parser.add_argument(
+ "--complexity", type=int, default=64, help="Search complexity (default: 64)"
+ )
search_parser.add_argument("--beam-width", type=int, default=1)
search_parser.add_argument("--prune-ratio", type=float, default=0.0)
search_parser.add_argument(
- "--recompute-embeddings",
- action="store_true",
+ "--recompute",
+ dest="recompute_embeddings",
+ action=argparse.BooleanOptionalAction,
default=True,
- help="Recompute embeddings (default: True)",
+ help="Enable/disable embedding recomputation (default: enabled). Should not do a `no-recompute` search in a `recompute` build.",
)
search_parser.add_argument(
"--pruning-strategy",
choices=["global", "local", "proportional"],
default="global",
+ help="Pruning strategy (default: global)",
)
# Ask command
@@ -176,19 +207,27 @@ Examples:
type=str,
default="ollama",
choices=["simulated", "ollama", "hf", "openai"],
+ help="LLM provider (default: ollama)",
+ )
+ ask_parser.add_argument(
+ "--model", type=str, default="qwen3:8b", help="Model name (default: qwen3:8b)"
)
- ask_parser.add_argument("--model", type=str, default="qwen3:8b")
ask_parser.add_argument("--host", type=str, default="http://localhost:11434")
- ask_parser.add_argument("--interactive", "-i", action="store_true")
- ask_parser.add_argument("--top-k", type=int, default=20)
+ ask_parser.add_argument(
+ "--interactive", "-i", action="store_true", help="Interactive chat mode"
+ )
+ ask_parser.add_argument(
+ "--top-k", type=int, default=20, help="Retrieval count (default: 20)"
+ )
ask_parser.add_argument("--complexity", type=int, default=32)
ask_parser.add_argument("--beam-width", type=int, default=1)
ask_parser.add_argument("--prune-ratio", type=float, default=0.0)
ask_parser.add_argument(
- "--recompute-embeddings",
- action="store_true",
+ "--recompute",
+ dest="recompute_embeddings",
+ action=argparse.BooleanOptionalAction,
default=True,
- help="Recompute embeddings (default: True)",
+ help="Enable/disable embedding recomputation during ask (default: enabled)",
)
ask_parser.add_argument(
"--pruning-strategy",
diff --git a/packages/leann-core/src/leann/embedding_server_manager.py b/packages/leann-core/src/leann/embedding_server_manager.py
index 3ed223f..05c8639 100644
--- a/packages/leann-core/src/leann/embedding_server_manager.py
+++ b/packages/leann-core/src/leann/embedding_server_manager.py
@@ -268,8 +268,12 @@ class EmbeddingServerManager:
f"Terminating server process (PID: {self.server_process.pid}) for backend {self.backend_module_name}..."
)
- # Use simple termination - our improved server shutdown should handle this properly
- self.server_process.terminate()
+ # Use simple termination first; if the server installed signal handlers,
+ # it will exit cleanly. Otherwise escalate to kill after a short wait.
+ try:
+ self.server_process.terminate()
+ except Exception:
+ pass
try:
self.server_process.wait(timeout=5) # Give more time for graceful shutdown
@@ -278,7 +282,10 @@ class EmbeddingServerManager:
logger.warning(
f"Server process {self.server_process.pid} did not terminate within 5 seconds, force killing..."
)
- self.server_process.kill()
+ try:
+ self.server_process.kill()
+ except Exception:
+ pass
try:
self.server_process.wait(timeout=2)
logger.info(f"Server process {self.server_process.pid} killed successfully.")
diff --git a/sky/leann-build.yaml b/sky/leann-build.yaml
new file mode 100644
index 0000000..53fd909
--- /dev/null
+++ b/sky/leann-build.yaml
@@ -0,0 +1,76 @@
+name: leann-build
+
+resources:
+ # Choose a GPU for fast embeddings (examples: L4, A10G, A100). CPU also works but is slower.
+ accelerators: L4:1
+ # Optionally pin a cloud, otherwise SkyPilot will auto-select
+ # cloud: aws
+ disk_size: 100
+
+envs:
+ # Build parameters (override with: sky launch -c leann-gpu sky/leann-build.yaml -e key=value)
+ index_name: my-index
+ docs: ./data
+ backend: hnsw # hnsw | diskann
+ complexity: 64
+ graph_degree: 32
+ num_threads: 8
+ # Embedding selection
+ embedding_mode: sentence-transformers # sentence-transformers | openai | mlx | ollama
+ embedding_model: facebook/contriever
+ # Storage/latency knobs
+ recompute: true # true => selective recomputation (recommended)
+ compact: true # for HNSW only
+ # Optional pass-through
+ extra_args: ""
+ # Rebuild control
+ force: true
+
+# Sync local paths to the remote VM. Adjust as needed.
+file_mounts:
+ # Example: mount your local data directory used for building
+ ~/leann-data: ${docs}
+
+setup: |
+ set -e
+ # Install uv (package manager)
+ curl -LsSf https://astral.sh/uv/install.sh | sh
+ export PATH="$HOME/.local/bin:$PATH"
+
+ # Ensure modern libstdc++ for FAISS (GLIBCXX >= 3.4.30)
+ sudo apt-get update -y
+ sudo apt-get install -y libstdc++6 libgomp1
+ # Also upgrade conda's libstdc++ in base env (SkyPilot images include conda)
+ if command -v conda >/dev/null 2>&1; then
+ conda install -y -n base -c conda-forge libstdcxx-ng
+ fi
+
+ # Install LEANN CLI and backends into the user environment
+ uv pip install --upgrade pip
+ uv pip install leann-core leann-backend-hnsw leann-backend-diskann
+
+run: |
+ export PATH="$HOME/.local/bin:$PATH"
+ # Derive flags from env
+ recompute_flag=""
+ if [ "${recompute}" = "false" ] || [ "${recompute}" = "0" ]; then
+ recompute_flag="--no-recompute"
+ fi
+ force_flag=""
+ if [ "${force}" = "true" ] || [ "${force}" = "1" ]; then
+ force_flag="--force"
+ fi
+
+ # Build command
+ python -m leann.cli build ${index_name} \
+ --docs ~/leann-data \
+ --backend ${backend} \
+ --complexity ${complexity} \
+ --graph-degree ${graph_degree} \
+ --num-threads ${num_threads} \
+ --embedding-mode ${embedding_mode} \
+ --embedding-model ${embedding_model} \
+ ${recompute_flag} ${force_flag} ${extra_args}
+
+ # Print where the index is stored for downstream rsync
+ echo "INDEX_OUT_DIR=~/.leann/indexes/${index_name}"
diff --git a/uv.lock b/uv.lock
index 5b990e7..0822583 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2223,7 +2223,7 @@ wheels = [
[[package]]
name = "leann-backend-diskann"
-version = "0.2.8"
+version = "0.2.9"
source = { editable = "packages/leann-backend-diskann" }
dependencies = [
{ name = "leann-core" },
@@ -2235,14 +2235,14 @@ dependencies = [
[package.metadata]
requires-dist = [
- { name = "leann-core", specifier = "==0.2.8" },
+ { name = "leann-core", specifier = "==0.2.9" },
{ name = "numpy" },
{ name = "protobuf", specifier = ">=3.19.0" },
]
[[package]]
name = "leann-backend-hnsw"
-version = "0.2.8"
+version = "0.2.9"
source = { editable = "packages/leann-backend-hnsw" }
dependencies = [
{ name = "leann-core" },
@@ -2255,7 +2255,7 @@ dependencies = [
[package.metadata]
requires-dist = [
- { name = "leann-core", specifier = "==0.2.8" },
+ { name = "leann-core", specifier = "==0.2.9" },
{ name = "msgpack", specifier = ">=1.0.0" },
{ name = "numpy" },
{ name = "pyzmq", specifier = ">=23.0.0" },
@@ -2263,7 +2263,7 @@ requires-dist = [
[[package]]
name = "leann-core"
-version = "0.2.8"
+version = "0.2.9"
source = { editable = "packages/leann-core" }
dependencies = [
{ name = "accelerate" },