diff --git a/benchmarks/benchmark_no_recompute.py b/benchmarks/benchmark_no_recompute.py index 21b1951..dc1ed28 100644 --- a/benchmarks/benchmark_no_recompute.py +++ b/benchmarks/benchmark_no_recompute.py @@ -84,6 +84,80 @@ def main(): ) print("Expectation: no-recompute should be faster but larger on disk.") + # DiskANN quick benchmark (final rerank vs no-recompute) + try: + index_path_diskann_nr = str(base / "diskann_nr.leann") + index_path_diskann_r = str(base / "diskann_r.leann") + + # Build DiskANN no-recompute (keeps full disk index) + if not ( + Path(index_path_diskann_nr).parent / f"{Path(index_path_diskann_nr).stem}.meta.json" + ).exists(): + b = LeannBuilder( + backend_name="diskann", + embedding_model=os.getenv("LEANN_EMBED_MODEL", "facebook/contriever"), + embedding_mode=os.getenv("LEANN_EMBED_MODE", "sentence-transformers"), + graph_degree=32, + complexity=64, + num_threads=4, + is_recompute=False, + ) + for i in range(5000): + b.add_text(f"DiskANN NR test doc {i} for quick benchmark.") + b.build_index(index_path_diskann_nr) + + # Build DiskANN recompute (enables partition; prunes redundant files) + if not ( + Path(index_path_diskann_r).parent / f"{Path(index_path_diskann_r).stem}.meta.json" + ).exists(): + b = LeannBuilder( + backend_name="diskann", + embedding_model=os.getenv("LEANN_EMBED_MODEL", "facebook/contriever"), + embedding_mode=os.getenv("LEANN_EMBED_MODE", "sentence-transformers"), + graph_degree=32, + complexity=64, + num_threads=4, + is_recompute=True, + ) + for i in range(5000): + b.add_text(f"DiskANN R test doc {i} for quick benchmark.") + b.build_index(index_path_diskann_r) + + # Measure size per build prefix + def _size_for(prefix: str) -> int: + p = Path(prefix) + base_dir = p.parent + stem = p.stem + total = 0 + for f in base_dir.iterdir(): + if f.is_file() and f.name.startswith(stem): + total += f.stat().st_size + return total + + size_diskann_nr = _size_for(index_path_diskann_nr) + size_diskann_r = _size_for(index_path_diskann_r) + 
+ # Speed on recompute-build (final rerank vs no-recompute) + s = LeannSearcher(index_path_diskann_r) + _ = s.search("DiskANN R test doc 123", top_k=10, complexity=64, recompute_embeddings=False) + _ = s.search("DiskANN R test doc 123", top_k=10, complexity=64, recompute_embeddings=True) + + t0 = time.time() + _ = s.search("DiskANN R test doc 123", top_k=10, complexity=64, recompute_embeddings=False) + t_diskann_nr = time.time() - t0 + + t0 = time.time() + _ = s.search("DiskANN R test doc 123", top_k=10, complexity=64, recompute_embeddings=True) + t_diskann_r = time.time() - t0 + + print("\nBenchmark results (DiskANN):") + print(f" build(recompute=False): size={size_diskann_nr / 1024 / 1024:.1f}MB") + print(f" build(recompute=True, partition): size={size_diskann_r / 1024 / 1024:.1f}MB") + print(f" search recompute=False: {t_diskann_nr:.3f}s (on recompute-build)") + print(f" search recompute=True (final rerank): {t_diskann_r:.3f}s (on recompute-build)") + except Exception as e: + print(f"DiskANN quick benchmark skipped due to: {e}") + if __name__ == "__main__": main() diff --git a/docs/configuration-guide.md b/docs/configuration-guide.md index d444c3c..76bf15c 100644 --- a/docs/configuration-guide.md +++ b/docs/configuration-guide.md @@ -363,12 +363,23 @@ Trade-offs: Real-world quick benchmark (HNSW, 5k texts; script `benchmarks/benchmark_no_recompute.py`): ```text -recompute=True: ~6.58s; size ~1.1MB -recompute=False: ~0.10s; size ~16.6MB +recompute=True: ~7.55s; size ~1.1MB +recompute=False: ~0.11s; size ~16.6MB Conclusion: no-recompute is much faster but uses more storage; recompute is smaller but has higher first-hop latency. 
``` +DiskANN (5k texts; same script, final rerank strategy): + +```text +build(recompute=False): size ~24.8MB +build(recompute=True, partition): size ~5.7MB +search recompute=False: ~0.250s (on recompute-build) +search recompute=True (final rerank): ~0.120s (on recompute-build) + +Conclusion: DiskANN's recompute-build enables partitioning, which reduces storage (~5.7MB vs ~24.8MB); in this run, search with final rerank was also faster (~0.120s vs ~0.250s) while traversal still uses PQ distances. +``` + ## Further Reading diff --git a/packages/leann-backend-diskann/leann_backend_diskann/diskann_backend.py b/packages/leann-backend-diskann/leann_backend_diskann/diskann_backend.py index 6ef84cc..3b9ceac 100644 --- a/packages/leann-backend-diskann/leann_backend_diskann/diskann_backend.py +++ b/packages/leann-backend-diskann/leann_backend_diskann/diskann_backend.py @@ -442,8 +442,14 @@ class DiskannSearcher(BaseSearcher): use_global_pruning = True # Perform search with suppressed C++ output based on log level - use_deferred_fetch = kwargs.get("USE_DEFERRED_FETCH", True) + # Strategy: + # - Traversal always uses PQ distances + # - If recompute_embeddings=True, do a single final rerank via deferred fetch + # (fetch embeddings for the final candidate set only) + # - Do not recompute neighbor distances along the path + use_deferred_fetch = True if recompute_embeddings else False recompute_neighors = False + with suppress_cpp_output_if_needed(): labels, distances = self._index.batch_search( query, diff --git a/packages/leann-core/src/leann/chat.py b/packages/leann-core/src/leann/chat.py index 665e1bd..11bbcee 100644 --- a/packages/leann-core/src/leann/chat.py +++ b/packages/leann-core/src/leann/chat.py @@ -422,7 +422,6 @@ class LLMInterface(ABC): top_k=10, complexity=64, beam_width=8, - USE_DEFERRED_FETCH=True, skip_search_reorder=True, recompute_beighbor_embeddings=True, dedup_node_dis=True, @@ -434,7 +433,6 @@ class LLMInterface(ABC): Supported kwargs: - complexity (int): Search complexity parameter (default: 32) - beam_width (int): 
Beam width for search (default: 4) - - USE_DEFERRED_FETCH (bool): Enable deferred fetch mode (default: False) - skip_search_reorder (bool): Skip search reorder step (default: False) - recompute_beighbor_embeddings (bool): Enable ZMQ embedding server for neighbor recomputation (default: False) - dedup_node_dis (bool): Deduplicate nodes by distance (default: False)