benchmarks: fix and extend HNSW+DiskANN recompute vs no-recompute; docs: add fresh numbers and DiskANN notes
This commit is contained in:
@@ -84,6 +84,80 @@ def main():
|
|||||||
)
|
)
|
||||||
print("Expectation: no-recompute should be faster but larger on disk.")
|
print("Expectation: no-recompute should be faster but larger on disk.")
|
||||||
|
|
||||||
|
# DiskANN quick benchmark (final rerank vs no-recompute)
|
||||||
|
try:
|
||||||
|
index_path_diskann_nr = str(base / "diskann_nr.leann")
|
||||||
|
index_path_diskann_r = str(base / "diskann_r.leann")
|
||||||
|
|
||||||
|
# Build DiskANN no-recompute (keeps full disk index)
|
||||||
|
if not (
|
||||||
|
Path(index_path_diskann_nr).parent / f"{Path(index_path_diskann_nr).stem}.meta.json"
|
||||||
|
).exists():
|
||||||
|
b = LeannBuilder(
|
||||||
|
backend_name="diskann",
|
||||||
|
embedding_model=os.getenv("LEANN_EMBED_MODEL", "facebook/contriever"),
|
||||||
|
embedding_mode=os.getenv("LEANN_EMBED_MODE", "sentence-transformers"),
|
||||||
|
graph_degree=32,
|
||||||
|
complexity=64,
|
||||||
|
num_threads=4,
|
||||||
|
is_recompute=False,
|
||||||
|
)
|
||||||
|
for i in range(5000):
|
||||||
|
b.add_text(f"DiskANN NR test doc {i} for quick benchmark.")
|
||||||
|
b.build_index(index_path_diskann_nr)
|
||||||
|
|
||||||
|
# Build DiskANN recompute (enables partition; prunes redundant files)
|
||||||
|
if not (
|
||||||
|
Path(index_path_diskann_r).parent / f"{Path(index_path_diskann_r).stem}.meta.json"
|
||||||
|
).exists():
|
||||||
|
b = LeannBuilder(
|
||||||
|
backend_name="diskann",
|
||||||
|
embedding_model=os.getenv("LEANN_EMBED_MODEL", "facebook/contriever"),
|
||||||
|
embedding_mode=os.getenv("LEANN_EMBED_MODE", "sentence-transformers"),
|
||||||
|
graph_degree=32,
|
||||||
|
complexity=64,
|
||||||
|
num_threads=4,
|
||||||
|
is_recompute=True,
|
||||||
|
)
|
||||||
|
for i in range(5000):
|
||||||
|
b.add_text(f"DiskANN R test doc {i} for quick benchmark.")
|
||||||
|
b.build_index(index_path_diskann_r)
|
||||||
|
|
||||||
|
# Measure size per build prefix
|
||||||
|
def _size_for(prefix: str) -> int:
|
||||||
|
p = Path(prefix)
|
||||||
|
base_dir = p.parent
|
||||||
|
stem = p.stem
|
||||||
|
total = 0
|
||||||
|
for f in base_dir.iterdir():
|
||||||
|
if f.is_file() and f.name.startswith(stem):
|
||||||
|
total += f.stat().st_size
|
||||||
|
return total
|
||||||
|
|
||||||
|
size_diskann_nr = _size_for(index_path_diskann_nr)
|
||||||
|
size_diskann_r = _size_for(index_path_diskann_r)
|
||||||
|
|
||||||
|
# Speed on recompute-build (final rerank vs no-recompute)
|
||||||
|
s = LeannSearcher(index_path_diskann_r)
|
||||||
|
_ = s.search("DiskANN R test doc 123", top_k=10, complexity=64, recompute_embeddings=False)
|
||||||
|
_ = s.search("DiskANN R test doc 123", top_k=10, complexity=64, recompute_embeddings=True)
|
||||||
|
|
||||||
|
t0 = time.time()
|
||||||
|
_ = s.search("DiskANN R test doc 123", top_k=10, complexity=64, recompute_embeddings=False)
|
||||||
|
t_diskann_nr = time.time() - t0
|
||||||
|
|
||||||
|
t0 = time.time()
|
||||||
|
_ = s.search("DiskANN R test doc 123", top_k=10, complexity=64, recompute_embeddings=True)
|
||||||
|
t_diskann_r = time.time() - t0
|
||||||
|
|
||||||
|
print("\nBenchmark results (DiskANN):")
|
||||||
|
print(f" build(recompute=False): size={size_diskann_nr / 1024 / 1024:.1f}MB")
|
||||||
|
print(f" build(recompute=True, partition): size={size_diskann_r / 1024 / 1024:.1f}MB")
|
||||||
|
print(f" search recompute=False: {t_diskann_nr:.3f}s (on recompute-build)")
|
||||||
|
print(f" search recompute=True (final rerank): {t_diskann_r:.3f}s (on recompute-build)")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"DiskANN quick benchmark skipped due to: {e}")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
@@ -363,12 +363,23 @@ Trade-offs:
|
|||||||
Real-world quick benchmark (HNSW, 5k texts; script `benchmarks/benchmark_no_recompute.py`):
|
Real-world quick benchmark (HNSW, 5k texts; script `benchmarks/benchmark_no_recompute.py`):
|
||||||
|
|
||||||
```text
|
```text
|
||||||
recompute=True: ~6.58s; size ~1.1MB
|
recompute=True: ~7.55s; size ~1.1MB
|
||||||
recompute=False: ~0.10s; size ~16.6MB
|
recompute=False: ~0.11s; size ~16.6MB
|
||||||
|
|
||||||
Conclusion: no-recompute is much faster but uses more storage; recompute is smaller but has higher first-hop latency.
|
Conclusion: no-recompute is much faster but uses more storage; recompute is smaller but has higher first-hop latency.
|
||||||
```
|
```
|
||||||
|
|
||||||
|
DiskANN (5k texts; same script, final rerank strategy):
|
||||||
|
|
||||||
|
```text
|
||||||
|
build(recompute=False): size ~24.8MB
|
||||||
|
build(recompute=True, partition): size ~5.7MB
|
||||||
|
search recompute=False: ~0.250s (on recompute-build)
|
||||||
|
search recompute=True (final rerank): ~0.120s (on recompute-build)
|
||||||
|
|
||||||
|
Conclusion: building DiskANN with recompute enabled turns on partitioning, which reduces storage (~5.7MB vs ~24.8MB); in this run, search with the final rerank was also faster (~0.120s vs ~0.250s), while graph traversal itself still uses PQ distances.
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
## Further Reading
|
## Further Reading
|
||||||
|
|
||||||
|
|||||||
@@ -442,8 +442,14 @@ class DiskannSearcher(BaseSearcher):
|
|||||||
use_global_pruning = True
|
use_global_pruning = True
|
||||||
|
|
||||||
# Perform search with suppressed C++ output based on log level
|
# Perform search with suppressed C++ output based on log level
|
||||||
use_deferred_fetch = kwargs.get("USE_DEFERRED_FETCH", True)
|
# Strategy:
|
||||||
|
# - Traversal always uses PQ distances
|
||||||
|
# - If recompute_embeddings=True, do a single final rerank via deferred fetch
|
||||||
|
# (fetch embeddings for the final candidate set only)
|
||||||
|
# - Do not recompute neighbor distances along the path
|
||||||
|
use_deferred_fetch = True if recompute_embeddings else False
|
||||||
recompute_neighors = False
|
recompute_neighors = False
|
||||||
|
|
||||||
with suppress_cpp_output_if_needed():
|
with suppress_cpp_output_if_needed():
|
||||||
labels, distances = self._index.batch_search(
|
labels, distances = self._index.batch_search(
|
||||||
query,
|
query,
|
||||||
|
|||||||
@@ -422,7 +422,6 @@ class LLMInterface(ABC):
|
|||||||
top_k=10,
|
top_k=10,
|
||||||
complexity=64,
|
complexity=64,
|
||||||
beam_width=8,
|
beam_width=8,
|
||||||
USE_DEFERRED_FETCH=True,
|
|
||||||
skip_search_reorder=True,
|
skip_search_reorder=True,
|
||||||
recompute_beighbor_embeddings=True,
|
recompute_beighbor_embeddings=True,
|
||||||
dedup_node_dis=True,
|
dedup_node_dis=True,
|
||||||
@@ -434,7 +433,6 @@ class LLMInterface(ABC):
|
|||||||
Supported kwargs:
|
Supported kwargs:
|
||||||
- complexity (int): Search complexity parameter (default: 32)
|
- complexity (int): Search complexity parameter (default: 32)
|
||||||
- beam_width (int): Beam width for search (default: 4)
|
- beam_width (int): Beam width for search (default: 4)
|
||||||
- USE_DEFERRED_FETCH (bool): Enable deferred fetch mode (default: False)
|
|
||||||
- skip_search_reorder (bool): Skip search reorder step (default: False)
|
- skip_search_reorder (bool): Skip search reorder step (default: False)
|
||||||
- recompute_beighbor_embeddings (bool): Enable ZMQ embedding server for neighbor recomputation (default: False)
|
- recompute_beighbor_embeddings (bool): Enable ZMQ embedding server for neighbor recomputation (default: False)
|
||||||
- dedup_node_dis (bool): Deduplicate nodes by distance (default: False)
|
- dedup_node_dis (bool): Deduplicate nodes by distance (default: False)
|
||||||
|
|||||||
Reference in New Issue
Block a user