From d7011bbea02fbba90171eee2b2c0b3a9c5dff4ed Mon Sep 17 00:00:00 2001 From: Andy Lee Date: Mon, 25 Aug 2025 16:25:59 -0700 Subject: [PATCH] docs: data --- .gitmodules | 2 -- benchmarks/bm25_diskann_baselines/README.md | 6 +++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.gitmodules b/.gitmodules index 813256e..c1cd540 100644 --- a/.gitmodules +++ b/.gitmodules @@ -14,5 +14,3 @@ [submodule "packages/leann-backend-hnsw/third_party/libzmq"] path = packages/leann-backend-hnsw/third_party/libzmq url = https://github.com/zeromq/libzmq.git - -# Ensure CI can update this submodule; used only for Arch packaging and not required for builds. diff --git a/benchmarks/bm25_diskann_baselines/README.md b/benchmarks/bm25_diskann_baselines/README.md index 297a067..cd624ef 100644 --- a/benchmarks/bm25_diskann_baselines/README.md +++ b/benchmarks/bm25_diskann_baselines/README.md @@ -11,13 +11,13 @@ aws s3 sync s3://powerrag-diskann-rpj-wiki-20250824-224037-194d640c/diskann_rpj_ DiskANN (NQ queries, search-only) - Command: `uv run --script benchmarks/bm25_diskann_baselines/run_diskann.py` - Settings: `recompute_embeddings=False`, embeddings precomputed (excluded from timing), batching off, caching off (`cache_mechanism=2`, `num_nodes_to_cache=0`) -- Result: avg 0.019339 s/query, QPS 51.71 (p50 ~0.018936 s, p95 ~0.023573 s) +- Result: avg 0.011093 s/query, QPS 90.15 (p50 0.010731 s, p95 0.015000 s) BM25 - Command: `uv run --script benchmarks/bm25_diskann_baselines/run_bm25.py` - Settings: `k=10`, `k1=0.9`, `b=0.4`, queries=100 -- Result: avg 0.026976 s/query, QPS 37.07 (p50 0.024729 s, p90 0.042158 s, p95 0.047099 s, p99 0.053520 s) +- Result: avg 0.028589 s/query, QPS 34.97 (p50 0.026060 s, p90 0.043695 s, p95 0.053260 s, p99 0.055257 s) Notes - DiskANN measures search-only latency on real NQ queries (embeddings computed beforehand and excluded from timing). -- Use `benchmarks/bm25_diskann_baselines/run_diskann.py` for DiskANN; `benchmarks/run_bm25.py` for BM25. +- Use `benchmarks/bm25_diskann_baselines/run_diskann.py` for DiskANN; `benchmarks/bm25_diskann_baselines/run_bm25.py` for BM25.