diff --git a/benchmarks/benchmark_no_recompute.py b/benchmarks/benchmark_no_recompute.py
index 8f218d1..21b1951 100644
--- a/benchmarks/benchmark_no_recompute.py
+++ b/benchmarks/benchmark_no_recompute.py
@@ -61,12 +61,19 @@ def main():
     t_recompute = bench_once(index_path_recompute, recompute=True)
     t_norecompute = bench_once(index_path_norecompute, recompute=False)
 
-    size_recompute = sum(
-        f.stat().st_size for f in Path(index_path_recompute).parent.iterdir() if f.is_file()
-    )
-    size_norecompute = sum(
-        f.stat().st_size for f in Path(index_path_norecompute).parent.iterdir() if f.is_file()
-    )
+    # Sum the sizes of regular files in the index's directory whose names start with its stem
+    def _size_for(prefix: str) -> int:
+        p = Path(prefix)
+        base = p.parent
+        stem = p.stem  # final suffix is dropped, e.g. 'recompute.leann' -> 'recompute'
+        total = 0
+        for f in base.iterdir():
+            if f.is_file() and f.name.startswith(stem):
+                total += f.stat().st_size
+        return total
+
+    size_recompute = _size_for(index_path_recompute)
+    size_norecompute = _size_for(index_path_norecompute)
 
     print("Benchmark results (HNSW):")
     print(
diff --git a/docs/configuration-guide.md b/docs/configuration-guide.md
index a7393ce..d444c3c 100644
--- a/docs/configuration-guide.md
+++ b/docs/configuration-guide.md
@@ -360,6 +360,15 @@ Trade-offs:
 - Significantly higher storage (10–100× vs selective recomputation)
 - Slightly larger memory footprint during build and search
 
+Real-world quick benchmark (HNSW, 5k texts; script `benchmarks/benchmark_no_recompute.py`):
+
+```text
+recompute=True: ~6.58s; size ~1.1MB
+recompute=False: ~0.10s; size ~16.6MB
+
+Conclusion: no-recompute is much faster but uses more storage; recompute is smaller but has higher first-hop latency.
+```
+
 ## Further Reading