[EXP] Update the benchmark code (#71)

* chore(hnsw): reorder imports to satisfy ruff I001

* chore: sync changes; fix Ruff import order; update examples, benchmarks, and dependencies
  - Fix import order in packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py (Ruff I001)
  - Update benchmarks/run_evaluation.py
  - Update apps/base_rag_example.py and leann-core API usage
  - Add benchmarks/data/README.md
  - Update uv.lock
  - Misc cleanup
  - Note: added paru-bin as an embedded git repo; if this was unintended, remove it (git rm --cached paru-bin), otherwise consider making it a proper submodule

* chore: remove unintended embedded repo paru-bin and ignore it
  Fix CI: avoid missing .gitmodules entry by removing gitlink and adding to .gitignore.

* ci: retrigger after removing unintended gitlink (paru-bin)

* feat(benchmarks): add --batch-size option and plumb through to HNSW search (default 0)

* feat(hnsw): add batch_size to LeannSearcher.search and LeannChat.ask; forward only for HNSW backend

* chore(logging): surface recompute and batching params; enable INFO logging in benchmark

* feat(embeddings): add optional manual tokenization path (HF tokenizer+model) with mean pooling; default remains SentenceTransformer.encode

* fix micro-benchmark and fix pre-commit

* update README

---------

Co-authored-by: yichuan-w <yichuan-w@users.noreply.github.com>
2025-08-20 17:31:46 -07:00
parent 6d11e86e71
commit dde2221513
11 changed files with 296 additions and 302 deletions
--- a/benchmarks/simple_mac_tpt_test.py
+++ b/benchmarks/simple_mac_tpt_test.py
@@ -20,7 +20,7 @@ except ImportError:

@dataclass
 class BenchmarkConfig:
-    model_path: str = "facebook/contriever"
+    model_path: str = "facebook/contriever-msmarco"
    batch_sizes: list[int] = None
    seq_length: int = 256
    num_runs: int = 5
@@ -34,7 +34,7 @@ class BenchmarkConfig:

    def __post_init__(self):
        if self.batch_sizes is None:
-            self.batch_sizes = [1, 2, 4, 8, 16, 32, 64]
+            self.batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]


 class MLXBenchmark:
@@ -179,11 +179,14 @@ class Benchmark:

    def _run_inference(self, input_ids: torch.Tensor) -> float:
        attention_mask = torch.ones_like(input_ids)
-
+        # print shape of input_ids and attention_mask
+        print(f"input_ids shape: {input_ids.shape}")
+        print(f"attention_mask shape: {attention_mask.shape}")
        start_time = time.time()
        with torch.no_grad():
            self.model(input_ids=input_ids, attention_mask=attention_mask)
-        # mps sync
+        if torch.cuda.is_available():
+            torch.cuda.synchronize()
        if torch.backends.mps.is_available():
            torch.mps.synchronize()
        end_time = time.time()