From ed72232babc8e43f5c17bfdaedd54c36fe4f05e6 Mon Sep 17 00:00:00 2001
From: Andy Lee <andylizf@outlook.com>
Date: Fri, 22 Aug 2025 13:50:57 -0700
Subject: [PATCH] style: apply auto-formatter (line wrapping, trailing whitespace, EOF newlines)

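---
NOTE(review): besides the formatting changes, this patch also adds a `paru-bin`
gitlink (mode 160000), which looks unrelated to formatting; please confirm it
is intended before applying.
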
 .../financebench/evaluate_financebench.py     |  2 +-
 benchmarks/laion/.gitignore                   |  2 +-
 benchmarks/laion/README.md                    |  2 +-
 benchmarks/laion/evaluate_laion.py            |  9 +++--
 .../leann_backend_hnsw/hnsw_backend.py        | 34 +++++--------------
 packages/leann-core/src/leann/api.py          | 10 ++++--
 paru-bin                                      |  1 +
 7 files changed, 28 insertions(+), 32 deletions(-)
 create mode 160000 paru-bin

diff --git a/benchmarks/financebench/evaluate_financebench.py b/benchmarks/financebench/evaluate_financebench.py
index ac05cce..948b355 100755
--- a/benchmarks/financebench/evaluate_financebench.py
+++ b/benchmarks/financebench/evaluate_financebench.py
@@ -482,7 +482,7 @@ class FinanceBenchEvaluator:
         self, generated_answer: str, ground_truth: str, question: str
     ) -> bool:
         """Check if generated answer matches ground truth using LLM as judge"""
-        judge_prompt = f"""You are an expert judge evaluating financial question answering. 
+        judge_prompt = f"""You are an expert judge evaluating financial question answering.
 
 Question: {question}
 
diff --git a/benchmarks/laion/.gitignore b/benchmarks/laion/.gitignore
index adbb97d..8fce603 100644
--- a/benchmarks/laion/.gitignore
+++ b/benchmarks/laion/.gitignore
@@ -1 +1 @@
-data/
\ No newline at end of file
+data/
diff --git a/benchmarks/laion/README.md b/benchmarks/laion/README.md
index 38650f0..516f347 100644
--- a/benchmarks/laion/README.md
+++ b/benchmarks/laion/README.md
@@ -166,4 +166,4 @@ benchmarks/laion/
 - For real LAION data, implement actual download logic in `setup_laion.py`
 - CLIP embeddings are randomly generated - replace with real CLIP model for production
 - Adjust `num_samples` and `num_queries` based on available resources
-- Consider using `--num-samples` during evaluation for faster testing
\ No newline at end of file
+- Consider using `--num-samples` during evaluation for faster testing
diff --git a/benchmarks/laion/evaluate_laion.py b/benchmarks/laion/evaluate_laion.py
index eaafa8b..3b68480 100644
--- a/benchmarks/laion/evaluate_laion.py
+++ b/benchmarks/laion/evaluate_laion.py
@@ -323,7 +323,10 @@ class LAIONEvaluator:
                     f"  Storage saving by compact: {timing_metrics.get('storage_saving_percent', 0):.1f}%"
                 )
                 # Show excluded components for reference if available
-                if any(k in non_compact for k in ("passages_text_mb", "passages_index_mb", "metadata_mb")):
+                if any(
+                    k in non_compact
+                    for k in ("passages_text_mb", "passages_index_mb", "metadata_mb")
+                ):
                     print("  (passages excluded in totals, shown for reference):")
                     print(
                         f"    - Passages text: {non_compact.get('passages_text_mb', 0):.1f} MB, "
@@ -333,7 +336,9 @@ class LAIONEvaluator:
             else:
                 # Fallback to legacy totals if running with older metrics
                 print("\n📏 Index Comparison Analysis:")
-                print(f"  Compact index (current): {current.get('total_with_embeddings', 0):.1f} MB")
+                print(
+                    f"  Compact index (current): {current.get('total_with_embeddings', 0):.1f} MB"
+                )
                 print(
                     f"  Non-compact index (with embeddings): {non_compact.get('total_with_embeddings', 0):.1f} MB"
                 )
diff --git a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py
index 31c1524..4af18e2 100644
--- a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py
+++ b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py
@@ -118,16 +118,12 @@ class HNSWBuilder(LeannBackendBuilderInterface):
             # index_file_old = index_file.with_suffix(".old")
             # shutil.move(str(index_file), str(index_file_old))
             shutil.move(str(csr_temp_file), str(index_file))
-            logger.info(
-                f"INFO: Replaced original index with {mode_str} version at '{index_file}'"
-            )
+            logger.info(f"INFO: Replaced original index with {mode_str} version at '{index_file}'")
         else:
             # Clean up and fail fast
             if csr_temp_file.exists():
                 os.remove(csr_temp_file)
-            raise RuntimeError(
-                "CSR conversion failed - cannot proceed with compact format"
-            )
+            raise RuntimeError("CSR conversion failed - cannot proceed with compact format")
 
 
 class HNSWSearcher(BaseSearcher):
@@ -216,9 +212,7 @@ class HNSWSearcher(BaseSearcher):
             )
         if recompute_embeddings:
             if zmq_port is None:
-                raise ValueError(
-                    "zmq_port must be provided if recompute_embeddings is True"
-                )
+                raise ValueError("zmq_port must be provided if recompute_embeddings is True")
 
         if query.dtype != np.float32:
             query = query.astype(np.float32)
@@ -227,9 +221,7 @@ class HNSWSearcher(BaseSearcher):
 
         params = faiss.SearchParametersHNSW()
         if zmq_port is not None:
-            params.zmq_port = (
-                zmq_port  # C++ code won't use this if recompute_embeddings is False
-            )
+            params.zmq_port = zmq_port  # C++ code won't use this if recompute_embeddings is False
         params.efSearch = complexity
         params.beam_size = beam_width
 
@@ -237,8 +229,7 @@ class HNSWSearcher(BaseSearcher):
         # This prevents early termination when all scores are in a narrow range
         embedding_model = self.meta.get("embedding_model", "").lower()
         if self.distance_metric == "cosine" and any(
-            openai_model in embedding_model
-            for openai_model in ["text-embedding", "openai"]
+            openai_model in embedding_model for openai_model in ["text-embedding", "openai"]
         ):
             params.check_relative_distance = False
         else:
@@ -253,9 +244,7 @@ class HNSWSearcher(BaseSearcher):
             params.send_neigh_times_ratio = 0.0
         elif pruning_strategy == "proportional":
             params.local_prune = False
-            params.send_neigh_times_ratio = (
-                1.0  # Any value > 1e-6 triggers proportional mode
-            )
+            params.send_neigh_times_ratio = 1.0  # Any value > 1e-6 triggers proportional mode
         else:  # "global"
             params.local_prune = False
             params.send_neigh_times_ratio = 0.0
@@ -277,9 +266,7 @@ class HNSWSearcher(BaseSearcher):
             params,
         )
         search_time = time.time() - search_time
-        logger.info(
-            f"  Search time in HNSWSearcher.search() backend: {search_time} seconds"
-        )
+        logger.info(f"  Search time in HNSWSearcher.search() backend: {search_time} seconds")
         if self._id_map:
 
             def map_label(x: int) -> str:
@@ -287,13 +274,10 @@ class HNSWSearcher(BaseSearcher):
                     return self._id_map[x]
                 return str(x)
 
-            string_labels = [
-                [map_label(int(l)) for l in batch_labels] for batch_labels in labels
-            ]
+            string_labels = [[map_label(int(l)) for l in batch_labels] for batch_labels in labels]
         else:
             string_labels = [
-                [str(int_label) for int_label in batch_labels]
-                for batch_labels in labels
+                [str(int_label) for int_label in batch_labels] for batch_labels in labels
             ]
 
         return {"labels": string_labels, "distances": distances}
diff --git a/packages/leann-core/src/leann/api.py b/packages/leann-core/src/leann/api.py
index 8ae2e67..49f61a6 100644
--- a/packages/leann-core/src/leann/api.py
+++ b/packages/leann-core/src/leann/api.py
@@ -447,7 +447,10 @@ class LeannBuilder:
         string_ids = [chunk["id"] for chunk in self.chunks]
         # Persist ID map alongside index so backends that return integer labels can remap to passage IDs
         try:
-            idmap_file = index_dir / f"{index_name[: -len('.leann')] if index_name.endswith('.leann') else index_name}.ids.txt"
+            idmap_file = (
+                index_dir
+                / f"{index_name[: -len('.leann')] if index_name.endswith('.leann') else index_name}.ids.txt"
+            )
             with open(idmap_file, "w", encoding="utf-8") as f:
                 for sid in string_ids:
                     f.write(str(sid) + "\n")
@@ -573,7 +576,10 @@ class LeannBuilder:
         string_ids = [str(id_val) for id_val in ids]
         # Persist ID map (order == embeddings order)
         try:
-            idmap_file = index_dir / f"{index_name[: -len('.leann')] if index_name.endswith('.leann') else index_name}.ids.txt"
+            idmap_file = (
+                index_dir
+                / f"{index_name[: -len('.leann')] if index_name.endswith('.leann') else index_name}.ids.txt"
+            )
             with open(idmap_file, "w", encoding="utf-8") as f:
                 for sid in string_ids:
                     f.write(str(sid) + "\n")
diff --git a/paru-bin b/paru-bin
new file mode 160000
index 0000000..92a5542
--- /dev/null
+++ b/paru-bin
@@ -0,0 +1 @@
+Subproject commit 92a55429afbec4fceeb2cef843245105307444d2