From ed72232babc8e43f5c17bfdaedd54c36fe4f05e6 Mon Sep 17 00:00:00 2001 From: Andy Lee Date: Fri, 22 Aug 2025 13:50:57 -0700 Subject: [PATCH] style: format --- .../financebench/evaluate_financebench.py | 2 +- benchmarks/laion/.gitignore | 2 +- benchmarks/laion/README.md | 2 +- benchmarks/laion/evaluate_laion.py | 9 +++-- .../leann_backend_hnsw/hnsw_backend.py | 34 +++++-------------- packages/leann-core/src/leann/api.py | 10 ++++-- paru-bin | 1 + 7 files changed, 28 insertions(+), 32 deletions(-) create mode 160000 paru-bin diff --git a/benchmarks/financebench/evaluate_financebench.py b/benchmarks/financebench/evaluate_financebench.py index ac05cce..948b355 100755 --- a/benchmarks/financebench/evaluate_financebench.py +++ b/benchmarks/financebench/evaluate_financebench.py @@ -482,7 +482,7 @@ class FinanceBenchEvaluator: self, generated_answer: str, ground_truth: str, question: str ) -> bool: """Check if generated answer matches ground truth using LLM as judge""" - judge_prompt = f"""You are an expert judge evaluating financial question answering. + judge_prompt = f"""You are an expert judge evaluating financial question answering. Question: {question} diff --git a/benchmarks/laion/.gitignore b/benchmarks/laion/.gitignore index adbb97d..8fce603 100644 --- a/benchmarks/laion/.gitignore +++ b/benchmarks/laion/.gitignore @@ -1 +1 @@ -data/ \ No newline at end of file +data/ diff --git a/benchmarks/laion/README.md b/benchmarks/laion/README.md index 38650f0..516f347 100644 --- a/benchmarks/laion/README.md +++ b/benchmarks/laion/README.md @@ -166,4 +166,4 @@ benchmarks/laion/ - For real LAION data, implement actual download logic in `setup_laion.py` - CLIP embeddings are randomly generated - replace with real CLIP model for production - Adjust `num_samples` and `num_queries` based on available resources -- Consider using `--num-samples` during evaluation for faster testing \ No newline at end of file +- Consider using `--num-samples` during evaluation for faster testing diff --git a/benchmarks/laion/evaluate_laion.py b/benchmarks/laion/evaluate_laion.py index eaafa8b..3b68480 100644 --- a/benchmarks/laion/evaluate_laion.py +++ b/benchmarks/laion/evaluate_laion.py @@ -323,7 +323,10 @@ class LAIONEvaluator: f" Storage saving by compact: {timing_metrics.get('storage_saving_percent', 0):.1f}%" ) # Show excluded components for reference if available - if any(k in non_compact for k in ("passages_text_mb", "passages_index_mb", "metadata_mb")): + if any( + k in non_compact + for k in ("passages_text_mb", "passages_index_mb", "metadata_mb") + ): print(" (passages excluded in totals, shown for reference):") print( f" - Passages text: {non_compact.get('passages_text_mb', 0):.1f} MB, " @@ -333,7 +336,9 @@ class LAIONEvaluator: else: # Fallback to legacy totals if running with older metrics print("\nšŸ“ Index Comparison Analysis:") - print(f" Compact index (current): {current.get('total_with_embeddings', 0):.1f} MB") + print( + f" Compact index (current): {current.get('total_with_embeddings', 0):.1f} MB" + ) print( f" Non-compact index (with embeddings): {non_compact.get('total_with_embeddings', 0):.1f} MB" ) diff --git a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py index 31c1524..4af18e2 100644 --- a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py +++ b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py @@ -118,16 +118,12 @@ class HNSWBuilder(LeannBackendBuilderInterface): # index_file_old = index_file.with_suffix(".old") # shutil.move(str(index_file), str(index_file_old)) shutil.move(str(csr_temp_file), str(index_file)) - logger.info( - f"INFO: Replaced original index with {mode_str} version at '{index_file}'" - ) + logger.info(f"INFO: Replaced original index with {mode_str} version at '{index_file}'") else: # Clean up and fail fast if csr_temp_file.exists(): os.remove(csr_temp_file) - raise RuntimeError( - "CSR conversion failed - cannot proceed with compact format" - ) + raise RuntimeError("CSR conversion failed - cannot proceed with compact format") class HNSWSearcher(BaseSearcher): @@ -216,9 +212,7 @@ class HNSWSearcher(BaseSearcher): ) if recompute_embeddings: if zmq_port is None: - raise ValueError( - "zmq_port must be provided if recompute_embeddings is True" - ) + raise ValueError("zmq_port must be provided if recompute_embeddings is True") if query.dtype != np.float32: query = query.astype(np.float32) @@ -227,9 +221,7 @@ class HNSWSearcher(BaseSearcher): params = faiss.SearchParametersHNSW() if zmq_port is not None: - params.zmq_port = ( - zmq_port # C++ code won't use this if recompute_embeddings is False - ) + params.zmq_port = zmq_port # C++ code won't use this if recompute_embeddings is False params.efSearch = complexity params.beam_size = beam_width @@ -237,8 +229,7 @@ class HNSWSearcher(BaseSearcher): # This prevents early termination when all scores are in a narrow range embedding_model = self.meta.get("embedding_model", "").lower() if self.distance_metric == "cosine" and any( - openai_model in embedding_model - for openai_model in ["text-embedding", "openai"] + openai_model in embedding_model for openai_model in ["text-embedding", "openai"] ): params.check_relative_distance = False else: @@ -253,9 +244,7 @@ class HNSWSearcher(BaseSearcher): params.send_neigh_times_ratio = 0.0 elif pruning_strategy == "proportional": params.local_prune = False - params.send_neigh_times_ratio = ( - 1.0 # Any value > 1e-6 triggers proportional mode - ) + params.send_neigh_times_ratio = 1.0 # Any value > 1e-6 triggers proportional mode else: # "global" params.local_prune = False params.send_neigh_times_ratio = 0.0 @@ -277,9 +266,7 @@ class HNSWSearcher(BaseSearcher): params, ) search_time = time.time() - search_time - logger.info( - f" Search time in HNSWSearcher.search() backend: {search_time} seconds" - ) + logger.info(f" Search time in HNSWSearcher.search() backend: {search_time} seconds") if self._id_map: def map_label(x: int) -> str: @@ -287,13 +274,10 @@ class HNSWSearcher(BaseSearcher): return self._id_map[x] return str(x) - string_labels = [ - [map_label(int(l)) for l in batch_labels] for batch_labels in labels - ] + string_labels = [[map_label(int(l)) for l in batch_labels] for batch_labels in labels] else: string_labels = [ - [str(int_label) for int_label in batch_labels] - for batch_labels in labels + [str(int_label) for int_label in batch_labels] for batch_labels in labels ] return {"labels": string_labels, "distances": distances} diff --git a/packages/leann-core/src/leann/api.py b/packages/leann-core/src/leann/api.py index 8ae2e67..49f61a6 100644 --- a/packages/leann-core/src/leann/api.py +++ b/packages/leann-core/src/leann/api.py @@ -447,7 +447,10 @@ class LeannBuilder: string_ids = [chunk["id"] for chunk in self.chunks] # Persist ID map alongside index so backends that return integer labels can remap to passage IDs try: - idmap_file = index_dir / f"{index_name[: -len('.leann')] if index_name.endswith('.leann') else index_name}.ids.txt" + idmap_file = ( + index_dir + / f"{index_name[: -len('.leann')] if index_name.endswith('.leann') else index_name}.ids.txt" + ) with open(idmap_file, "w", encoding="utf-8") as f: for sid in string_ids: f.write(str(sid) + "\n") @@ -573,7 +576,10 @@ class LeannBuilder: string_ids = [str(id_val) for id_val in ids] # Persist ID map (order == embeddings order) try: - idmap_file = index_dir / f"{index_name[: -len('.leann')] if index_name.endswith('.leann') else index_name}.ids.txt" + idmap_file = ( + index_dir + / f"{index_name[: -len('.leann')] if index_name.endswith('.leann') else index_name}.ids.txt" + ) with open(idmap_file, "w", encoding="utf-8") as f: for sid in string_ids: f.write(str(sid) + "\n") diff --git a/paru-bin b/paru-bin new file mode 160000 index 0000000..92a5542 --- /dev/null +++ b/paru-bin @@ -0,0 +1 @@ +Subproject commit 92a55429afbec4fceeb2cef843245105307444d2