From d3e6cfa1f71c9693bc5094e194a4ec59fa87b26f Mon Sep 17 00:00:00 2001 From: aakash Date: Mon, 6 Oct 2025 14:40:04 -0700 Subject: [PATCH] fix: Resolve SentenceTransformer model_kwargs parameter conflict - Fix local_files_only parameter conflict in embedding_compute.py - Create separate copies of model_kwargs and tokenizer_kwargs for local vs network loading - Prevents parameter conflicts when falling back from local to network loading - Resolves TypeError in test_readme_examples.py tests This addresses the SentenceTransformer initialization issues in CI tests. --- .../leann-core/src/leann/embedding_compute.py | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/packages/leann-core/src/leann/embedding_compute.py b/packages/leann-core/src/leann/embedding_compute.py index a01bd3b..fafa1a0 100644 --- a/packages/leann-core/src/leann/embedding_compute.py +++ b/packages/leann-core/src/leann/embedding_compute.py @@ -184,28 +184,32 @@ def compute_embeddings_sentence_transformers( try: # Try local loading first - model_kwargs["local_files_only"] = True - tokenizer_kwargs["local_files_only"] = True + local_model_kwargs = model_kwargs.copy() + local_tokenizer_kwargs = tokenizer_kwargs.copy() + local_model_kwargs["local_files_only"] = True + local_tokenizer_kwargs["local_files_only"] = True model = SentenceTransformer( model_name, device=device, - model_kwargs=model_kwargs, - tokenizer_kwargs=tokenizer_kwargs, + model_kwargs=local_model_kwargs, + tokenizer_kwargs=local_tokenizer_kwargs, local_files_only=True, ) logger.info("Model loaded successfully! (local + optimized)") except Exception as e: logger.warning(f"Local loading failed ({e}), trying network download...") # Fallback to network loading - model_kwargs["local_files_only"] = False - tokenizer_kwargs["local_files_only"] = False + network_model_kwargs = model_kwargs.copy() + network_tokenizer_kwargs = tokenizer_kwargs.copy() + network_model_kwargs["local_files_only"] = False + network_tokenizer_kwargs["local_files_only"] = False model = SentenceTransformer( model_name, device=device, - model_kwargs=model_kwargs, - tokenizer_kwargs=tokenizer_kwargs, + model_kwargs=network_model_kwargs, + tokenizer_kwargs=network_tokenizer_kwargs, local_files_only=False, ) logger.info("Model loaded successfully! (network + optimized)")