From 969f5145645d0584b97ed74a802dce6194016be7 Mon Sep 17 00:00:00 2001 From: ww26 Date: Sun, 16 Nov 2025 23:56:42 -0500 Subject: [PATCH] Fix prompt template bugs: build template ignored and runtime override not wired (#173) * Fix prompt template bugs in build and search Bug 1: Build template ignored in new format - Updated compute_embeddings_openai() to read build_prompt_template or prompt_template - Updated compute_embeddings_ollama() with same fix - Maintains backward compatibility with old single-template format Bug 2: Runtime override not wired up - Wired CLI search to pass provider_options to searcher.search() - Enables runtime template override during search via --embedding-prompt-template All 42 prompt template tests passing. Fixes #155 * Fix: Prevent embedding server from applying templates during search - Filter out all prompt templates (build_prompt_template, query_prompt_template, prompt_template) from provider_options when launching embedding server during search - Templates are already applied in compute_query_embedding() before server call - Prevents double-templating and ensures runtime override works correctly This fixes the issue where --embedding-prompt-template during search was ignored because the server was applying build_prompt_template instead. * Format code with ruff --- packages/leann-core/src/leann/cli.py | 6 ++++++ packages/leann-core/src/leann/embedding_compute.py | 10 ++++++++-- packages/leann-core/src/leann/searcher_base.py | 11 ++++++++++- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/packages/leann-core/src/leann/cli.py b/packages/leann-core/src/leann/cli.py index 982ae9c..122ae6b 100644 --- a/packages/leann-core/src/leann/cli.py +++ b/packages/leann-core/src/leann/cli.py @@ -1545,6 +1545,11 @@ Examples: print("Invalid input. Aborting search.") return + # Build provider_options for runtime override + provider_options = {} + if args.embedding_prompt_template: + provider_options["prompt_template"] = args.embedding_prompt_template + searcher = LeannSearcher(index_path=index_path) results = searcher.search( query, @@ -1554,6 +1559,7 @@ Examples: prune_ratio=args.prune_ratio, recompute_embeddings=args.recompute_embeddings, pruning_strategy=args.pruning_strategy, + provider_options=provider_options if provider_options else None, ) print(f"Search results for '{query}' (top {len(results)}):") diff --git a/packages/leann-core/src/leann/embedding_compute.py b/packages/leann-core/src/leann/embedding_compute.py index a8dba9d..093a710 100644 --- a/packages/leann-core/src/leann/embedding_compute.py +++ b/packages/leann-core/src/leann/embedding_compute.py @@ -740,7 +740,10 @@ def compute_embeddings_openai( print(f"len of texts: {len(texts)}") # Apply prompt template if provided - prompt_template = provider_options.get("prompt_template") + # Priority: build_prompt_template (new format) > prompt_template (old format) + prompt_template = provider_options.get("build_prompt_template") or provider_options.get( + "prompt_template" + ) if prompt_template: logger.warning(f"Applying prompt template: '{prompt_template}'") @@ -1031,7 +1034,10 @@ def compute_embeddings_ollama( # Apply prompt template if provided provider_options = provider_options or {} - prompt_template = provider_options.get("prompt_template") + # Priority: build_prompt_template (new format) > prompt_template (old format) + prompt_template = provider_options.get("build_prompt_template") or provider_options.get( + "prompt_template" + ) if prompt_template: logger.warning(f"Applying prompt template: '{prompt_template}'") diff --git a/packages/leann-core/src/leann/searcher_base.py b/packages/leann-core/src/leann/searcher_base.py index ba5b189..f8ab71c 100644 --- a/packages/leann-core/src/leann/searcher_base.py +++ b/packages/leann-core/src/leann/searcher_base.py @@ -71,6 +71,15 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC): or "mips" ) + # Filter out ALL prompt templates from provider_options during search + # Templates are applied in compute_query_embedding (line 109-110) BEFORE server call + # The server should never apply templates during search to avoid double-templating + search_provider_options = { + k: v + for k, v in self.embedding_options.items() + if k not in ("build_prompt_template", "query_prompt_template", "prompt_template") + } + server_started, actual_port = self.embedding_server_manager.start_server( port=port, model_name=self.embedding_model, @@ -78,7 +87,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC): passages_file=passages_source_file, distance_metric=distance_metric, enable_warmup=kwargs.get("enable_warmup", False), - provider_options=self.embedding_options, + provider_options=search_provider_options, ) if not server_started: raise RuntimeError(f"Failed to start embedding server on port {actual_port}")