Fix prompt template bugs: build template ignored and runtime override not wired (#173)

* Fix prompt template bugs in build and search

Bug 1: Build template ignored in new format
- Updated compute_embeddings_openai() to read build_prompt_template or prompt_template
- Updated compute_embeddings_ollama() with same fix
- Maintains backward compatibility with the old single-template format (both formats sketched below)
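
For reference, a minimal sketch of the two configuration shapes involved. The key names come from this commit's diff; the prefix-style values are illustrative assumptions, not the project's documented template syntax:

    # Old single-template format: one template used everywhere
    provider_options = {"prompt_template": "passage: "}

    # New split format: separate templates for build time and query time
    provider_options = {
        "build_prompt_template": "passage: ",
        "query_prompt_template": "query: ",
    }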

Bug 2: Runtime override not wired up
- Wired CLI search to pass provider_options to searcher.search()
- Enables runtime template override during search via --embedding-prompt-template (usage sketched below)
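
A hedged usage sketch of the override. Only the flag name is taken from this commit; the index name, query, and template value are hypothetical:

    # Override whatever template the index was built with, for this search only
    leann search ./my-index "how do I reset a password?" \
        --embedding-prompt-template "query: "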

All 42 prompt template tests passing.

Fixes #155

* Fix: Prevent embedding server from applying templates during search

- Filter out all prompt templates (build_prompt_template, query_prompt_template, prompt_template) from provider_options when launching the embedding server during search
- Templates are already applied in compute_query_embedding() before server call
- Prevents double-templating and ensures runtime override works correctly

This fixes the issue where --embedding-prompt-template during search was ignored because the server was applying build_prompt_template instead.
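
To make the failure mode concrete, a small illustration (assuming, purely for the sketch, that templates are plain prefixes prepended to the text):

    query_prompt_template = "query: "
    build_prompt_template = "passage: "

    text = "reset password"
    # Client side: compute_query_embedding applies the query template once
    templated = query_prompt_template + text       # "query: reset password"
    # Bug: the server then applied the build template on top of it
    double = build_prompt_template + templated     # "passage: query: reset password"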

* Format code with ruff
ww26 authored 2025-11-16 23:56:42 -05:00, committed by GitHub
parent 1ef9cba7de, commit 969f514564
3 changed files with 24 additions and 3 deletions


@@ -1545,6 +1545,11 @@ Examples:
print("Invalid input. Aborting search.") print("Invalid input. Aborting search.")
return return
# Build provider_options for runtime override
provider_options = {}
if args.embedding_prompt_template:
provider_options["prompt_template"] = args.embedding_prompt_template
searcher = LeannSearcher(index_path=index_path) searcher = LeannSearcher(index_path=index_path)
results = searcher.search( results = searcher.search(
query, query,
@@ -1554,6 +1559,7 @@ Examples:
             prune_ratio=args.prune_ratio,
             recompute_embeddings=args.recompute_embeddings,
             pruning_strategy=args.pruning_strategy,
+            provider_options=provider_options if provider_options else None,
         )
         print(f"Search results for '{query}' (top {len(results)}):")


@@ -740,7 +740,10 @@ def compute_embeddings_openai(
print(f"len of texts: {len(texts)}") print(f"len of texts: {len(texts)}")
# Apply prompt template if provided # Apply prompt template if provided
prompt_template = provider_options.get("prompt_template") # Priority: build_prompt_template (new format) > prompt_template (old format)
prompt_template = provider_options.get("build_prompt_template") or provider_options.get(
"prompt_template"
)
if prompt_template: if prompt_template:
logger.warning(f"Applying prompt template: '{prompt_template}'") logger.warning(f"Applying prompt template: '{prompt_template}'")
@@ -1031,7 +1034,10 @@ def compute_embeddings_ollama(
     # Apply prompt template if provided
     provider_options = provider_options or {}
-    prompt_template = provider_options.get("prompt_template")
+    # Priority: build_prompt_template (new format) > prompt_template (old format)
+    prompt_template = provider_options.get("build_prompt_template") or provider_options.get(
+        "prompt_template"
+    )
     if prompt_template:
         logger.warning(f"Applying prompt template: '{prompt_template}'")
 


@@ -71,6 +71,15 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
or "mips" or "mips"
) )
# Filter out ALL prompt templates from provider_options during search
# Templates are applied in compute_query_embedding (line 109-110) BEFORE server call
# The server should never apply templates during search to avoid double-templating
search_provider_options = {
k: v
for k, v in self.embedding_options.items()
if k not in ("build_prompt_template", "query_prompt_template", "prompt_template")
}
server_started, actual_port = self.embedding_server_manager.start_server( server_started, actual_port = self.embedding_server_manager.start_server(
port=port, port=port,
model_name=self.embedding_model, model_name=self.embedding_model,
@@ -78,7 +87,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
             passages_file=passages_source_file,
             distance_metric=distance_metric,
             enable_warmup=kwargs.get("enable_warmup", False),
-            provider_options=self.embedding_options,
+            provider_options=search_provider_options,
         )
         if not server_started:
             raise RuntimeError(f"Failed to start embedding server on port {actual_port}")
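
And the filter above in isolation, as a quick sanity check (the base_url entry is a hypothetical non-template option standing in for whatever else embedding_options may carry):

    embedding_options = {
        "build_prompt_template": "passage: ",
        "query_prompt_template": "query: ",
        "prompt_template": "doc: ",
        "base_url": "http://localhost:11434",  # hypothetical pass-through option
    }
    search_provider_options = {
        k: v
        for k, v in embedding_options.items()
        if k not in ("build_prompt_template", "query_prompt_template", "prompt_template")
    }
    # All three template keys are dropped; everything else passes through
    assert search_provider_options == {"base_url": "http://localhost:11434"}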