Fix prompt template bugs: build template ignored and runtime override not wired (#173)
* Fix prompt template bugs in build and search

  Bug 1: Build template ignored in new format
  - Updated compute_embeddings_openai() to read build_prompt_template or prompt_template
  - Updated compute_embeddings_ollama() with the same fix
  - Maintains backward compatibility with the old single-template format

  Bug 2: Runtime override not wired up
  - Wired CLI search to pass provider_options to searcher.search()
  - Enables runtime template override during search via --embedding-prompt-template

  All 42 prompt template tests passing. Fixes #155

* Fix: Prevent embedding server from applying templates during search

  - Filter out all prompt templates (build_prompt_template, query_prompt_template,
    prompt_template) from provider_options when launching the embedding server during search
  - Templates are already applied in compute_query_embedding() before the server call
  - Prevents double-templating and ensures the runtime override works correctly

  This fixes the issue where --embedding-prompt-template during search was ignored
  because the server was applying build_prompt_template instead.

* Format code with ruff
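To show what the new wiring enables end to end, here is a short Python sketch of a runtime override during search. The import path, index path, query, and template string are illustrative assumptions, not taken from this diff:

    # Illustrative sketch of the runtime override this commit wires up.
    from leann import LeannSearcher  # import path assumed

    searcher = LeannSearcher(index_path="./demo.leann")  # hypothetical index
    results = searcher.search(
        "how are prompt templates resolved?",
        # Passed through the new CLI wiring; applied once in
        # compute_query_embedding(), never again by the embedding server.
        provider_options={"prompt_template": "query: {text}"},  # placeholder syntax assumed
    )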
@@ -1545,6 +1545,11 @@ Examples:
         print("Invalid input. Aborting search.")
         return
 
+    # Build provider_options for runtime override
+    provider_options = {}
+    if args.embedding_prompt_template:
+        provider_options["prompt_template"] = args.embedding_prompt_template
+
     searcher = LeannSearcher(index_path=index_path)
     results = searcher.search(
         query,
@@ -1554,6 +1559,7 @@ Examples:
         prune_ratio=args.prune_ratio,
         recompute_embeddings=args.recompute_embeddings,
         pruning_strategy=args.pruning_strategy,
+        provider_options=provider_options if provider_options else None,
     )
 
     print(f"Search results for '{query}' (top {len(results)}):")
@@ -740,7 +740,10 @@ def compute_embeddings_openai(
     print(f"len of texts: {len(texts)}")
 
     # Apply prompt template if provided
-    prompt_template = provider_options.get("prompt_template")
+    # Priority: build_prompt_template (new format) > prompt_template (old format)
+    prompt_template = provider_options.get("build_prompt_template") or provider_options.get(
+        "prompt_template"
+    )
 
     if prompt_template:
         logger.warning(f"Applying prompt template: '{prompt_template}'")
@@ -1031,7 +1034,10 @@ def compute_embeddings_ollama(
 
     # Apply prompt template if provided
     provider_options = provider_options or {}
-    prompt_template = provider_options.get("prompt_template")
+    # Priority: build_prompt_template (new format) > prompt_template (old format)
+    prompt_template = provider_options.get("build_prompt_template") or provider_options.get(
+        "prompt_template"
+    )
 
     if prompt_template:
         logger.warning(f"Applying prompt template: '{prompt_template}'")
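Both embedding functions now resolve the template with the same two-key fallback. A minimal standalone sketch of that resolution, using made-up option dicts rather than values from the repo:

    # New format: build_prompt_template takes priority.
    opts = {"build_prompt_template": "passage: {text}", "prompt_template": "doc: {text}"}
    assert (opts.get("build_prompt_template") or opts.get("prompt_template")) == "passage: {text}"

    # Old single-template format still resolves (backward compatibility).
    opts = {"prompt_template": "doc: {text}"}
    assert (opts.get("build_prompt_template") or opts.get("prompt_template")) == "doc: {text}"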
@@ -71,6 +71,15 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
             or "mips"
         )
 
+        # Filter out ALL prompt templates from provider_options during search
+        # Templates are applied in compute_query_embedding (line 109-110) BEFORE server call
+        # The server should never apply templates during search to avoid double-templating
+        search_provider_options = {
+            k: v
+            for k, v in self.embedding_options.items()
+            if k not in ("build_prompt_template", "query_prompt_template", "prompt_template")
+        }
+
         server_started, actual_port = self.embedding_server_manager.start_server(
             port=port,
             model_name=self.embedding_model,
@@ -78,7 +87,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
             passages_file=passages_source_file,
             distance_metric=distance_metric,
             enable_warmup=kwargs.get("enable_warmup", False),
-            provider_options=self.embedding_options,
+            provider_options=search_provider_options,
         )
         if not server_started:
             raise RuntimeError(f"Failed to start embedding server on port {actual_port}")
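To make the BaseSearcher change concrete, a self-contained sketch of the filter (the embedding_options dict below is hypothetical): all three template keys are stripped before the server launches, while unrelated options pass through untouched.

    embedding_options = {
        "build_prompt_template": "passage: {text}",  # applied at build time only
        "query_prompt_template": "query: {text}",    # applied in compute_query_embedding()
        "prompt_template": "doc: {text}",            # legacy single-template key
        "base_url": "http://localhost:11434",        # hypothetical non-template option
    }
    search_provider_options = {
        k: v
        for k, v in embedding_options.items()
        if k not in ("build_prompt_template", "query_prompt_template", "prompt_template")
    }
    assert search_provider_options == {"base_url": "http://localhost:11434"}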