Files
LEANN/tests/test_cli_prompt_template.py
Andy Lee b754474c44 Fix all test type errors and enable ty check on tests
- Fix test_basic.py: search() takes str not list
- Fix test_cli_prompt_template.py: add type: ignore for Mock assignments
- Fix test_prompt_template_persistence.py: match BaseSearcher.search signature
- Fix test_prompt_template_e2e.py: add type narrowing asserts after skip
- Fix test_readme_examples.py: use explicit kwargs instead of **model_args
- Fix metadata_filter.py: allow Optional[MetadataFilters]
- Update CI to run ty check on tests
2025-12-23 09:42:08 +00:00

534 lines
20 KiB
Python

"""
Tests for CLI argument integration of --embedding-prompt-template.
These tests verify that:
1. The --embedding-prompt-template flag is properly registered on build and search commands
2. The template value flows from CLI args to embedding_options dict
3. The template is passed through to compute_embeddings() function
4. Default behavior (no flag) is handled correctly
"""
from unittest.mock import Mock, patch
from leann.cli import LeannCLI
class TestCLIPromptTemplateArgument:
    """Parser-level checks for --embedding-prompt-template on build and search."""

    def test_commands_accept_prompt_template_argument(self):
        """The build and search parsers both accept the flag and expose its value."""
        expected = "search_query: "
        arg_parser = LeannCLI().create_parser()

        # (command name, argv without the flag, message when the attribute is missing)
        scenarios = (
            (
                "build",
                ["build", "test-index", "--docs", "/tmp/test-docs"],
                "build command should have embedding_prompt_template attribute",
            ),
            (
                "search",
                ["search", "test-index", "my query"],
                "search command should have embedding_prompt_template attribute",
            ),
        )
        for command, base_argv, missing_msg in scenarios:
            namespace = arg_parser.parse_args(
                [*base_argv, "--embedding-prompt-template", expected]
            )
            assert namespace.command == command
            assert hasattr(namespace, "embedding_prompt_template"), missing_msg
            assert namespace.embedding_prompt_template == expected

    def test_commands_default_to_none(self):
        """Without the flag the attribute exists and is None (backward compatibility)."""
        arg_parser = LeannCLI().create_parser()

        # (argv, message when the attribute is missing, message when the default is wrong)
        scenarios = (
            (
                ["build", "test-index", "--docs", "/tmp/test-docs"],
                "build command should have embedding_prompt_template attribute",
                "Build default value should be None when flag not provided",
            ),
            (
                ["search", "test-index", "my query"],
                "search command should have embedding_prompt_template attribute",
                "Search default value should be None when flag not provided",
            ),
        )
        for argv, missing_msg, default_msg in scenarios:
            namespace = arg_parser.parse_args(argv)
            assert hasattr(namespace, "embedding_prompt_template"), missing_msg
            assert namespace.embedding_prompt_template is None, default_msg
class TestBuildCommandPromptTemplateArgumentExtras:
    """Build-only parser checks for the prompt template flag."""

    def test_build_command_prompt_template_with_multiword_value(self):
        """
        A template value containing spaces is kept as one string.

        Values such as "search_document: " or
        "Represent this sentence for searching: " are supplied as a single
        argv element and must come back unchanged on the parsed namespace.
        """
        multiword = "Represent this sentence for searching: "
        argv = ["build", "test-index", "--docs", "/tmp/test-docs"]
        argv += ["--embedding-prompt-template", multiword]

        parsed = LeannCLI().create_parser().parse_args(argv)

        assert parsed.embedding_prompt_template == multiword
class TestPromptTemplateStoredInEmbeddingOptions:
    """Tests for template storage in the embedding_options dict given to LeannBuilder."""

    @staticmethod
    def _run_build_and_get_builder_kwargs(mock_builder_class, docs_dir, *template_flags):
        """
        Run the build command with LeannBuilder mocked and return the keyword
        arguments the builder class was called with.

        Args:
            mock_builder_class: The Mock installed by @patch("leann.cli.LeannBuilder").
            docs_dir: Directory passed to --docs (the tests pass pytest's tmp_path).
            *template_flags: Extra CLI tokens placed between --docs and --force.

        Returns:
            The kwargs of the most recent LeannBuilder(...) call.
        """
        import asyncio

        mock_builder_class.return_value = Mock()

        cli = LeannCLI()
        # Mock load_documents to return a document so builder is created
        cli.load_documents = Mock(return_value=[{"text": "test content", "metadata": {}}])  # type: ignore[assignment]

        args = cli.create_parser().parse_args(
            [
                "build",
                "test-index",
                "--docs",
                str(docs_dir),
                *template_flags,
                "--force",  # Force rebuild to ensure LeannBuilder is called
            ]
        )
        asyncio.run(cli.build_index(args))

        # call_args is None when the mock was never called; fail with a clear
        # message instead of an AttributeError on None.kwargs.
        assert mock_builder_class.call_args is not None, (
            "LeannBuilder should have been constructed by the build command"
        )
        return mock_builder_class.call_args.kwargs

    @patch("leann.cli.LeannBuilder")
    def test_prompt_template_stored_in_embedding_options_on_build(
        self, mock_builder_class, tmp_path
    ):
        """
        Verify that when --embedding-prompt-template is provided to build command,
        the value is stored in embedding_options dict passed to LeannBuilder.

        Requires the CLI to read the argument and add it to embedding_options
        under the 'prompt_template' key.
        """
        template = "search_query: "
        call_kwargs = self._run_build_and_get_builder_kwargs(
            mock_builder_class, tmp_path, "--embedding-prompt-template", template
        )

        assert "embedding_options" in call_kwargs, "LeannBuilder should receive embedding_options"
        embedding_options = call_kwargs["embedding_options"]
        assert embedding_options is not None, (
            "embedding_options should not be None when template provided"
        )
        assert "prompt_template" in embedding_options, (
            "embedding_options should contain 'prompt_template' key"
        )
        assert embedding_options["prompt_template"] == template, (
            f"Template should be '{template}', got {embedding_options.get('prompt_template')}"
        )

    @patch("leann.cli.LeannBuilder")
    def test_prompt_template_not_in_options_when_not_provided(self, mock_builder_class, tmp_path):
        """
        Verify that when --embedding-prompt-template is NOT provided,
        embedding_options either doesn't have the key or it's None.

        This ensures we don't pass empty/None values unnecessarily.
        """
        call_kwargs = self._run_build_and_get_builder_kwargs(mock_builder_class, tmp_path)

        # A missing/empty embedding_options trivially satisfies the requirement.
        embedding_options = call_kwargs.get("embedding_options") or {}
        # Either the key shouldn't exist, or it should be None
        assert embedding_options.get("prompt_template") is None, (
            "prompt_template should not be set when flag not provided"
        )

    # R1 Tests: Build-time separate template storage
    @patch("leann.cli.LeannBuilder")
    def test_build_stores_separate_templates(self, mock_builder_class, tmp_path):
        """
        R1 Test 1: Verify that when both --embedding-prompt-template and
        --query-prompt-template are provided to build command, both values
        are stored separately in embedding_options dict as build_prompt_template
        and query_prompt_template.

        Requirements exercised by this test:
        1. The build parser accepts the --query-prompt-template flag
        2. The CLI stores the templates under separate build_prompt_template
           and query_prompt_template keys, without the old prompt_template key

        Expected behavior:
        - .meta.json contains: {"embedding_options": {
            "build_prompt_template": "doc: ",
            "query_prompt_template": "query: "
          }}
        """
        build_template = "doc: "
        query_template = "query: "
        call_kwargs = self._run_build_and_get_builder_kwargs(
            mock_builder_class,
            tmp_path,
            "--embedding-prompt-template",
            build_template,
            "--query-prompt-template",
            query_template,
        )

        assert "embedding_options" in call_kwargs, "LeannBuilder should receive embedding_options"
        embedding_options = call_kwargs["embedding_options"]
        assert embedding_options is not None, (
            "embedding_options should not be None when templates provided"
        )
        assert "build_prompt_template" in embedding_options, (
            "embedding_options should contain 'build_prompt_template' key"
        )
        assert embedding_options["build_prompt_template"] == build_template, (
            f"build_prompt_template should be '{build_template}'"
        )
        assert "query_prompt_template" in embedding_options, (
            "embedding_options should contain 'query_prompt_template' key"
        )
        assert embedding_options["query_prompt_template"] == query_template, (
            f"query_prompt_template should be '{query_template}'"
        )
        # Old key should NOT be present when using new separate template format
        assert "prompt_template" not in embedding_options, (
            "Old 'prompt_template' key should not be present with separate templates"
        )

    @patch("leann.cli.LeannBuilder")
    def test_build_backward_compat_single_template(self, mock_builder_class, tmp_path):
        """
        R1 Test 2: Verify backward compatibility - when only
        --embedding-prompt-template is provided (old behavior), it should
        still be stored as 'prompt_template' in embedding_options.

        This documents the requirement that existing single-template workflows
        keep working unchanged alongside the separate template feature.

        Expected behavior:
        - .meta.json contains: {"embedding_options": {"prompt_template": "prompt: "}}
        - No build_prompt_template or query_prompt_template keys
        """
        template = "prompt: "
        call_kwargs = self._run_build_and_get_builder_kwargs(
            mock_builder_class, tmp_path, "--embedding-prompt-template", template
        )

        assert "embedding_options" in call_kwargs, "LeannBuilder should receive embedding_options"
        embedding_options = call_kwargs["embedding_options"]
        assert embedding_options is not None, (
            "embedding_options should not be None when template provided"
        )
        assert "prompt_template" in embedding_options, (
            "embedding_options should contain old 'prompt_template' key for backward compat"
        )
        assert embedding_options["prompt_template"] == template, (
            f"prompt_template should be '{template}'"
        )
        # New keys should NOT be present in backward compat mode
        assert "build_prompt_template" not in embedding_options, (
            "build_prompt_template should not be present with single template flag"
        )
        assert "query_prompt_template" not in embedding_options, (
            "query_prompt_template should not be present with single template flag"
        )

    @patch("leann.cli.LeannBuilder")
    def test_build_no_templates(self, mock_builder_class, tmp_path):
        """
        R1 Test 3: Verify that when no template flags are provided,
        embedding_options has no prompt template keys.

        This documents the requirement for clean defaults and no unnecessary
        keys in .meta.json.

        Expected behavior:
        - .meta.json has no prompt_template, build_prompt_template, or
          query_prompt_template keys (or embedding_options is empty/None)
        """
        call_kwargs = self._run_build_and_get_builder_kwargs(mock_builder_class, tmp_path)

        # A missing/empty embedding_options trivially satisfies the requirement.
        embedding_options = call_kwargs.get("embedding_options") or {}
        # None of the template keys should be present
        assert "prompt_template" not in embedding_options, (
            "prompt_template should not be present when no flags provided"
        )
        assert "build_prompt_template" not in embedding_options, (
            "build_prompt_template should not be present when no flags provided"
        )
        assert "query_prompt_template" not in embedding_options, (
            "query_prompt_template should not be present when no flags provided"
        )
class TestPromptTemplateFlowsToComputeEmbeddings:
    """Integration check that the CLI template reaches compute_embeddings."""

    @patch("leann.api.compute_embeddings")
    def test_prompt_template_flows_to_compute_embeddings_via_provider_options(
        self, mock_compute_embeddings, tmp_path
    ):
        """
        Verify the prompt template given on the command line is handed to
        compute_embeddings() through its provider_options keyword.

        Flow under test:
            CLI → embedding_options → LeannBuilder → compute_embeddings(provider_options)

        For this to pass, the CLI must capture the argument, embedding_options
        must include prompt_template, and LeannBuilder must forward it to
        compute_embeddings.
        """
        import asyncio

        import numpy as np

        expected_template = "search_document: "

        # Mock compute_embeddings to return dummy embeddings as numpy array
        mock_compute_embeddings.return_value = np.array([[0.1, 0.2, 0.3]], dtype=np.float32)

        # LeannBuilder is left unpatched so the actual flow is exercised
        cli = LeannCLI()
        # Mock load_documents to return a simple document
        cli.load_documents = Mock(return_value=[{"text": "test content", "metadata": {}}])  # type: ignore[assignment]

        argv = ["build", "test-index", "--docs", str(tmp_path)]
        argv += ["--embedding-prompt-template", expected_template]
        argv += ["--backend-name", "hnsw"]  # Use hnsw backend
        argv.append("--force")  # Force rebuild to ensure index is created
        parsed = cli.create_parser().parse_args(argv)

        asyncio.run(cli.build_index(parsed))

        # Verify compute_embeddings was called with provider_options containing prompt_template
        assert mock_compute_embeddings.called, "compute_embeddings should have been called"

        passed_kwargs = mock_compute_embeddings.call_args.kwargs
        assert "provider_options" in passed_kwargs, (
            "compute_embeddings should receive provider_options parameter"
        )
        provider_options = passed_kwargs["provider_options"]
        assert provider_options is not None, "provider_options should not be None"
        assert "prompt_template" in provider_options, (
            "provider_options should contain prompt_template key"
        )
        assert provider_options["prompt_template"] == expected_template, (
            f"Template should be '{expected_template}', got {provider_options.get('prompt_template')}"
        )
class TestPromptTemplateArgumentHelp:
    """Tests for argument help text and documentation."""

    @staticmethod
    def _capture_help(subcommand):
        """
        Return what `<subcommand> --help` prints to stdout.

        Args:
            subcommand: Name of the CLI subcommand, e.g. "build" or "search".

        Returns:
            The captured help output as a string.
        """
        import io
        from contextlib import redirect_stdout

        parser = LeannCLI().create_parser()
        captured = io.StringIO()
        try:
            with redirect_stdout(captured):
                parser.parse_args([subcommand, "--help"])
        except SystemExit as exit_info:
            # --help causes sys.exit(0). Any other status means argparse
            # rejected the arguments, so report that instead of letting the
            # help-text assertions fail on empty output.
            assert exit_info.code in (0, None), (
                f"'{subcommand} --help' exited with status {exit_info.code}"
            )
        return captured.getvalue()

    def test_build_command_prompt_template_has_help_text(self):
        """
        Verify that --embedding-prompt-template has descriptive help text.

        Good help text is crucial for CLI usability.
        """
        help_text = self._capture_help("build")

        assert "--embedding-prompt-template" in help_text, (
            "Help text should mention --embedding-prompt-template"
        )
        # Check for keywords that should be in the help
        # NOTE(review): the flag name itself contains "prompt" and "template",
        # so this check cannot fail once the assertion above has passed; it
        # does not prove a description exists. Consider asserting on the
        # option's description text directly.
        help_lower = help_text.lower()
        assert any(keyword in help_lower for keyword in ["template", "prompt", "prepend"]), (
            "Help text should explain what the prompt template does"
        )

    def test_search_command_prompt_template_has_help_text(self):
        """
        Verify that search command also has help text for --embedding-prompt-template.
        """
        help_text = self._capture_help("search")

        assert "--embedding-prompt-template" in help_text, (
            "Search help text should mention --embedding-prompt-template"
        )