# LEANN/tests/test_cli_prompt_template.py

"""
Tests for CLI argument integration of --embedding-prompt-template.

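These tests verify that:
1. The --embedding-prompt-template flag is properly registered on build and search commands
2. The template value flows from CLI args to the embedding_options dict
3. The template is passed through to the compute_embeddings() function
4. Default behavior (no flag) is handled correctly
5. Build stores separate build/query templates when --query-prompt-template is also given
6. The build and search help output mentions the flag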
"""

from unittest.mock import Mock, patch

from leann.cli import LeannCLI


class TestCLIPromptTemplateArgument:
    """Parser-level checks for --embedding-prompt-template on ``build`` and ``search``."""

    def test_commands_accept_prompt_template_argument(self):
        """Both subcommands parse the flag and expose its value on the namespace."""
        template_value = "search_query: "
        flag = ["--embedding-prompt-template", template_value]
        # One minimal argv per subcommand, with the flag appended last.
        invocations = {
            "build": ["build", "test-index", "--docs", "/tmp/test-docs", *flag],
            "search": ["search", "test-index", "my query", *flag],
        }

        parser = LeannCLI().create_parser()
        for command, argv in invocations.items():
            parsed = parser.parse_args(argv)
            assert parsed.command == command
            assert hasattr(parsed, "embedding_prompt_template"), (
                f"{command} command should have embedding_prompt_template attribute"
            )
            assert parsed.embedding_prompt_template == template_value

    def test_commands_default_to_none(self):
        """Without the flag the attribute still exists and is None (backward compatibility)."""
        invocations = {
            "build": ["build", "test-index", "--docs", "/tmp/test-docs"],
            "search": ["search", "test-index", "my query"],
        }

        parser = LeannCLI().create_parser()
        for command, argv in invocations.items():
            parsed = parser.parse_args(argv)
            assert hasattr(parsed, "embedding_prompt_template"), (
                f"{command} command should have embedding_prompt_template attribute"
            )
            assert parsed.embedding_prompt_template is None, (
                f"{command.capitalize()} default value should be None when flag not provided"
            )


class TestBuildCommandPromptTemplateArgumentExtras:
    """Build-only parser checks that go beyond the shared build/search cases."""

    def test_build_command_prompt_template_with_multiword_value(self):
        """
        A template containing spaces is parsed as one string argument.

        The value is handed to the parser as a single argv element, so a template
        such as "Represent this sentence for searching: " must come back unchanged,
        trailing space included.
        """
        template = "Represent this sentence for searching: "
        argv = ["build", "test-index", "--docs", "/tmp/test-docs"]
        argv += ["--embedding-prompt-template", template]

        parsed = LeannCLI().create_parser().parse_args(argv)

        assert parsed.embedding_prompt_template == template


class TestPromptTemplateStoredInEmbeddingOptions:
    """Tests for template storage in the embedding_options dict.

    Every test patches ``leann.cli.LeannBuilder``, runs the ``build`` command
    through ``LeannCLI.build_index`` and inspects the keyword arguments the
    patched class was called with.
    """

    @staticmethod
    def _run_build(mock_builder_class, docs_dir, extra_args=()):
        """Run ``build`` against a patched LeannBuilder and return its call kwargs.

        Args:
            mock_builder_class: The patched ``leann.cli.LeannBuilder`` mock.
            docs_dir: Directory passed to ``--docs``.
            extra_args: Extra CLI arguments inserted between ``--docs`` and ``--force``.

        Returns:
            The keyword arguments of the most recent ``LeannBuilder(...)`` call.
        """
        import asyncio

        mock_builder_class.return_value = Mock()

        cli = LeannCLI()
        # Stub load_documents to return a document so the builder is created.
        cli.load_documents = Mock(return_value=[{"text": "test content", "metadata": {}}])

        # --force requests a rebuild to ensure LeannBuilder is called.
        argv = ["build", "test-index", "--docs", str(docs_dir), *extra_args, "--force"]
        args = cli.create_parser().parse_args(argv)
        asyncio.run(cli.build_index(args))

        # call_args is None on an uncalled mock; fail with a readable message
        # instead of an AttributeError on ``None.kwargs``.
        assert mock_builder_class.called, "build command should have instantiated LeannBuilder"
        return mock_builder_class.call_args.kwargs

    @staticmethod
    def _required_embedding_options(call_kwargs):
        """Return ``embedding_options`` from the builder kwargs, asserting it is set."""
        assert "embedding_options" in call_kwargs, "LeannBuilder should receive embedding_options"
        embedding_options = call_kwargs["embedding_options"]
        assert embedding_options is not None, (
            "embedding_options should not be None when a template is provided"
        )
        return embedding_options

    @patch("leann.cli.LeannBuilder")
    def test_prompt_template_stored_in_embedding_options_on_build(
        self, mock_builder_class, tmp_path
    ):
        """
        Verify that when --embedding-prompt-template is provided to build command,
        the value is stored in the embedding_options dict passed to LeannBuilder
        under the 'prompt_template' key.
        """
        template = "search_query: "
        call_kwargs = self._run_build(
            mock_builder_class, tmp_path, ["--embedding-prompt-template", template]
        )

        embedding_options = self._required_embedding_options(call_kwargs)
        assert "prompt_template" in embedding_options, (
            "embedding_options should contain 'prompt_template' key"
        )
        assert embedding_options["prompt_template"] == template, (
            f"Template should be '{template}', got {embedding_options.get('prompt_template')}"
        )

    @patch("leann.cli.LeannBuilder")
    def test_prompt_template_not_in_options_when_not_provided(self, mock_builder_class, tmp_path):
        """
        Verify that when --embedding-prompt-template is NOT provided,
        embedding_options either doesn't have the key or it's None.

        This ensures we don't pass empty/None values unnecessarily.
        """
        call_kwargs = self._run_build(mock_builder_class, tmp_path)

        # embedding_options may be absent, None or empty; all of those are acceptable.
        embedding_options = call_kwargs.get("embedding_options") or {}
        # Either the key shouldn't exist, or it should be None.
        assert (
            "prompt_template" not in embedding_options
            or embedding_options["prompt_template"] is None
        ), "prompt_template should not be set when flag not provided"

    # R1 Tests: Build-time separate template storage
    @patch("leann.cli.LeannBuilder")
    def test_build_stores_separate_templates(self, mock_builder_class, tmp_path):
        """
        R1 Test 1: Verify that when both --embedding-prompt-template and
        --query-prompt-template are provided to build command, both values
        are stored separately in the embedding_options dict as
        build_prompt_template and query_prompt_template, and the old
        'prompt_template' key is not set.

        NOTE(review): the original notes say this test was written before the
        CLI accepted --query-prompt-template or stored the two keys separately;
        if it fails, confirm that feature has landed.

        Expected embedding_options passed to LeannBuilder:
            {"build_prompt_template": "doc: ", "query_prompt_template": "query: "}
        (presumably what is persisted to .meta.json; this test only checks the
        LeannBuilder call.)
        """
        build_template = "doc: "
        query_template = "query: "
        call_kwargs = self._run_build(
            mock_builder_class,
            tmp_path,
            [
                "--embedding-prompt-template",
                build_template,
                "--query-prompt-template",
                query_template,
            ],
        )

        embedding_options = self._required_embedding_options(call_kwargs)

        assert "build_prompt_template" in embedding_options, (
            "embedding_options should contain 'build_prompt_template' key"
        )
        assert embedding_options["build_prompt_template"] == build_template, (
            f"build_prompt_template should be '{build_template}'"
        )

        assert "query_prompt_template" in embedding_options, (
            "embedding_options should contain 'query_prompt_template' key"
        )
        assert embedding_options["query_prompt_template"] == query_template, (
            f"query_prompt_template should be '{query_template}'"
        )

        # Old key should NOT be present when using new separate template format
        assert "prompt_template" not in embedding_options, (
            "Old 'prompt_template' key should not be present with separate templates"
        )

    @patch("leann.cli.LeannBuilder")
    def test_build_backward_compat_single_template(self, mock_builder_class, tmp_path):
        """
        R1 Test 2: Verify backward compatibility - when only
        --embedding-prompt-template is provided (old behavior), it should
        still be stored as 'prompt_template' in embedding_options.

        This ensures existing workflows continue to work unchanged; the test
        documents that this behavior must be preserved alongside the separate
        template feature.

        Expected embedding_options passed to LeannBuilder:
            {"prompt_template": "prompt: "}
        with no build_prompt_template or query_prompt_template keys.
        """
        template = "prompt: "
        call_kwargs = self._run_build(
            mock_builder_class, tmp_path, ["--embedding-prompt-template", template]
        )

        embedding_options = self._required_embedding_options(call_kwargs)

        assert "prompt_template" in embedding_options, (
            "embedding_options should contain old 'prompt_template' key for backward compat"
        )
        assert embedding_options["prompt_template"] == template, (
            f"prompt_template should be '{template}'"
        )

        # New keys should NOT be present in backward compat mode
        assert "build_prompt_template" not in embedding_options, (
            "build_prompt_template should not be present with single template flag"
        )
        assert "query_prompt_template" not in embedding_options, (
            "query_prompt_template should not be present with single template flag"
        )

    @patch("leann.cli.LeannBuilder")
    def test_build_no_templates(self, mock_builder_class, tmp_path):
        """
        R1 Test 3: Verify that when no template flags are provided,
        embedding_options has no prompt template keys.

        This ensures clean defaults; the test documents that this behavior
        must be preserved alongside the separate template feature.

        Expected behavior:
        - embedding_options has no prompt_template, build_prompt_template, or
          query_prompt_template keys (or embedding_options is empty/None)
        """
        call_kwargs = self._run_build(mock_builder_class, tmp_path)

        # embedding_options may be absent, None or empty; all of those are acceptable.
        embedding_options = call_kwargs.get("embedding_options") or {}

        # None of the template keys should be present
        assert "prompt_template" not in embedding_options, (
            "prompt_template should not be present when no flags provided"
        )
        assert "build_prompt_template" not in embedding_options, (
            "build_prompt_template should not be present when no flags provided"
        )
        assert "query_prompt_template" not in embedding_options, (
            "query_prompt_template should not be present when no flags provided"
        )


class TestPromptTemplateFlowsToComputeEmbeddings:
    """Integration check that the build template reaches compute_embeddings."""

    @patch("leann.api.compute_embeddings")
    def test_prompt_template_flows_to_compute_embeddings_via_provider_options(
        self, mock_compute_embeddings, tmp_path
    ):
        """
        Follow the prompt template from the CLI down to compute_embeddings().

        Only ``leann.api.compute_embeddings`` is patched; LeannBuilder is the real
        class, so the whole chain is exercised:
        CLI → embedding_options → LeannBuilder → compute_embeddings(provider_options)

        Historical note from the original author: the test was written ahead of
        the implementation and was expected to fail until
        1. the CLI captured the argument,
        2. embedding_options included prompt_template, and
        3. LeannBuilder passed it through to compute_embeddings.
        """
        import asyncio

        import numpy as np

        template = "search_document: "

        # Dummy embeddings returned as a numpy array in place of a real model call.
        mock_compute_embeddings.return_value = np.array([[0.1, 0.2, 0.3]], dtype=np.float32)

        cli = LeannCLI()
        # A single simple document stands in for whatever --docs would load.
        cli.load_documents = Mock(return_value=[{"text": "test content", "metadata": {}}])

        argv = ["build", "test-index", "--docs", str(tmp_path)]
        argv += ["--embedding-prompt-template", template]
        # hnsw backend; --force rebuilds so the index is actually created.
        argv += ["--backend-name", "hnsw", "--force"]
        parsed = cli.create_parser().parse_args(argv)

        asyncio.run(cli.build_index(parsed))

        assert mock_compute_embeddings.called, "compute_embeddings should have been called"

        # Inspect the keyword arguments of the most recent call.
        received = mock_compute_embeddings.call_args.kwargs
        assert "provider_options" in received, (
            "compute_embeddings should receive provider_options parameter"
        )

        provider_options = received["provider_options"]
        assert provider_options is not None, "provider_options should not be None"
        assert "prompt_template" in provider_options, (
            "provider_options should contain prompt_template key"
        )
        assert provider_options["prompt_template"] == template, (
            f"Template should be '{template}', got {provider_options.get('prompt_template')}"
        )


class TestPromptTemplateArgumentHelp:
    """Tests for argument help text and documentation."""

    @staticmethod
    def _capture_help(command):
        """Return what ``<command> --help`` prints to stdout.

        Args:
            command: Name of the subcommand whose help should be rendered.

        Returns:
            The captured stdout text.
        """
        import io
        from contextlib import redirect_stdout

        parser = LeannCLI().create_parser()
        captured = io.StringIO()
        try:
            with redirect_stdout(captured):
                parser.parse_args([command, "--help"])
        except SystemExit as exit_info:
            # --help causes sys.exit(0); any other status means the parser
            # rejected the arguments rather than printing help.
            assert exit_info.code in (0, None), (
                f"'{command} --help' exited with status {exit_info.code}"
            )
        return captured.getvalue()

    def test_build_command_prompt_template_has_help_text(self):
        """
        Verify that --embedding-prompt-template has descriptive help text.

        Good help text is crucial for CLI usability.
        """
        help_text = self._capture_help("build")
        assert "--embedding-prompt-template" in help_text, (
            "Help text should mention --embedding-prompt-template"
        )
        # Check for keywords that should be in the help.
        # NOTE(review): the flag name itself contains "prompt" and "template", so
        # this check is already implied by the assertion above; it does not prove
        # that a description exists.
        help_lower = help_text.lower()
        assert any(keyword in help_lower for keyword in ("template", "prompt", "prepend")), (
            "Help text should explain what the prompt template does"
        )

    def test_search_command_prompt_template_has_help_text(self):
        """
        Verify that search command also has help text for --embedding-prompt-template.
        """
        help_text = self._capture_help("search")
        assert "--embedding-prompt-template" in help_text, (
            "Search help text should mention --embedding-prompt-template"
        )