* Add prompt template support and LM Studio SDK integration

  Features:
  - Prompt template support for embedding models (via --embedding-prompt-template)
  - LM Studio SDK integration for automatic context length detection
  - Hybrid token limit discovery (Ollama → LM Studio → Registry → Default)
  - Client-side token truncation to prevent silent failures
  - Automatic persistence of embedding_options to .meta.json

  Implementation:
  - Added _query_lmstudio_context_limit() with Node.js subprocess bridge
  - Modified compute_embeddings_openai() to apply prompt templates before truncation
  - Extended CLI with --embedding-prompt-template flag for build and search
  - URL detection for LM Studio (port 1234 or lmstudio/lm.studio keywords)
  - HTTP→WebSocket URL conversion for SDK compatibility

  Tests:
  - 60 passing tests across 5 test files
  - Comprehensive coverage of prompt templates, LM Studio integration, and token handling
  - Parametrized tests for maintainability and clarity

* Add integration tests and fix LM Studio SDK bridge

  Features:
  - End-to-end integration tests for prompt template with EmbeddingGemma
  - Integration tests for hybrid token limit discovery mechanism
  - Tests verify real-world functionality with live services (LM Studio, Ollama)

  Fixes:
  - LM Studio SDK bridge now uses client.embedding.load() for embedding models
  - Fixed NODE_PATH resolution to include npm global modules
  - Fixed integration test to use WebSocket URL (ws://) for SDK bridge

  Tests:
  - test_prompt_template_e2e.py: 8 integration tests covering:
    - Prompt template prepending with LM Studio (EmbeddingGemma)
    - LM Studio SDK bridge for context length detection
    - Ollama dynamic token limit detection
    - Hybrid discovery fallback mechanism (registry, default)
  - All tests marked with @pytest.mark.integration for selective execution
  - Tests gracefully skip when services are unavailable

  Documentation:
  - Updated tests/README.md with integration test section
  - Added prerequisites and running instructions
  - Documented that prompt templates are ONLY for EmbeddingGemma
  - Added integration marker to pyproject.toml

  Test Results:
  - All 8 integration tests passing with live services
  - Confirmed prompt templates work correctly with EmbeddingGemma
  - Verified LM Studio SDK bridge auto-detects context length (2048)
  - Validated hybrid token limit discovery across all backends

* Add prompt template support to Ollama mode

  Extends prompt template functionality from OpenAI mode to Ollama for backend consistency.

  Changes:
  - Add provider_options parameter to compute_embeddings_ollama()
  - Apply prompt template before token truncation (lines 1005-1011)
  - Pass provider_options through the compute_embeddings() call chain

  Tests:
  - test_ollama_embedding_with_prompt_template: verifies templates work with Ollama
  - test_ollama_prompt_template_affects_embeddings: confirms embeddings differ with/without a template
  - Both tests pass with live Ollama service (2/2 passing)

  Usage:
  leann build --embedding-mode ollama --embedding-prompt-template "query: " ...

* Fix LM Studio SDK bridge to respect JIT auto-evict settings

  Problem: the SDK bridge called client.embedding.load(), which loaded models into LM Studio memory and bypassed JIT auto-evict settings, causing duplicate model instances to accumulate.

  Root cause analysis (from Perplexity research):
  - Explicit SDK load() commands are treated as "pinned" models
  - JIT auto-evict only applies to models loaded reactively via API requests
  - SDK-loaded models remain in memory until explicitly unloaded

  Solutions implemented:
  1. Add model.unload() after the metadata query (line 243)
     - Load the model temporarily to get its context length
     - Unload immediately to hand control back to the JIT system
     - Subsequent API requests trigger a JIT load with auto-evict
  2. Add token limit caching to prevent repeated SDK calls
     - Cache discovered limits in the _token_limit_cache dict (line 48)
     - Key: (model_name, base_url); Value: token_limit
     - Prevents duplicate load/unload cycles within the same process
     - Cache shared across all discovery methods (Ollama, SDK, registry)

  Tests:
  - TestTokenLimitCaching: 5 tests for cache behavior (integrated into test_token_truncation.py)
  - Manual testing confirmed no duplicate models in LM Studio after the fix
  - All existing tests pass

  Impact:
  - Respects the user's LM Studio JIT and auto-evict settings
  - Reduces model memory footprint
  - Faster subsequent builds (cached limits)

* Document prompt template and LM Studio SDK features

  Added comprehensive documentation for the new optional embedding features.

  Configuration Guide (docs/configuration-guide.md):
  - New section: "Optional Embedding Features"
  - Task-Specific Prompt Templates subsection:
    - Explains the EmbeddingGemma use case with document/query prompts
    - CLI and Python API examples
    - Clear warnings about compatible vs incompatible models
    - References to GitHub issue #155 and the HuggingFace blog
  - LM Studio Auto-Detection subsection:
    - Prerequisites (Node.js + @lmstudio/sdk)
    - How auto-detection works (4-step process)
    - Benefits and optional nature clearly stated

  FAQ (docs/faq.md):
  - FAQ #2: When should I use prompt templates?
    - DO/DON'T guidance with examples
    - Links to the detailed configuration guide
  - FAQ #3: Why is LM Studio loading multiple copies?
    - Explains the JIT auto-evict fix
    - Troubleshooting steps if still seeing issues
  - FAQ #4: Do I need Node.js and @lmstudio/sdk?
    - Clarifies that it is completely optional
    - Lists benefits if installed
    - Installation instructions

  Cross-references between documents for easy navigation between quick reference and detailed guides.

* Add separate build/query template support for task-specific models

  Task-specific models like EmbeddingGemma require different templates for indexing vs searching. Store both templates at build time and auto-apply the query template during search, with backward compatibility.

* Consolidate prompt template tests from 44 to 37 tests

  Merged redundant no-op tests, removed low-value implementation tests, consolidated parameterized CLI tests, and removed a hanging over-mocked test. All tests pass, with improved focus on behavioral testing.

* Fix query template application in compute_query_embedding

  Query templates were only applied in the fallback code path, not when using the embedding server (the default path). This meant stored query templates in index metadata were ignored during MCP and CLI searches.

  Changes:
  - Move template application to before any computation path (searcher_base.py:109-110)
  - Add comprehensive tests for both the server and fallback paths
  - Consolidate tests into test_prompt_template_persistence.py

  Tests verify:
  - Template applied when using the embedding server
  - Template applied in the fallback path
  - Consistent behavior between both paths

* Apply ruff formatting and fix linting issues

  - Remove unused imports
  - Fix import ordering
  - Remove unused variables
  - Apply code formatting

* Fix CI test failures: mock OPENAI_API_KEY in tests

  Tests were failing in CI because compute_embeddings_openai() checks for OPENAI_API_KEY before using the mocked client. Added monkeypatch to set a fake API key in the test fixture.
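The "template before truncation" ordering described above matters because the template's tokens count against the model's context limit. The sketch below illustrates that ordering only; it assumes a tiktoken-style tokenizer and uses hypothetical helper names, not the actual leann internals inside compute_embeddings_openai().

# Sketch only: apply the prompt template, then truncate client-side so
# over-long inputs are cut deterministically instead of failing silently
# server-side. Helper names are hypothetical.
import tiktoken  # assumption: a tiktoken-compatible tokenizer is available


def apply_prompt_template(texts: list[str], template: str | None) -> list[str]:
    """Prepend a task-specific prefix such as "search_query: " to each text."""
    return [template + t for t in texts] if template else texts


def truncate_to_token_limit(texts: list[str], token_limit: int) -> list[str]:
    """Client-side truncation against the discovered token limit."""
    enc = tiktoken.get_encoding("cl100k_base")
    return [enc.decode(enc.encode(text)[:token_limit]) for text in texts]


# Template first, truncation second, so the prefix is never silently dropped.
prepared = truncate_to_token_limit(
    apply_prompt_template(["machine learning"], "search_query: "), token_limit=2048
)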
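The hybrid token limit discovery and the (model_name, base_url) cache can be pictured as below. This is a sketch under assumptions: the real get_model_token_limit() lives in leann.embedding_compute, the lookup bodies here are placeholders (the real code queries Ollama's /api/show and the Node.js @lmstudio/sdk bridge), the registry entry shown is only the value the tests assert, and the default of 512 mirrors what the integration test passes explicitly.

# Sketch of the discovery order: Ollama -> LM Studio SDK -> registry -> default,
# with results cached per (model_name, base_url) to avoid repeated SDK
# load/unload cycles. Placeholder bodies, not the leann implementation.
_token_limit_cache: dict[tuple[str, str], int] = {}
_REGISTRY = {"text-embedding-3-small": 8192}  # known OpenAI models (illustrative subset)


def _query_ollama_context_limit(model_name: str, base_url: str) -> int | None:
    """Placeholder for the dynamic Ollama /api/show lookup."""
    return None


def _query_lmstudio_context_limit(model_name: str, base_url: str) -> int | None:
    """Placeholder for the Node.js @lmstudio/sdk bridge (load, read context length, unload)."""
    return None


def get_model_token_limit(model_name: str, base_url: str, default: int = 512) -> int:
    key = (model_name, base_url)
    if key in _token_limit_cache:
        return _token_limit_cache[key]  # prevents duplicate SDK load/unload cycles
    limit = (
        _query_ollama_context_limit(model_name, base_url)
        or _query_lmstudio_context_limit(model_name, base_url)
        or _REGISTRY.get(model_name)
        or default
    )
    _token_limit_cache[key] = limit
    return limit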
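The URL handling for the SDK bridge mentioned above (detect an LM Studio endpoint via port 1234 or an lmstudio/lm.studio keyword, then convert the HTTP base URL to the WebSocket form the SDK expects) can be sketched as follows; the helper names are illustrative, not leann's.

# Sketch: LM Studio endpoint detection and HTTP -> WebSocket URL conversion.
from urllib.parse import urlparse


def is_lmstudio_url(base_url: str) -> bool:
    """Heuristic: port 1234 or an lmstudio / lm.studio keyword in the URL."""
    lowered = base_url.lower()
    return urlparse(base_url).port == 1234 or "lmstudio" in lowered or "lm.studio" in lowered


def to_websocket_url(base_url: str) -> str:
    """e.g. http://localhost:1234/v1 -> ws://localhost:1234 (drop the /v1 path)."""
    parsed = urlparse(base_url)
    scheme = "wss" if parsed.scheme == "https" else "ws"
    return f"{scheme}://{parsed.netloc}"


assert is_lmstudio_url("http://localhost:1234/v1")
assert to_websocket_url("http://localhost:1234/v1") == "ws://localhost:1234"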
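The build/query template split works by persisting both templates with the index metadata and applying the query template once, before either computation path (embedding server or local fallback) runs. A sketch of that flow: the .meta.json location and the embedding_options key come from the commits above, but the nested key names and function names here are assumptions for illustration.

# Sketch: persist both templates at build time, apply the query template at search time.
import json
from pathlib import Path


def save_embedding_options(index_dir: Path, build_template: str, query_template: str) -> None:
    meta_path = index_dir / ".meta.json"
    meta = json.loads(meta_path.read_text()) if meta_path.exists() else {}
    meta["embedding_options"] = {
        "build_prompt_template": build_template,   # assumed key name
        "query_prompt_template": query_template,   # assumed key name
    }
    meta_path.write_text(json.dumps(meta, indent=2))


def prepare_query(index_dir: Path, query: str) -> str:
    meta = json.loads((index_dir / ".meta.json").read_text())
    template = meta.get("embedding_options", {}).get("query_prompt_template", "")
    # Applied before any computation path, so server and fallback behave the same.
    return template + query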
tests/test_prompt_template_e2e.py — 401 lines, 15 KiB, Python
"""End-to-end integration tests for prompt template and token limit features.
|
|
|
|
These tests verify real-world functionality with live services:
|
|
- OpenAI-compatible APIs (OpenAI, LM Studio) with prompt template support
|
|
- Ollama with dynamic token limit detection
|
|
- Hybrid token limit discovery mechanism
|
|
|
|
Run with: pytest tests/test_prompt_template_e2e.py -v -s
|
|
Skip if services unavailable: pytest tests/test_prompt_template_e2e.py -m "not integration"
|
|
|
|
Prerequisites:
|
|
1. LM Studio running with embedding model: http://localhost:1234
|
|
2. [Optional] Ollama running: ollama serve
|
|
3. [Optional] Ollama model: ollama pull nomic-embed-text
|
|
4. [Optional] Node.js + @lmstudio/sdk for context length detection
|
|
"""
|
|
|
|
import logging
|
|
import socket
|
|
|
|
import numpy as np
|
|
import pytest
|
|
import requests
|
|
from leann.embedding_compute import (
|
|
compute_embeddings_ollama,
|
|
compute_embeddings_openai,
|
|
get_model_token_limit,
|
|
)
|
|
|
|
# Test markers for conditional execution
|
|
pytestmark = pytest.mark.integration
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def check_service_available(host: str, port: int, timeout: float = 2.0) -> bool:
|
|
"""Check if a service is available on the given host:port."""
|
|
try:
|
|
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
|
sock.settimeout(timeout)
|
|
result = sock.connect_ex((host, port))
|
|
sock.close()
|
|
return result == 0
|
|
except Exception:
|
|
return False
|
|
|
|
|
|
def check_ollama_available() -> bool:
|
|
"""Check if Ollama service is available."""
|
|
if not check_service_available("localhost", 11434):
|
|
return False
|
|
try:
|
|
response = requests.get("http://localhost:11434/api/tags", timeout=2.0)
|
|
return response.status_code == 200
|
|
except Exception:
|
|
return False
|
|
|
|
|
|
def check_lmstudio_available() -> bool:
|
|
"""Check if LM Studio service is available."""
|
|
if not check_service_available("localhost", 1234):
|
|
return False
|
|
try:
|
|
response = requests.get("http://localhost:1234/v1/models", timeout=2.0)
|
|
return response.status_code == 200
|
|
except Exception:
|
|
return False
|
|
|
|
|
|
def get_lmstudio_first_model() -> str:
|
|
"""Get the first available model from LM Studio."""
|
|
try:
|
|
response = requests.get("http://localhost:1234/v1/models", timeout=5.0)
|
|
data = response.json()
|
|
models = data.get("data", [])
|
|
if models:
|
|
return models[0]["id"]
|
|
except Exception:
|
|
pass
|
|
return None
|
|
|
|
|
|
class TestPromptTemplateOpenAI:
    """End-to-end tests for prompt template with OpenAI-compatible APIs (LM Studio)."""

    @pytest.mark.skipif(
        not check_lmstudio_available(), reason="LM Studio service not available on localhost:1234"
    )
    def test_lmstudio_embedding_with_prompt_template(self):
        """Test prompt templates with LM Studio using OpenAI-compatible API."""
        model_name = get_lmstudio_first_model()
        if not model_name:
            pytest.skip("No models loaded in LM Studio")

        texts = ["artificial intelligence", "machine learning"]
        prompt_template = "search_query: "

        # Get embeddings with prompt template via provider_options
        provider_options = {"prompt_template": prompt_template}
        embeddings = compute_embeddings_openai(
            texts=texts,
            model_name=model_name,
            base_url="http://localhost:1234/v1",
            api_key="lm-studio",  # LM Studio doesn't require real key
            provider_options=provider_options,
        )

        assert embeddings is not None
        assert len(embeddings) == 2
        assert all(isinstance(emb, np.ndarray) for emb in embeddings)
        assert all(len(emb) > 0 for emb in embeddings)

        logger.info(
            f"✓ LM Studio embeddings with prompt template: {len(embeddings)} vectors, {len(embeddings[0])} dimensions"
        )

    @pytest.mark.skipif(not check_lmstudio_available(), reason="LM Studio service not available")
    def test_lmstudio_prompt_template_affects_embeddings(self):
        """Verify that prompt templates actually change embedding values."""
        model_name = get_lmstudio_first_model()
        if not model_name:
            pytest.skip("No models loaded in LM Studio")

        text = "machine learning"
        base_url = "http://localhost:1234/v1"
        api_key = "lm-studio"

        # Get embeddings without template
        embeddings_no_template = compute_embeddings_openai(
            texts=[text],
            model_name=model_name,
            base_url=base_url,
            api_key=api_key,
            provider_options={},
        )

        # Get embeddings with template
        embeddings_with_template = compute_embeddings_openai(
            texts=[text],
            model_name=model_name,
            base_url=base_url,
            api_key=api_key,
            provider_options={"prompt_template": "search_query: "},
        )

        # Embeddings should be different when template is applied
        assert not np.allclose(embeddings_no_template[0], embeddings_with_template[0])

        logger.info("✓ Prompt template changes embedding values as expected")


class TestPromptTemplateOllama:
    """End-to-end tests for prompt template with Ollama."""

    @pytest.mark.skipif(
        not check_ollama_available(), reason="Ollama service not available on localhost:11434"
    )
    def test_ollama_embedding_with_prompt_template(self):
        """Test prompt templates with Ollama using any available embedding model."""
        # Get any available embedding model
        try:
            response = requests.get("http://localhost:11434/api/tags", timeout=2.0)
            models = response.json().get("models", [])

            embedding_models = []
            for model in models:
                name = model["name"]
                base_name = name.split(":")[0]
                if any(emb in base_name for emb in ["embed", "bge", "minilm", "e5", "nomic"]):
                    embedding_models.append(name)

            if not embedding_models:
                pytest.skip("No embedding models available in Ollama")

            model_name = embedding_models[0]

            texts = ["artificial intelligence", "machine learning"]
            prompt_template = "search_query: "

            # Get embeddings with prompt template via provider_options
            provider_options = {"prompt_template": prompt_template}
            embeddings = compute_embeddings_ollama(
                texts=texts,
                model_name=model_name,
                is_build=False,
                host="http://localhost:11434",
                provider_options=provider_options,
            )

            assert embeddings is not None
            assert len(embeddings) == 2
            assert all(isinstance(emb, np.ndarray) for emb in embeddings)
            assert all(len(emb) > 0 for emb in embeddings)

            logger.info(
                f"✓ Ollama embeddings with prompt template: {len(embeddings)} vectors, {len(embeddings[0])} dimensions"
            )

        except Exception as e:
            pytest.skip(f"Could not test Ollama prompt template: {e}")

    @pytest.mark.skipif(not check_ollama_available(), reason="Ollama service not available")
    def test_ollama_prompt_template_affects_embeddings(self):
        """Verify that prompt templates actually change embedding values with Ollama."""
        # Get any available embedding model
        try:
            response = requests.get("http://localhost:11434/api/tags", timeout=2.0)
            models = response.json().get("models", [])

            embedding_models = []
            for model in models:
                name = model["name"]
                base_name = name.split(":")[0]
                if any(emb in base_name for emb in ["embed", "bge", "minilm", "e5", "nomic"]):
                    embedding_models.append(name)

            if not embedding_models:
                pytest.skip("No embedding models available in Ollama")

            model_name = embedding_models[0]
            text = "machine learning"
            host = "http://localhost:11434"

            # Get embeddings without template
            embeddings_no_template = compute_embeddings_ollama(
                texts=[text], model_name=model_name, is_build=False, host=host, provider_options={}
            )

            # Get embeddings with template
            embeddings_with_template = compute_embeddings_ollama(
                texts=[text],
                model_name=model_name,
                is_build=False,
                host=host,
                provider_options={"prompt_template": "search_query: "},
            )

            # Embeddings should be different when template is applied
            assert not np.allclose(embeddings_no_template[0], embeddings_with_template[0])

            logger.info("✓ Ollama prompt template changes embedding values as expected")

        except Exception as e:
            pytest.skip(f"Could not test Ollama prompt template: {e}")


class TestLMStudioSDK:
    """End-to-end tests for LM Studio SDK integration."""

    @pytest.mark.skipif(not check_lmstudio_available(), reason="LM Studio service not available")
    def test_lmstudio_model_listing(self):
        """Test that we can list models from LM Studio."""
        try:
            response = requests.get("http://localhost:1234/v1/models", timeout=5.0)
            assert response.status_code == 200

            data = response.json()
            assert "data" in data

            models = data["data"]
            logger.info(f"✓ LM Studio models available: {len(models)}")

            if models:
                logger.info(f" First model: {models[0].get('id', 'unknown')}")
        except Exception as e:
            pytest.skip(f"LM Studio API error: {e}")

    @pytest.mark.skipif(not check_lmstudio_available(), reason="LM Studio service not available")
    def test_lmstudio_sdk_context_length_detection(self):
        """Test context length detection via LM Studio SDK bridge (requires Node.js + SDK)."""
        model_name = get_lmstudio_first_model()
        if not model_name:
            pytest.skip("No models loaded in LM Studio")

        try:
            from leann.embedding_compute import _query_lmstudio_context_limit

            # SDK requires WebSocket URL (ws://)
            context_length = _query_lmstudio_context_limit(
                model_name=model_name, base_url="ws://localhost:1234"
            )

            if context_length is None:
                logger.warning(
                    "⚠ LM Studio SDK bridge returned None (Node.js or SDK may not be available)"
                )
                pytest.skip("Node.js or @lmstudio/sdk not available - SDK bridge unavailable")
            else:
                assert context_length > 0
                logger.info(
                    f"✓ LM Studio context length detected via SDK: {context_length} for {model_name}"
                )

        except ImportError:
            pytest.skip("_query_lmstudio_context_limit not implemented yet")
        except Exception as e:
            logger.error(f"LM Studio SDK test error: {e}")
            raise


class TestOllamaTokenLimit:
    """End-to-end tests for Ollama token limit discovery."""

    @pytest.mark.skipif(not check_ollama_available(), reason="Ollama service not available")
    def test_ollama_token_limit_detection(self):
        """Test dynamic token limit detection from Ollama /api/show endpoint."""
        # Get any available embedding model
        try:
            response = requests.get("http://localhost:11434/api/tags", timeout=2.0)
            models = response.json().get("models", [])

            embedding_models = []
            for model in models:
                name = model["name"]
                base_name = name.split(":")[0]
                if any(emb in base_name for emb in ["embed", "bge", "minilm", "e5", "nomic"]):
                    embedding_models.append(name)

            if not embedding_models:
                pytest.skip("No embedding models available in Ollama")

            test_model = embedding_models[0]

            # Test token limit detection
            limit = get_model_token_limit(model_name=test_model, base_url="http://localhost:11434")

            assert limit > 0
            logger.info(f"✓ Ollama token limit detected: {limit} for {test_model}")

        except Exception as e:
            pytest.skip(f"Could not test Ollama token detection: {e}")


class TestHybridTokenLimit:
    """End-to-end tests for hybrid token limit discovery mechanism."""

    def test_hybrid_discovery_registry_fallback(self):
        """Test fallback to static registry for known OpenAI models."""
        # Use a known OpenAI model (should be in registry)
        limit = get_model_token_limit(
            model_name="text-embedding-3-small",
            base_url="http://fake-server:9999",  # Fake URL to force registry lookup
        )

        # text-embedding-3-small should have 8192 in registry
        assert limit == 8192
        logger.info(f"✓ Hybrid discovery (registry fallback): {limit} tokens")

    def test_hybrid_discovery_default_fallback(self):
        """Test fallback to safe default for completely unknown models."""
        limit = get_model_token_limit(
            model_name="completely-unknown-model-xyz-12345",
            base_url="http://fake-server:9999",
            default=512,
        )

        # Should get the specified default
        assert limit == 512
        logger.info(f"✓ Hybrid discovery (default fallback): {limit} tokens")

    @pytest.mark.skipif(not check_ollama_available(), reason="Ollama service not available")
    def test_hybrid_discovery_ollama_dynamic_first(self):
        """Test that Ollama models use dynamic discovery first."""
        # Get any available embedding model
        try:
            response = requests.get("http://localhost:11434/api/tags", timeout=2.0)
            models = response.json().get("models", [])

            embedding_models = []
            for model in models:
                name = model["name"]
                base_name = name.split(":")[0]
                if any(emb in base_name for emb in ["embed", "bge", "minilm", "e5", "nomic"]):
                    embedding_models.append(name)

            if not embedding_models:
                pytest.skip("No embedding models available in Ollama")

            test_model = embedding_models[0]

            # Should query Ollama /api/show dynamically
            limit = get_model_token_limit(model_name=test_model, base_url="http://localhost:11434")

            assert limit > 0
            logger.info(f"✓ Hybrid discovery (Ollama dynamic): {limit} tokens for {test_model}")

        except Exception as e:
            pytest.skip(f"Could not test hybrid Ollama discovery: {e}")


if __name__ == "__main__":
    print("\n" + "=" * 70)
    print("INTEGRATION TEST SUITE - Real Service Testing")
    print("=" * 70)
    print("\nThese tests require live services:")
    print(" • LM Studio: http://localhost:1234 (with embedding model loaded)")
    print(" • [Optional] Ollama: http://localhost:11434")
    print(" • [Optional] Node.js + @lmstudio/sdk for SDK bridge tests")
    print("\nRun with: pytest tests/test_prompt_template_e2e.py -v -s")
    print("=" * 70 + "\n")