# LEANN/tests/test_basic.py

"""
Basic functionality tests for CI pipeline using pytest.
"""

import os
import tempfile
from pathlib import Path

import pytest


def test_imports():
    """Smoke-test placeholder for verifying that the project's packages import.

    The body currently contains no import statements, so this test passes
    unconditionally and exercises nothing.
    """

    # C++ extension imports are expected to go here.
    # NOTE(review): no imports are present in this block — confirm whether
    # they were removed intentionally or should be restored.


@pytest.mark.skipif(
    os.environ.get("CI") == "true", reason="Skip model tests in CI to avoid MPS memory issues"
)
@pytest.mark.parametrize("backend_name", ["hnsw", "diskann"])
def test_backend_basic(backend_name):
    """Build a small index with the given backend and run one search against it.

    Runs once per parametrized backend ("hnsw" and "diskann"). The whole test
    is skipped when the ``CI`` environment variable equals ``"true"``.

    Args:
        backend_name: Backend identifier supplied by ``pytest.mark.parametrize``;
            also used as the suffix of the temporary index file name.
    """
    from leann.api import LeannBuilder, LeannSearcher, SearchResult

    # Embedding settings are identical for both backends.
    embedding_kwargs = {
        "embedding_model": "facebook/contriever",
        "embedding_mode": "sentence-transformers",
    }

    # Anything that is not "hnsw" is configured as diskann.
    if backend_name == "hnsw":
        backend_kwargs = {"backend_name": "hnsw", "M": 16, "efConstruction": 200}
    else:
        backend_kwargs = {
            "backend_name": "diskann",
            "num_neighbors": 32,
            "search_list_size": 50,
        }

    # The index lives in a throwaway directory that is removed on exit.
    with tempfile.TemporaryDirectory() as workdir:
        index_file = str(Path(workdir) / f"test.{backend_name}")

        builder = LeannBuilder(**backend_kwargs, **embedding_kwargs)

        # 100 short documents cycling through five topic numbers.
        for doc_id in range(100):
            builder.add_text(f"This is document {doc_id} about topic {doc_id % 5}")

        builder.build_index(index_file)

        hits = LeannSearcher(index_file).search("document about topic 2", top_k=5)

        # At least one hit, of the expected type, mentioning one of the query terms.
        assert len(hits) > 0
        best_hit = hits[0]
        assert isinstance(best_hit, SearchResult)
        assert any(term in best_hit.text for term in ("topic 2", "document"))


@pytest.mark.skipif(
    os.environ.get("CI") == "true", reason="Skip model tests in CI to avoid MPS memory issues"
)
def test_large_index():
    """Build an HNSW index over 1000 documents and check that top-k is honored.

    The whole test is skipped when the ``CI`` environment variable equals
    ``"true"``.

    The query is passed as a single string and the returned list is measured
    directly, matching how ``test_backend_basic`` in this file calls
    ``LeannSearcher.search`` (string in, list of results out).
    """
    from leann.api import LeannBuilder, LeannSearcher

    with tempfile.TemporaryDirectory() as temp_dir:
        index_path = str(Path(temp_dir) / "test_large.hnsw")

        # Every document shares the same 50-word body; only the numeric
        # "Document {i}:" prefix differs, so build the body once.
        body = " ".join(f"word{j}" for j in range(50))
        texts = [f"Document {i}: {body}" for i in range(1000)]

        builder = LeannBuilder(
            backend_name="hnsw",
            embedding_model="facebook/contriever",
            embedding_mode="sentence-transformers",
        )

        for text in texts:
            builder.add_text(text)

        builder.build_index(index_path)

        searcher = LeannSearcher(index_path)
        # NOTE(review): previously the query was wrapped in a list and the
        # assertion measured len(results[0]); with search() returning a list
        # of results, results[0] is a single hit rather than a list of ten.
        # Confirm search() has no batched-query mode that the old form relied on.
        results = searcher.search("word10 word20", top_k=10)
        assert len(results) == 10