refactor: improve test structure and fix main_cli example

- Move pytest configuration from pytest.ini to pyproject.toml
- Remove unnecessary run_tests.py script (use test extras instead)
- Fix main_cli_example.py to properly use command-line arguments for LLM config
- Add test_readme_examples.py to test code examples from README
- Refactor tests to use pytest fixtures and parametrization
- Update test documentation to reflect new structure
- Set proper environment variables in CI for test execution
2025-07-28 14:25:48 -07:00
parent ac5fd844a5
commit 8c988cf98b
8 changed files with 401 additions and 361 deletions
--- a/tests/test_main_cli.py
+++ b/tests/test_main_cli.py
@@ -1,166 +1,114 @@
-#!/usr/bin/env python3
 """
-Test main_cli_example functionality.
-This test is specifically designed to work in CI environments.
+Test main_cli_example functionality using pytest.
 """

-import sys
 import os
 import subprocess
-import shutil
+import sys
+import tempfile
 from pathlib import Path

-
-def test_main_cli_basic():
-    """Test main_cli with basic settings."""
-    print("Testing main_cli with facebook/contriever...")
-    
-    # Clean up any existing test index
-    test_index = Path("./test_index")
-    if test_index.exists():
-        shutil.rmtree(test_index)
-    
-    cmd = [
-        sys.executable,
-        "examples/main_cli_example.py",
-        "--llm", "simulated",
-        "--embedding-model", "facebook/contriever",
-        "--embedding-mode", "sentence-transformers",
-        "--index-dir", "./test_index",
-        "--data-dir", "examples/data",
-        "--query", "What is Pride and Prejudice about?"
-    ]
-    
-    try:
-        result = subprocess.run(
-            cmd,
-            capture_output=True,
-            text=True,
-            timeout=300  # 5 minute timeout
-        )
-        
-        if result.returncode != 0:
-            print(f"❌ main_cli failed with return code {result.returncode}")
-            print(f"STDOUT:\n{result.stdout}")
-            print(f"STDERR:\n{result.stderr}")
-            return False
-        
-        print("✅ main_cli completed successfully")
-        
-        # Check if index was created
-        if not test_index.exists():
-            print("❌ Index directory was not created")
-            return False
-        
-        print("✅ Index directory created")
-        return True
-        
-    except subprocess.TimeoutExpired:
-        print("❌ main_cli timed out after 5 minutes")
-        return False
-    except Exception as e:
-        print(f"❌ main_cli failed with exception: {e}")
-        return False
-    finally:
-        # Clean up
-        if test_index.exists():
-            shutil.rmtree(test_index)
+import pytest


-def test_main_cli_openai():
-    """Test main_cli with OpenAI embeddings if API key is available."""
-    if not os.environ.get("OPENAI_API_KEY"):
-        print("Skipping OpenAI test - no API key found")
-        return True
-    
-    print("Testing main_cli with OpenAI text-embedding-3-small...")
-    
-    # Clean up any existing test index
-    test_index = Path("./test_index_openai")
-    if test_index.exists():
-        shutil.rmtree(test_index)
-    
-    cmd = [
-        sys.executable,
-        "examples/main_cli_example.py",
-        "--llm", "simulated",
-        "--embedding-model", "text-embedding-3-small",
-        "--embedding-mode", "openai",
-        "--index-dir", "./test_index_openai",
-        "--data-dir", "examples/data",
-        "--query", "What is Pride and Prejudice about?"
-    ]
-    
-    try:
-        result = subprocess.run(
-            cmd,
-            capture_output=True,
-            text=True,
-            timeout=300,
-            env={**os.environ, "TOKENIZERS_PARALLELISM": "false"}
-        )
-        
-        if result.returncode != 0:
-            print(f"❌ main_cli with OpenAI failed with return code {result.returncode}")
-            print(f"STDOUT:\n{result.stdout}")
-            print(f"STDERR:\n{result.stderr}")
-            return False
-        
-        print("✅ main_cli with OpenAI completed successfully")
-        
+@pytest.fixture
+def test_data_dir():
+    """Return the path to test data directory."""
+    return Path("examples/data")
+
+
+def test_main_cli_simulated(test_data_dir):
+    """Test main_cli with simulated LLM."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        cmd = [
+            sys.executable,
+            "examples/main_cli_example.py",
+            "--llm",
+            "simulated",
+            "--embedding-model",
+            "facebook/contriever",
+            "--embedding-mode",
+            "sentence-transformers",
+            "--index-dir",
+            temp_dir,
+            "--data-dir",
+            str(test_data_dir),
+            "--query",
+            "What is Pride and Prejudice about?",
+        ]
+
+        env = os.environ.copy()
+        env["HF_HUB_DISABLE_SYMLINKS"] = "1"
+        env["TOKENIZERS_PARALLELISM"] = "false"
+
+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=600, env=env)
+
+        # Check return code
+        assert result.returncode == 0, f"Command failed: {result.stderr}"
+
+        # Verify output
+        output = result.stdout + result.stderr
+        assert "Leann index built at" in output or "Using existing index" in output
+        assert "This is a simulated answer" in output
+
+
+@pytest.mark.skipif(not os.environ.get("OPENAI_API_KEY"), reason="OpenAI API key not available")
+def test_main_cli_openai(test_data_dir):
+    """Test main_cli with OpenAI embeddings."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        cmd = [
+            sys.executable,
+            "examples/main_cli_example.py",
+            "--llm",
+            "simulated",  # Use simulated LLM to avoid GPT-4 costs
+            "--embedding-model",
+            "text-embedding-3-small",
+            "--embedding-mode",
+            "openai",
+            "--index-dir",
+            temp_dir,
+            "--data-dir",
+            str(test_data_dir),
+            "--query",
+            "What is Pride and Prejudice about?",
+        ]
+
+        env = os.environ.copy()
+        env["TOKENIZERS_PARALLELISM"] = "false"
+
+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=600, env=env)
+
+        assert result.returncode == 0, f"Command failed: {result.stderr}"
+
        # Verify cosine distance was used
-        if "distance_metric='cosine'" in result.stdout or "distance_metric='cosine'" in result.stderr:
-            print("✅ Correctly detected normalized embeddings and used cosine distance")
-        else:
-            print("⚠️  Could not verify cosine distance was used")
-        
-        return True
-        
-    except subprocess.TimeoutExpired:
-        print("❌ main_cli with OpenAI timed out after 5 minutes")
-        return False
-    except Exception as e:
-        print(f"❌ main_cli with OpenAI failed with exception: {e}")
-        return False
-    finally:
-        # Clean up
-        if test_index.exists():
-            shutil.rmtree(test_index)
+        output = result.stdout + result.stderr
+        assert any(
+            msg in output
+            for msg in [
+                "distance_metric='cosine'",
+                "Automatically setting distance_metric='cosine'",
+                "Using cosine distance",
+            ]
+        )


-def main():
-    """Run all main_cli tests."""
-    print("=" * 60)
-    print("Running main_cli Tests")
-    print("=" * 60)
-    
-    # Set environment variables
-    os.environ["HF_HUB_DISABLE_SYMLINKS"] = "1"
-    os.environ["TOKENIZERS_PARALLELISM"] = "false"
-    
-    all_passed = True
-    
-    # Test basic functionality
-    if not test_main_cli_basic():
-        all_passed = False
-        # On macOS, this might be due to C++ library issues
-        if sys.platform == "darwin":
-            print("⚠️  main_cli test failed on macOS, this might be due to the C++ library issue")
-            print("Continuing tests...")
-            all_passed = True  # Don't fail CI on macOS
-    
-    # Test with OpenAI if available
-    if not test_main_cli_openai():
-        all_passed = False
-    
-    print("\n" + "=" * 60)
-    if all_passed:
-        print("✅ All main_cli tests passed!")
-        return 0
-    else:
-        print("❌ Some main_cli tests failed!")
-        return 1
+@pytest.mark.xfail(sys.platform == "darwin", reason="May fail on macOS due to C++ ABI issues")
+def test_main_cli_error_handling(test_data_dir):
+    """Test main_cli with invalid parameters."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        cmd = [
+            sys.executable,
+            "examples/main_cli_example.py",
+            "--llm",
+            "invalid_llm_type",
+            "--index-dir",
+            temp_dir,
+            "--data-dir",
+            str(test_data_dir),
+        ]

+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)

-if __name__ == "__main__":
-    sys.exit(main()) 
+        # Should fail with invalid LLM type
+        assert result.returncode != 0
+        assert "Unknown LLM type" in result.stderr or "invalid_llm_type" in result.stderr