From 8c988cf98baa7e84f96ba3f93a10bb7400adc8ef Mon Sep 17 00:00:00 2001
From: Andy Lee <andylizf@outlook.com>
Date: Mon, 28 Jul 2025 14:25:48 -0700
Subject: [PATCH] refactor: improve test structure and fix main_cli example

- Move pytest configuration from pytest.ini to pyproject.toml
- Remove unnecessary run_tests.py script (use test extras instead)
- Fix main_cli_example.py to properly use command line arguments for LLM config
- Add test_readme_examples.py to test code examples from README
- Refactor tests to use pytest fixtures and parametrization
- Update test documentation to reflect new structure
- Set proper environment variables in CI for test execution
---
 .github/workflows/build-reusable.yml |  19 +-
 examples/main_cli_example.py         |  16 +-
 pyproject.toml                       |  31 ++++
 tests/README.md                      |  77 +++++---
 tests/test_basic.py                  |  86 +++++++++
 tests/test_ci_basic.py               | 178 -------------------
 tests/test_main_cli.py               | 252 +++++++++++----------------
 tests/test_readme_examples.py        | 103 +++++++++++
 8 files changed, 401 insertions(+), 361 deletions(-)
 create mode 100644 tests/test_basic.py
 delete mode 100644 tests/test_ci_basic.py
 create mode 100644 tests/test_readme_examples.py

diff --git a/.github/workflows/build-reusable.yml b/.github/workflows/build-reusable.yml
index f842db2..1db0eec 100644
--- a/.github/workflows/build-reusable.yml
+++ b/.github/workflows/build-reusable.yml
@@ -207,19 +207,22 @@ jobs:
           fi
           uv pip install --system packages/leann-backend-hnsw/dist/*.whl
           uv pip install --system packages/leann-backend-diskann/dist/*.whl
-          
-          # Install test dependencies
-          uv pip install --system llama-index-core python-dotenv sentence-transformers
 
-      - name: Run basic functionality tests
-        run: |
-          python tests/test_ci_basic.py
+          # Install test dependencies using extras
+          uv pip install --system -e ".[test]"
 
-      - name: Run main_cli tests
+      - name: Run tests with pytest
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          HF_HUB_DISABLE_SYMLINKS: 1
+          TOKENIZERS_PARALLELISM: false
         run: |
-          python tests/test_main_cli.py
+          # Run all tests, continue on macOS failures
+          if [[ "${{ matrix.os }}" == macos-* ]]; then
+            pytest tests/ -x || echo "⚠️ Tests failed on macOS, continuing..."
+          else
+            pytest tests/
+          fi
 
       - name: Run sanity checks (optional)
         run: |
diff --git a/examples/main_cli_example.py b/examples/main_cli_example.py
index 502821c..ae78fbc 100644
--- a/examples/main_cli_example.py
+++ b/examples/main_cli_example.py
@@ -64,9 +64,19 @@ async def main(args):
 
     print("\n[PHASE 2] Starting Leann chat session...")
 
-    llm_config = {"type": "hf", "model": "Qwen/Qwen3-4B"}
-    llm_config = {"type": "ollama", "model": "qwen3:8b"}
-    llm_config = {"type": "openai", "model": "gpt-4o"}
+    # Build llm_config based on command line arguments
+    if args.llm == "simulated":
+        llm_config = {"type": "simulated"}
+    elif args.llm == "ollama":
+        llm_config = {"type": "ollama", "model": args.model, "host": args.host}
+    elif args.llm == "hf":
+        llm_config = {"type": "hf", "model": args.model}
+    elif args.llm == "openai":
+        llm_config = {"type": "openai", "model": args.model}
+    else:
+        raise ValueError(f"Unknown LLM type: {args.llm}")
+
+    print(f"Using LLM: {args.llm} with model: {args.model if args.llm != 'simulated' else 'N/A'}")
 
     chat = LeannChat(index_path=INDEX_PATH, llm_config=llm_config)
     # query = (
diff --git a/pyproject.toml b/pyproject.toml
index aac0f78..0431022 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -49,6 +49,7 @@ dependencies = [
 dev = [
     "pytest>=7.0",
     "pytest-cov>=4.0",
+    "pytest-xdist>=3.0",  # For parallel test execution
     "black>=23.0",
     "ruff>=0.1.0",
     "matplotlib",
@@ -56,6 +57,16 @@ dev = [
     "pre-commit>=3.5.0",
 ]
 
+test = [
+    "pytest>=7.0",
+    "pytest-env>=1.0",  # Provides the `env` option used in [tool.pytest.ini_options]
+    "pytest-timeout>=2.0",
+    "llama-index-core>=0.12.0",
+    "llama-index-readers-file>=0.4.0",
+    "python-dotenv>=1.0.0",
+    "sentence-transformers>=2.2.0",
+]
+
 diskann = [
     "leann-backend-diskann",
 ]
@@ -123,3 +134,24 @@ line-ending = "auto"
 dev = [
     "ruff>=0.12.4",
 ]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = ["test_*.py"]
+python_classes = ["Test*"]
+python_functions = ["test_*"]
+markers = [
+    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
+    "openai: marks tests that require OpenAI API key",
+]
+timeout = 600
+addopts = [
+    "-v",
+    "--tb=short",
+    "--strict-markers",
+    "--disable-warnings",
+]
+env = [
+    "HF_HUB_DISABLE_SYMLINKS=1",
+    "TOKENIZERS_PARALLELISM=false",
+]
diff --git a/tests/README.md b/tests/README.md
index 30419c9..b3cd70b 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -1,52 +1,89 @@
 # LEANN Tests
 
-This directory contains automated tests for the LEANN project, primarily used in CI/CD pipelines.
+This directory contains automated tests for the LEANN project using pytest.
 
 ## Test Files
 
-### `test_ci_basic.py`
+### `test_readme_examples.py`
+Tests the examples shown in README.md:
+- The basic example code that users see first
+- Import statements work correctly
+- Different backend options (HNSW, DiskANN)
+- Different LLM configuration options
+
+### `test_basic.py`
 Basic functionality tests that verify:
 - All packages can be imported correctly
 - C++ extensions (FAISS, DiskANN) load properly
 - Basic index building and searching works for both HNSW and DiskANN backends
+- Uses parametrized tests to test both backends
 
 ### `test_main_cli.py`
 Tests the main CLI example functionality:
 - Tests with facebook/contriever embeddings
 - Tests with OpenAI embeddings (if API key is available)
+- Tests error handling with invalid parameters
 - Verifies that normalized embeddings are detected and cosine distance is used
 
-## Running Tests Locally
+## Running Tests
 
-### Basic tests:
+### Install test dependencies:
 ```bash
-python tests/test_ci_basic.py
+# Using extras
+uv pip install -e ".[test]"
 ```
 
-### Main CLI tests:
+### Run all tests:
 ```bash
-# Without OpenAI API key
-python tests/test_main_cli.py
+pytest tests/
 
-# With OpenAI API key
-OPENAI_API_KEY=your-key-here python tests/test_main_cli.py
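+# Or with coverage (needs pytest-cov, installed by the `dev` extras)
+pytest tests/ --cov=leann --cov-report=html
+
+# Run in parallel (faster; needs pytest-xdist, installed by the `dev` extras)
+pytest tests/ -n auto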
+```
+
+### Run specific tests:
+```bash
+# Only basic tests
+pytest tests/test_basic.py
+
+# Only tests that don't require OpenAI
+pytest tests/ -m "not openai"
+
+# Skip slow tests
+pytest tests/ -m "not slow"
+```
+
+### Run with specific backend:
+```bash
+# Test only HNSW backend
+pytest tests/test_basic.py::test_backend_basic[hnsw]
+
+# Test only DiskANN backend
+pytest tests/test_basic.py::test_backend_basic[diskann]
 ```
 
 ## CI/CD Integration
 
-These tests are automatically run in the GitHub Actions workflow:
+Tests are automatically run in GitHub Actions:
 1. After building wheel packages
 2. On multiple Python versions (3.9 - 3.13)
 3. On both Ubuntu and macOS
+4. Using pytest with appropriate markers and flags
+
+### pytest Configuration
+
+The `[tool.pytest.ini_options]` table in `pyproject.toml` configures:
+- Test discovery paths
+- Default timeout (600 seconds)
+- Environment variables (HF_HUB_DISABLE_SYMLINKS, TOKENIZERS_PARALLELISM)
+- Custom markers for slow and OpenAI tests
+- Verbose output with short tracebacks
 
 ### Known Issues
 
-- On macOS, there might be C++ standard library compatibility issues that cause tests to fail
-- The CI is configured to continue on macOS failures to avoid blocking releases
-- OpenAI tests are skipped if no API key is provided in GitHub secrets
-
-## Test Data
-
-Tests use the example data in `examples/data/`:
-- `PrideandPrejudice.txt` - Text file for testing
-- PDF files for document processing tests 
\ No newline at end of file
+- On macOS, tests may fail due to C++ standard library compatibility issues
+- Tests marked with `@pytest.mark.xfail` are expected to fail on macOS
+- OpenAI tests are automatically skipped if no API key is provided
diff --git a/tests/test_basic.py b/tests/test_basic.py
new file mode 100644
index 0000000..6aacd54
--- /dev/null
+++ b/tests/test_basic.py
@@ -0,0 +1,95 @@
+"""
+Basic functionality tests for CI pipeline using pytest.
+"""
+
+import tempfile
+from pathlib import Path
+
+import pytest
+
+
+def test_imports():
+    """Test that all packages and their C++ extensions can be imported.
+
+    The imports themselves are the test: any ImportError fails it. They are
+    marked `noqa: F401` so lint auto-fixes do not strip them as unused.
+    """
+    import leann  # noqa: F401
+    import leann_backend_diskann  # noqa: F401
+    import leann_backend_hnsw  # noqa: F401
+
+    # Test C++ extensions
+    import leann_backend_diskann.diskann_backend  # noqa: F401
+    from leann_backend_hnsw import faiss  # noqa: F401
+
+
+@pytest.mark.parametrize("backend_name", ["hnsw", "diskann"])
+def test_backend_basic(backend_name):
+    """Test basic functionality for each backend."""
+    from leann.api import LeannBuilder, LeannSearcher
+
+    # Create temporary directory for index
+    with tempfile.TemporaryDirectory() as temp_dir:
+        index_path = str(Path(temp_dir) / f"test.{backend_name}")
+
+        # Test with small data
+        texts = [f"This is document {i} about topic {i % 5}" for i in range(100)]
+
+        # Configure builder based on backend
+        if backend_name == "hnsw":
+            builder = LeannBuilder(
+                backend_name="hnsw",
+                embedding_model="facebook/contriever",
+                embedding_mode="sentence-transformers",
+                M=16,
+                efConstruction=200,
+            )
+        else:  # diskann
+            builder = LeannBuilder(
+                backend_name="diskann",
+                embedding_model="facebook/contriever",
+                embedding_mode="sentence-transformers",
+                num_neighbors=32,
+                search_list_size=50,
+            )
+
+        # Add texts
+        for text in texts:
+            builder.add_text(text)
+
+        # Build index
+        builder.build_index(index_path)
+
+        # Test search
+        searcher = LeannSearcher(index_path)
+        results = searcher.search(["document about topic 2"], top_k=5)
+
+        # Verify results
+        assert len(results) > 0
+        assert len(results[0]) > 0
+        assert "topic 2" in results[0][0].text or "document" in results[0][0].text
+
+
+@pytest.mark.skipif("sys.platform == 'darwin'", reason="May fail on macOS due to C++ ABI issues")
+def test_large_index():
+    """Test with larger dataset (skip on macOS CI)."""
+    from leann.api import LeannBuilder, LeannSearcher
+
+    with tempfile.TemporaryDirectory() as temp_dir:
+        index_path = str(Path(temp_dir) / "test_large.hnsw")
+        texts = [f"Document {i}: {' '.join([f'word{j}' for j in range(50)])}" for i in range(1000)]
+
+        builder = LeannBuilder(
+            backend_name="hnsw",
+            embedding_model="facebook/contriever",
+            embedding_mode="sentence-transformers",
+        )
+
+        for text in texts:
+            builder.add_text(text)
+
+        builder.build_index(index_path)
+
+        searcher = LeannSearcher(index_path)
+        results = searcher.search(["word10 word20"], top_k=10)
+        assert len(results[0]) == 10
diff --git a/tests/test_ci_basic.py b/tests/test_ci_basic.py
deleted file mode 100644
index d677454..0000000
--- a/tests/test_ci_basic.py
+++ /dev/null
@@ -1,178 +0,0 @@
-#!/usr/bin/env python3
-"""
-Basic functionality tests for CI pipeline.
-These tests verify that the built packages work correctly.
-"""
-
-import sys
-import numpy as np
-from pathlib import Path
-
-
-def test_imports():
-    """Test that all packages can be imported."""
-    print("Testing package imports...")
-
-    try:
-        import leann
-
-        print("✅ leann imported successfully")
-    except ImportError as e:
-        print(f"❌ Failed to import leann: {e}")
-        return False
-
-    try:
-        import leann_backend_hnsw
-
-        print("✅ leann_backend_hnsw imported successfully")
-    except ImportError as e:
-        print(f"❌ Failed to import leann_backend_hnsw: {e}")
-        return False
-
-    try:
-        import leann_backend_diskann
-
-        print("✅ leann_backend_diskann imported successfully")
-    except ImportError as e:
-        print(f"❌ Failed to import leann_backend_diskann: {e}")
-        return False
-
-    # Test C++ extensions
-    try:
-        from leann_backend_hnsw import faiss
-
-        print("✅ FAISS loaded successfully")
-    except ImportError as e:
-        print(f"❌ Failed to load FAISS: {e}")
-        return False
-
-    try:
-        import leann_backend_diskann.diskann_backend
-
-        print("✅ DiskANN loaded successfully")
-    except ImportError as e:
-        print(f"❌ Failed to load DiskANN: {e}")
-        return False
-
-    return True
-
-
-def test_hnsw_basic():
-    """Test basic HNSW functionality."""
-    print("\nTesting HNSW basic functionality...")
-
-    try:
-        from leann.api import LeannBuilder
-
-        # Test with small random data
-        data = np.random.rand(100, 768).astype(np.float32)
-        texts = [f"Text {i}" for i in range(100)]
-
-        builder = LeannBuilder(
-            backend_name="hnsw",
-            embedding_model="facebook/contriever",
-            embedding_mode="sentence-transformers",
-            dimensions=768,
-            M=16,
-            efConstruction=200,
-        )
-
-        # Build in-memory index
-        index = builder.build_memory_index(data, texts)
-        print("✅ HNSW index built successfully")
-
-        # Test search
-        results = index.search(["test query"], top_k=5)
-        print(f"✅ Search completed, found {len(results[0])} results")
-
-        return True
-    except Exception as e:
-        print(f"❌ HNSW test failed: {e}")
-        import traceback
-
-        traceback.print_exc()
-        return False
-
-
-def test_diskann_basic():
-    """Test basic DiskANN functionality."""
-    print("\nTesting DiskANN basic functionality...")
-
-    try:
-        from leann.api import LeannBuilder
-        import tempfile
-        import shutil
-
-        # Test with small random data
-        data = np.random.rand(100, 768).astype(np.float32)
-        texts = [f"Text {i}" for i in range(100)]
-
-        # Create temporary directory for index
-        temp_dir = tempfile.mkdtemp()
-        index_path = str(Path(temp_dir) / "test.diskann")
-
-        try:
-            builder = LeannBuilder(
-                backend_name="diskann",
-                embedding_model="facebook/contriever",
-                embedding_mode="sentence-transformers",
-                dimensions=768,
-                num_neighbors=32,
-                search_list_size=50,
-            )
-
-            # Build disk index
-            builder.build_index(index_path, texts=texts, embeddings=data)
-            print("✅ DiskANN index built successfully")
-
-            # Test search
-            from leann.api import LeannSearcher
-
-            searcher = LeannSearcher(index_path)
-            results = searcher.search(["test query"], top_k=5)
-            print(f"✅ DiskANN search completed, found {len(results[0])} results")
-
-            return True
-        finally:
-            # Clean up
-            shutil.rmtree(temp_dir, ignore_errors=True)
-
-    except Exception as e:
-        print(f"❌ DiskANN test failed: {e}")
-        import traceback
-
-        traceback.print_exc()
-        return False
-
-
-def main():
-    """Run all tests."""
-    print("=" * 60)
-    print("Running CI Basic Functionality Tests")
-    print("=" * 60)
-
-    all_passed = True
-
-    # Test imports
-    if not test_imports():
-        all_passed = False
-
-    # Test HNSW
-    if not test_hnsw_basic():
-        all_passed = False
-
-    # Test DiskANN
-    if not test_diskann_basic():
-        all_passed = False
-
-    print("\n" + "=" * 60)
-    if all_passed:
-        print("✅ All tests passed!")
-        return 0
-    else:
-        print("❌ Some tests failed!")
-        return 1
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/tests/test_main_cli.py b/tests/test_main_cli.py
index e80a794..b42e043 100644
--- a/tests/test_main_cli.py
+++ b/tests/test_main_cli.py
@@ -1,166 +1,114 @@
-#!/usr/bin/env python3
 """
-Test main_cli_example functionality.
-This test is specifically designed to work in CI environments.
+Test main_cli_example functionality using pytest.
 """
 
-import sys
 import os
 import subprocess
-import shutil
+import sys
+import tempfile
 from pathlib import Path
 
-
-def test_main_cli_basic():
-    """Test main_cli with basic settings."""
-    print("Testing main_cli with facebook/contriever...")
-    
-    # Clean up any existing test index
-    test_index = Path("./test_index")
-    if test_index.exists():
-        shutil.rmtree(test_index)
-    
-    cmd = [
-        sys.executable,
-        "examples/main_cli_example.py",
-        "--llm", "simulated",
-        "--embedding-model", "facebook/contriever",
-        "--embedding-mode", "sentence-transformers",
-        "--index-dir", "./test_index",
-        "--data-dir", "examples/data",
-        "--query", "What is Pride and Prejudice about?"
-    ]
-    
-    try:
-        result = subprocess.run(
-            cmd,
-            capture_output=True,
-            text=True,
-            timeout=300  # 5 minute timeout
-        )
-        
-        if result.returncode != 0:
-            print(f"❌ main_cli failed with return code {result.returncode}")
-            print(f"STDOUT:\n{result.stdout}")
-            print(f"STDERR:\n{result.stderr}")
-            return False
-        
-        print("✅ main_cli completed successfully")
-        
-        # Check if index was created
-        if not test_index.exists():
-            print("❌ Index directory was not created")
-            return False
-        
-        print("✅ Index directory created")
-        return True
-        
-    except subprocess.TimeoutExpired:
-        print("❌ main_cli timed out after 5 minutes")
-        return False
-    except Exception as e:
-        print(f"❌ main_cli failed with exception: {e}")
-        return False
-    finally:
-        # Clean up
-        if test_index.exists():
-            shutil.rmtree(test_index)
+import pytest
 
 
-def test_main_cli_openai():
-    """Test main_cli with OpenAI embeddings if API key is available."""
-    if not os.environ.get("OPENAI_API_KEY"):
-        print("Skipping OpenAI test - no API key found")
-        return True
-    
-    print("Testing main_cli with OpenAI text-embedding-3-small...")
-    
-    # Clean up any existing test index
-    test_index = Path("./test_index_openai")
-    if test_index.exists():
-        shutil.rmtree(test_index)
-    
-    cmd = [
-        sys.executable,
-        "examples/main_cli_example.py",
-        "--llm", "simulated",
-        "--embedding-model", "text-embedding-3-small",
-        "--embedding-mode", "openai",
-        "--index-dir", "./test_index_openai",
-        "--data-dir", "examples/data",
-        "--query", "What is Pride and Prejudice about?"
-    ]
-    
-    try:
-        result = subprocess.run(
-            cmd,
-            capture_output=True,
-            text=True,
-            timeout=300,
-            env={**os.environ, "TOKENIZERS_PARALLELISM": "false"}
-        )
-        
-        if result.returncode != 0:
-            print(f"❌ main_cli with OpenAI failed with return code {result.returncode}")
-            print(f"STDOUT:\n{result.stdout}")
-            print(f"STDERR:\n{result.stderr}")
-            return False
-        
-        print("✅ main_cli with OpenAI completed successfully")
-        
+@pytest.fixture
+def test_data_dir():
+    """Return the path to test data directory."""
+    return Path("examples/data")
+
+
+def test_main_cli_simulated(test_data_dir):
+    """Test main_cli with simulated LLM."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        cmd = [
+            sys.executable,
+            "examples/main_cli_example.py",
+            "--llm",
+            "simulated",
+            "--embedding-model",
+            "facebook/contriever",
+            "--embedding-mode",
+            "sentence-transformers",
+            "--index-dir",
+            temp_dir,
+            "--data-dir",
+            str(test_data_dir),
+            "--query",
+            "What is Pride and Prejudice about?",
+        ]
+
+        env = os.environ.copy()
+        env["HF_HUB_DISABLE_SYMLINKS"] = "1"
+        env["TOKENIZERS_PARALLELISM"] = "false"
+
+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=600, env=env)
+
+        # Check return code
+        assert result.returncode == 0, f"Command failed: {result.stderr}"
+
+        # Verify output
+        output = result.stdout + result.stderr
+        assert "Leann index built at" in output or "Using existing index" in output
+        assert "This is a simulated answer" in output
+
+
+@pytest.mark.skipif(not os.environ.get("OPENAI_API_KEY"), reason="OpenAI API key not available")
+def test_main_cli_openai(test_data_dir):
+    """Test main_cli with OpenAI embeddings."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        cmd = [
+            sys.executable,
+            "examples/main_cli_example.py",
+            "--llm",
+            "simulated",  # Use simulated LLM to avoid GPT-4 costs
+            "--embedding-model",
+            "text-embedding-3-small",
+            "--embedding-mode",
+            "openai",
+            "--index-dir",
+            temp_dir,
+            "--data-dir",
+            str(test_data_dir),
+            "--query",
+            "What is Pride and Prejudice about?",
+        ]
+
+        env = os.environ.copy()
+        env["TOKENIZERS_PARALLELISM"] = "false"
+
+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=600, env=env)
+
+        assert result.returncode == 0, f"Command failed: {result.stderr}"
+
         # Verify cosine distance was used
-        if "distance_metric='cosine'" in result.stdout or "distance_metric='cosine'" in result.stderr:
-            print("✅ Correctly detected normalized embeddings and used cosine distance")
-        else:
-            print("⚠️  Could not verify cosine distance was used")
-        
-        return True
-        
-    except subprocess.TimeoutExpired:
-        print("❌ main_cli with OpenAI timed out after 5 minutes")
-        return False
-    except Exception as e:
-        print(f"❌ main_cli with OpenAI failed with exception: {e}")
-        return False
-    finally:
-        # Clean up
-        if test_index.exists():
-            shutil.rmtree(test_index)
+        output = result.stdout + result.stderr
+        assert any(
+            msg in output
+            for msg in [
+                "distance_metric='cosine'",
+                "Automatically setting distance_metric='cosine'",
+                "Using cosine distance",
+            ]
+        )
 
 
-def main():
-    """Run all main_cli tests."""
-    print("=" * 60)
-    print("Running main_cli Tests")
-    print("=" * 60)
-    
-    # Set environment variables
-    os.environ["HF_HUB_DISABLE_SYMLINKS"] = "1"
-    os.environ["TOKENIZERS_PARALLELISM"] = "false"
-    
-    all_passed = True
-    
-    # Test basic functionality
-    if not test_main_cli_basic():
-        all_passed = False
-        # On macOS, this might be due to C++ library issues
-        if sys.platform == "darwin":
-            print("⚠️  main_cli test failed on macOS, this might be due to the C++ library issue")
-            print("Continuing tests...")
-            all_passed = True  # Don't fail CI on macOS
-    
-    # Test with OpenAI if available
-    if not test_main_cli_openai():
-        all_passed = False
-    
-    print("\n" + "=" * 60)
-    if all_passed:
-        print("✅ All main_cli tests passed!")
-        return 0
-    else:
-        print("❌ Some main_cli tests failed!")
-        return 1
+@pytest.mark.xfail(sys.platform == "darwin", reason="May fail on macOS due to C++ ABI issues")
+def test_main_cli_error_handling(test_data_dir):
+    """Test main_cli with invalid parameters."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        cmd = [
+            sys.executable,
+            "examples/main_cli_example.py",
+            "--llm",
+            "invalid_llm_type",
+            "--index-dir",
+            temp_dir,
+            "--data-dir",
+            str(test_data_dir),
+        ]
 
+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
 
-if __name__ == "__main__":
-    sys.exit(main()) 
\ No newline at end of file
+        # Should fail with invalid LLM type
+        assert result.returncode != 0
+        assert "Unknown LLM type" in result.stderr or "invalid_llm_type" in result.stderr
diff --git a/tests/test_readme_examples.py b/tests/test_readme_examples.py
new file mode 100644
index 0000000..48d7217
--- /dev/null
+++ b/tests/test_readme_examples.py
@@ -0,0 +1,103 @@
+"""
+Test examples from README.md to ensure documentation is accurate.
+"""
+
+import tempfile
+from pathlib import Path
+
+import pytest
+
+
+def test_readme_basic_example():
+    """Test the basic example from README.md."""
+    # This is the exact code from README
+    from leann import LeannBuilder, LeannChat, LeannSearcher
+
+    with tempfile.TemporaryDirectory() as temp_dir:
+        INDEX_PATH = str(Path(temp_dir) / "demo.leann")
+
+        # Build an index
+        builder = LeannBuilder(backend_name="hnsw")
+        builder.add_text("LEANN saves 97% storage compared to traditional vector databases.")
+        builder.add_text("Tung Tung Tung Sahur called—they need their banana-crocodile hybrid back")
+        builder.build_index(INDEX_PATH)
+
+        # Verify index was created
+        assert Path(INDEX_PATH).exists()
+
+        # Search
+        searcher = LeannSearcher(INDEX_PATH)
+        results = searcher.search("fantastical AI-generated creatures", top_k=1)
+
+        # Verify search results
+        assert len(results) > 0
+        assert len(results[0]) == 1  # top_k=1
+        # The second text about banana-crocodile should be more relevant
+        assert "banana" in results[0][0].text or "crocodile" in results[0][0].text
+
+        # Chat with your data (using simulated LLM to avoid external dependencies)
+        chat = LeannChat(INDEX_PATH, llm_config={"type": "simulated"})
+        response = chat.ask("How much storage does LEANN save?", top_k=1)
+
+        # Verify chat works
+        assert isinstance(response, str)
+        assert len(response) > 0
+
+
+def test_readme_imports():
+    """Test that the imports shown in README work correctly."""
+    # These are the imports shown in README
+    from leann import LeannBuilder, LeannChat, LeannSearcher
+
+    # Verify they are the correct types
+    assert callable(LeannBuilder)
+    assert callable(LeannSearcher)
+    assert callable(LeannChat)
+
+
+def test_backend_options():
+    """Test different backend options mentioned in documentation."""
+    from leann import LeannBuilder
+
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Test HNSW backend (as shown in README)
+        hnsw_path = str(Path(temp_dir) / "test_hnsw.leann")
+        builder_hnsw = LeannBuilder(backend_name="hnsw")
+        builder_hnsw.add_text("Test document for HNSW backend")
+        builder_hnsw.build_index(hnsw_path)
+        assert Path(hnsw_path).exists()
+
+        # Test DiskANN backend (mentioned as available option)
+        diskann_path = str(Path(temp_dir) / "test_diskann.leann")
+        builder_diskann = LeannBuilder(backend_name="diskann")
+        builder_diskann.add_text("Test document for DiskANN backend")
+        builder_diskann.build_index(diskann_path)
+        assert Path(diskann_path).exists()
+
+
+@pytest.mark.parametrize("llm_type", ["simulated", pytest.param("hf", marks=pytest.mark.slow)])
+def test_llm_config_options(llm_type):
+    """Test LLM configuration options shown in documentation (the "hf" case is marked slow)."""
+    from leann import LeannBuilder, LeannChat
+
+    if llm_type == "hf":
+        pytest.importorskip("transformers")  # Skip if transformers not installed
+
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Build a simple index
+        index_path = str(Path(temp_dir) / "test.leann")
+        builder = LeannBuilder(backend_name="hnsw")
+        builder.add_text("Test document for LLM testing")
+        builder.build_index(index_path)
+
+        # Test LLM config
+        if llm_type == "simulated":
+            llm_config = {"type": "simulated"}
+        else:  # hf
+            llm_config = {"type": "hf", "model": "Qwen/Qwen3-0.6B"}
+
+        chat = LeannChat(index_path, llm_config=llm_config)
+        response = chat.ask("What is this document about?", top_k=1)
+
+        assert isinstance(response, str)
+        assert len(response) > 0