tests: diskann and partition
This commit is contained in:
@@ -6,10 +6,11 @@ This directory contains automated tests for the LEANN project using pytest.
|
|||||||
|
|
||||||
### `test_readme_examples.py`
|
### `test_readme_examples.py`
|
||||||
Tests the examples shown in README.md:
|
Tests the examples shown in README.md:
|
||||||
- The basic example code that users see first
|
- The basic example code that users see first (parametrized for both HNSW and DiskANN backends)
|
||||||
- Import statements work correctly
|
- Import statements work correctly
|
||||||
- Different backend options (HNSW, DiskANN)
|
- Different backend options (HNSW, DiskANN)
|
||||||
- Different LLM configuration options
|
- Different LLM configuration options (parametrized for both backends)
|
||||||
|
- **All main README examples are tested with both HNSW and DiskANN backends using pytest parametrization**
|
||||||
|
|
||||||
### `test_basic.py`
|
### `test_basic.py`
|
||||||
Basic functionality tests that verify:
|
Basic functionality tests that verify:
|
||||||
@@ -25,6 +26,16 @@ Tests the document RAG example functionality:
|
|||||||
- Tests error handling with invalid parameters
|
- Tests error handling with invalid parameters
|
||||||
- Verifies that normalized embeddings are detected and cosine distance is used
|
- Verifies that normalized embeddings are detected and cosine distance is used
|
||||||
|
|
||||||
|
### `test_diskann_partition.py`
|
||||||
|
Tests DiskANN graph partitioning functionality:
|
||||||
|
- Tests DiskANN index building without partitioning (baseline)
|
||||||
|
- Tests automatic graph partitioning with `is_recompute=True`
|
||||||
|
- Verifies that partition files are created and large files are cleaned up for storage saving
|
||||||
|
- Tests search functionality with partitioned indices
|
||||||
|
- Validates medoid and max_base_norm file generation and usage
|
||||||
|
- Includes performance comparison between DiskANN (with partition) and HNSW
|
||||||
|
- **Note**: These tests are skipped in CI due to hardware requirements and computation time
|
||||||
|
|
||||||
## Running Tests
|
## Running Tests
|
||||||
|
|
||||||
### Install test dependencies:
|
### Install test dependencies:
|
||||||
@@ -54,15 +65,23 @@ pytest tests/ -m "not openai"
|
|||||||
|
|
||||||
# Skip slow tests
|
# Skip slow tests
|
||||||
pytest tests/ -m "not slow"
|
pytest tests/ -m "not slow"
|
||||||
|
|
||||||
|
# Run DiskANN partition tests (requires local machine, not CI)
|
||||||
|
pytest tests/test_diskann_partition.py
|
||||||
```
|
```
|
||||||
|
|
||||||
### Run with specific backend:
|
### Run with specific backend:
|
||||||
```bash
|
```bash
|
||||||
# Test only HNSW backend
|
# Test only HNSW backend
|
||||||
pytest tests/test_basic.py::test_backend_basic[hnsw]
|
pytest tests/test_basic.py::test_backend_basic[hnsw]
|
||||||
|
pytest tests/test_readme_examples.py::test_readme_basic_example[hnsw]
|
||||||
|
|
||||||
# Test only DiskANN backend
|
# Test only DiskANN backend
|
||||||
pytest tests/test_basic.py::test_backend_basic[diskann]
|
pytest tests/test_basic.py::test_backend_basic[diskann]
|
||||||
|
pytest tests/test_readme_examples.py::test_readme_basic_example[diskann]
|
||||||
|
|
||||||
|
# All DiskANN tests (parametrized + specialized partition tests)
|
||||||
|
pytest tests/ -k diskann
|
||||||
```
|
```
|
||||||
|
|
||||||
## CI/CD Integration
|
## CI/CD Integration
|
||||||
|
|||||||
369
tests/test_diskann_partition.py
Normal file
369
tests/test_diskann_partition.py
Normal file
@@ -0,0 +1,369 @@
|
|||||||
|
"""
|
||||||
|
Test DiskANN graph partitioning functionality.
|
||||||
|
|
||||||
|
Tests the automatic graph partitioning feature that was implemented to save
|
||||||
|
storage space by partitioning large DiskANN indices and safely deleting
|
||||||
|
redundant files while maintaining search functionality.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(
|
||||||
|
os.environ.get("CI") == "true",
|
||||||
|
reason="Skip DiskANN partition tests in CI - requires specific hardware and large memory",
|
||||||
|
)
|
||||||
|
def test_diskann_without_partition():
|
||||||
|
"""Test DiskANN index building without partition (baseline)."""
|
||||||
|
from leann.api import LeannBuilder, LeannSearcher
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
|
index_path = str(Path(temp_dir) / "test_no_partition.leann")
|
||||||
|
|
||||||
|
# Test data - enough to trigger index building
|
||||||
|
texts = [
|
||||||
|
f"Document {i} discusses topic {i % 10} with detailed analysis of subject {i // 10}."
|
||||||
|
for i in range(500)
|
||||||
|
]
|
||||||
|
|
||||||
|
# Build without partition (is_recompute=False)
|
||||||
|
builder = LeannBuilder(
|
||||||
|
backend_name="diskann",
|
||||||
|
embedding_model="facebook/contriever",
|
||||||
|
embedding_mode="sentence-transformers",
|
||||||
|
num_neighbors=32,
|
||||||
|
search_list_size=50,
|
||||||
|
is_recompute=False, # No partition
|
||||||
|
)
|
||||||
|
|
||||||
|
for text in texts:
|
||||||
|
builder.add_text(text)
|
||||||
|
|
||||||
|
builder.build_index(index_path)
|
||||||
|
|
||||||
|
# Verify index was created
|
||||||
|
index_dir = Path(index_path).parent
|
||||||
|
assert index_dir.exists()
|
||||||
|
|
||||||
|
# Check that traditional DiskANN files exist
|
||||||
|
index_prefix = Path(index_path).stem
|
||||||
|
# Core DiskANN files (beam search index may not be created for small datasets)
|
||||||
|
required_files = [
|
||||||
|
f"{index_prefix}_disk.index",
|
||||||
|
f"{index_prefix}_pq_compressed.bin",
|
||||||
|
f"{index_prefix}_pq_pivots.bin",
|
||||||
|
]
|
||||||
|
|
||||||
|
# Check all generated files first for debugging
|
||||||
|
generated_files = [f.name for f in index_dir.glob(f"{index_prefix}*")]
|
||||||
|
print(f"Generated files: {generated_files}")
|
||||||
|
|
||||||
|
for required_file in required_files:
|
||||||
|
file_path = index_dir / required_file
|
||||||
|
assert file_path.exists(), f"Required file {required_file} not found"
|
||||||
|
|
||||||
|
# Ensure no partition files exist in non-partition mode
|
||||||
|
partition_files = [f"{index_prefix}_disk_graph.index", f"{index_prefix}_partition.bin"]
|
||||||
|
|
||||||
|
for partition_file in partition_files:
|
||||||
|
file_path = index_dir / partition_file
|
||||||
|
assert (
|
||||||
|
not file_path.exists()
|
||||||
|
), f"Partition file {partition_file} should not exist in non-partition mode"
|
||||||
|
|
||||||
|
# Test search functionality
|
||||||
|
searcher = LeannSearcher(index_path)
|
||||||
|
results = searcher.search("topic 3 analysis", top_k=3)
|
||||||
|
|
||||||
|
assert len(results) > 0
|
||||||
|
assert all(result.score is not None and result.score != float("-inf") for result in results)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(
|
||||||
|
os.environ.get("CI") == "true",
|
||||||
|
reason="Skip DiskANN partition tests in CI - requires specific hardware and large memory",
|
||||||
|
)
|
||||||
|
def test_diskann_with_partition():
|
||||||
|
"""Test DiskANN index building with automatic graph partitioning."""
|
||||||
|
from leann.api import LeannBuilder
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
|
index_path = str(Path(temp_dir) / "test_with_partition.leann")
|
||||||
|
|
||||||
|
# Test data - enough to trigger partitioning
|
||||||
|
texts = [
|
||||||
|
f"Document {i} explores subject {i % 15} with comprehensive coverage of area {i // 15}."
|
||||||
|
for i in range(500)
|
||||||
|
]
|
||||||
|
|
||||||
|
# Build with partition (is_recompute=True)
|
||||||
|
builder = LeannBuilder(
|
||||||
|
backend_name="diskann",
|
||||||
|
embedding_model="facebook/contriever",
|
||||||
|
embedding_mode="sentence-transformers",
|
||||||
|
num_neighbors=32,
|
||||||
|
search_list_size=50,
|
||||||
|
is_recompute=True, # Enable automatic partitioning
|
||||||
|
)
|
||||||
|
|
||||||
|
for text in texts:
|
||||||
|
builder.add_text(text)
|
||||||
|
|
||||||
|
builder.build_index(index_path)
|
||||||
|
|
||||||
|
# Verify index was created
|
||||||
|
index_dir = Path(index_path).parent
|
||||||
|
assert index_dir.exists()
|
||||||
|
|
||||||
|
# Check that partition files exist
|
||||||
|
index_prefix = Path(index_path).stem
|
||||||
|
partition_files = [
|
||||||
|
f"{index_prefix}_disk_graph.index", # Partitioned graph
|
||||||
|
f"{index_prefix}_partition.bin", # Partition metadata
|
||||||
|
f"{index_prefix}_pq_compressed.bin",
|
||||||
|
f"{index_prefix}_pq_pivots.bin",
|
||||||
|
]
|
||||||
|
|
||||||
|
for partition_file in partition_files:
|
||||||
|
file_path = index_dir / partition_file
|
||||||
|
assert file_path.exists(), f"Expected partition file {partition_file} not found"
|
||||||
|
|
||||||
|
# Check that large files were cleaned up (storage saving goal)
|
||||||
|
large_files = [f"{index_prefix}_disk.index", f"{index_prefix}_disk_beam_search.index"]
|
||||||
|
|
||||||
|
for large_file in large_files:
|
||||||
|
file_path = index_dir / large_file
|
||||||
|
assert (
|
||||||
|
not file_path.exists()
|
||||||
|
), f"Large file {large_file} should have been deleted for storage saving"
|
||||||
|
|
||||||
|
# Verify required auxiliary files for partition mode exist
|
||||||
|
required_files = [
|
||||||
|
f"{index_prefix}_disk.index_medoids.bin",
|
||||||
|
f"{index_prefix}_disk.index_max_base_norm.bin",
|
||||||
|
]
|
||||||
|
|
||||||
|
for req_file in required_files:
|
||||||
|
file_path = index_dir / req_file
|
||||||
|
assert (
|
||||||
|
file_path.exists()
|
||||||
|
), f"Required auxiliary file {req_file} missing for partition mode"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(
|
||||||
|
os.environ.get("CI") == "true",
|
||||||
|
reason="Skip DiskANN partition tests in CI - requires specific hardware and large memory",
|
||||||
|
)
|
||||||
|
def test_diskann_partition_search_functionality():
|
||||||
|
"""Test that search works correctly with partitioned indices."""
|
||||||
|
from leann.api import LeannBuilder, LeannSearcher
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
|
index_path = str(Path(temp_dir) / "test_partition_search.leann")
|
||||||
|
|
||||||
|
# Create diverse test data
|
||||||
|
texts = [
|
||||||
|
"LEANN is a storage-efficient approximate nearest neighbor search system.",
|
||||||
|
"Graph partitioning helps reduce memory usage in large scale vector search.",
|
||||||
|
"DiskANN provides high-performance disk-based approximate nearest neighbor search.",
|
||||||
|
"Vector embeddings enable semantic search over unstructured text data.",
|
||||||
|
"Approximate nearest neighbor algorithms trade accuracy for speed and storage.",
|
||||||
|
] * 100 # Repeat to get enough data
|
||||||
|
|
||||||
|
# Build with partitioning
|
||||||
|
builder = LeannBuilder(
|
||||||
|
backend_name="diskann",
|
||||||
|
embedding_model="facebook/contriever",
|
||||||
|
embedding_mode="sentence-transformers",
|
||||||
|
is_recompute=True, # Enable partitioning
|
||||||
|
)
|
||||||
|
|
||||||
|
for text in texts:
|
||||||
|
builder.add_text(text)
|
||||||
|
|
||||||
|
builder.build_index(index_path)
|
||||||
|
|
||||||
|
# Test search with partitioned index
|
||||||
|
searcher = LeannSearcher(index_path)
|
||||||
|
|
||||||
|
# Test various queries
|
||||||
|
test_queries = [
|
||||||
|
("vector search algorithms", 5),
|
||||||
|
("LEANN storage efficiency", 3),
|
||||||
|
("graph partitioning memory", 4),
|
||||||
|
("approximate nearest neighbor", 7),
|
||||||
|
]
|
||||||
|
|
||||||
|
for query, top_k in test_queries:
|
||||||
|
results = searcher.search(query, top_k=top_k)
|
||||||
|
|
||||||
|
# Verify search results
|
||||||
|
assert len(results) == top_k, f"Expected {top_k} results for query '{query}'"
|
||||||
|
assert all(
|
||||||
|
result.score is not None for result in results
|
||||||
|
), "All results should have scores"
|
||||||
|
assert all(
|
||||||
|
result.score != float("-inf") for result in results
|
||||||
|
), "No result should have -inf score"
|
||||||
|
assert all(
|
||||||
|
result.text is not None for result in results
|
||||||
|
), "All results should have text"
|
||||||
|
|
||||||
|
# Scores should be in descending order (higher similarity first)
|
||||||
|
scores = [result.score for result in results]
|
||||||
|
assert scores == sorted(
|
||||||
|
scores, reverse=True
|
||||||
|
), "Results should be sorted by score descending"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(
|
||||||
|
os.environ.get("CI") == "true",
|
||||||
|
reason="Skip DiskANN partition tests in CI - requires specific hardware and large memory",
|
||||||
|
)
|
||||||
|
def test_diskann_medoid_and_norm_files():
|
||||||
|
"""Test that medoid and max_base_norm files are correctly generated and used."""
|
||||||
|
import struct
|
||||||
|
|
||||||
|
from leann.api import LeannBuilder, LeannSearcher
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
|
index_path = str(Path(temp_dir) / "test_medoid_norm.leann")
|
||||||
|
|
||||||
|
# Small but sufficient dataset
|
||||||
|
texts = [f"Test document {i} with content about subject {i % 10}." for i in range(200)]
|
||||||
|
|
||||||
|
builder = LeannBuilder(
|
||||||
|
backend_name="diskann",
|
||||||
|
embedding_model="facebook/contriever",
|
||||||
|
embedding_mode="sentence-transformers",
|
||||||
|
is_recompute=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
for text in texts:
|
||||||
|
builder.add_text(text)
|
||||||
|
|
||||||
|
builder.build_index(index_path)
|
||||||
|
|
||||||
|
index_dir = Path(index_path).parent
|
||||||
|
index_prefix = Path(index_path).stem
|
||||||
|
|
||||||
|
# Test medoids file
|
||||||
|
medoids_file = index_dir / f"{index_prefix}_disk.index_medoids.bin"
|
||||||
|
assert medoids_file.exists(), "Medoids file should be generated"
|
||||||
|
|
||||||
|
# Read and validate medoids file format
|
||||||
|
with open(medoids_file, "rb") as f:
|
||||||
|
nshards = struct.unpack("<I", f.read(4))[0]
|
||||||
|
one_val = struct.unpack("<I", f.read(4))[0]
|
||||||
|
medoid_id = struct.unpack("<I", f.read(4))[0]
|
||||||
|
|
||||||
|
assert nshards == 1, "Single-shot build should have 1 shard"
|
||||||
|
assert one_val == 1, "Expected value should be 1"
|
||||||
|
assert medoid_id >= 0, "Medoid ID should be valid (not hardcoded 0)"
|
||||||
|
|
||||||
|
# Test max_base_norm file
|
||||||
|
norm_file = index_dir / f"{index_prefix}_disk.index_max_base_norm.bin"
|
||||||
|
assert norm_file.exists(), "Max base norm file should be generated"
|
||||||
|
|
||||||
|
# Read and validate norm file
|
||||||
|
with open(norm_file, "rb") as f:
|
||||||
|
npts = struct.unpack("<I", f.read(4))[0]
|
||||||
|
ndims = struct.unpack("<I", f.read(4))[0]
|
||||||
|
norm_val = struct.unpack("<f", f.read(4))[0]
|
||||||
|
|
||||||
|
assert npts == 1, "Should have 1 norm point"
|
||||||
|
assert ndims == 1, "Should have 1 dimension"
|
||||||
|
assert norm_val > 0, "Norm value should be positive"
|
||||||
|
assert norm_val != float("inf"), "Norm value should be finite"
|
||||||
|
|
||||||
|
# Test that search works with these files
|
||||||
|
searcher = LeannSearcher(index_path)
|
||||||
|
results = searcher.search("test subject", top_k=3)
|
||||||
|
|
||||||
|
# Verify that scores are not -inf (which indicates norm file was loaded correctly)
|
||||||
|
assert len(results) > 0
|
||||||
|
assert all(
|
||||||
|
result.score != float("-inf") for result in results
|
||||||
|
), "Scores should not be -inf when norm file is correct"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(
|
||||||
|
os.environ.get("CI") == "true",
|
||||||
|
reason="Skip performance comparison in CI - requires significant compute time",
|
||||||
|
)
|
||||||
|
def test_diskann_vs_hnsw_performance():
|
||||||
|
"""Compare DiskANN (with partition) vs HNSW performance."""
|
||||||
|
import time
|
||||||
|
|
||||||
|
from leann.api import LeannBuilder, LeannSearcher
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
|
# Test data
|
||||||
|
texts = [
|
||||||
|
f"Performance test document {i} covering topic {i % 20} in detail." for i in range(1000)
|
||||||
|
]
|
||||||
|
query = "performance topic test"
|
||||||
|
|
||||||
|
# Test DiskANN with partitioning
|
||||||
|
diskann_path = str(Path(temp_dir) / "perf_diskann.leann")
|
||||||
|
diskann_builder = LeannBuilder(
|
||||||
|
backend_name="diskann",
|
||||||
|
embedding_model="facebook/contriever",
|
||||||
|
embedding_mode="sentence-transformers",
|
||||||
|
is_recompute=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
for text in texts:
|
||||||
|
diskann_builder.add_text(text)
|
||||||
|
|
||||||
|
start_time = time.time()
|
||||||
|
diskann_builder.build_index(diskann_path)
|
||||||
|
|
||||||
|
# Test HNSW
|
||||||
|
hnsw_path = str(Path(temp_dir) / "perf_hnsw.leann")
|
||||||
|
hnsw_builder = LeannBuilder(
|
||||||
|
backend_name="hnsw",
|
||||||
|
embedding_model="facebook/contriever",
|
||||||
|
embedding_mode="sentence-transformers",
|
||||||
|
is_recompute=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
for text in texts:
|
||||||
|
hnsw_builder.add_text(text)
|
||||||
|
|
||||||
|
start_time = time.time()
|
||||||
|
hnsw_builder.build_index(hnsw_path)
|
||||||
|
|
||||||
|
# Compare search performance
|
||||||
|
diskann_searcher = LeannSearcher(diskann_path)
|
||||||
|
hnsw_searcher = LeannSearcher(hnsw_path)
|
||||||
|
|
||||||
|
# Warm up searches
|
||||||
|
diskann_searcher.search(query, top_k=5)
|
||||||
|
hnsw_searcher.search(query, top_k=5)
|
||||||
|
|
||||||
|
# Timed searches
|
||||||
|
start_time = time.time()
|
||||||
|
diskann_results = diskann_searcher.search(query, top_k=10)
|
||||||
|
diskann_search_time = time.time() - start_time
|
||||||
|
|
||||||
|
start_time = time.time()
|
||||||
|
hnsw_results = hnsw_searcher.search(query, top_k=10)
|
||||||
|
hnsw_search_time = time.time() - start_time
|
||||||
|
|
||||||
|
# Basic assertions
|
||||||
|
assert len(diskann_results) == 10
|
||||||
|
assert len(hnsw_results) == 10
|
||||||
|
assert all(r.score != float("-inf") for r in diskann_results)
|
||||||
|
assert all(r.score != float("-inf") for r in hnsw_results)
|
||||||
|
|
||||||
|
# Performance ratio (informational)
|
||||||
|
if hnsw_search_time > 0:
|
||||||
|
speed_ratio = hnsw_search_time / diskann_search_time
|
||||||
|
print(f"DiskANN search time: {diskann_search_time:.4f}s")
|
||||||
|
print(f"HNSW search time: {hnsw_search_time:.4f}s")
|
||||||
|
print(f"DiskANN is {speed_ratio:.2f}x faster than HNSW")
|
||||||
@@ -10,8 +10,9 @@ from pathlib import Path
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
def test_readme_basic_example():
|
@pytest.mark.parametrize("backend_name", ["hnsw", "diskann"])
|
||||||
"""Test the basic example from README.md."""
|
def test_readme_basic_example(backend_name):
|
||||||
|
"""Test the basic example from README.md with both backends."""
|
||||||
# Skip on macOS CI due to MPS environment issues with all-MiniLM-L6-v2
|
# Skip on macOS CI due to MPS environment issues with all-MiniLM-L6-v2
|
||||||
if os.environ.get("CI") == "true" and platform.system() == "Darwin":
|
if os.environ.get("CI") == "true" and platform.system() == "Darwin":
|
||||||
pytest.skip("Skipping on macOS CI due to MPS environment issues with all-MiniLM-L6-v2")
|
pytest.skip("Skipping on macOS CI due to MPS environment issues with all-MiniLM-L6-v2")
|
||||||
@@ -21,18 +22,18 @@ def test_readme_basic_example():
|
|||||||
from leann.api import SearchResult
|
from leann.api import SearchResult
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as temp_dir:
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
INDEX_PATH = str(Path(temp_dir) / "demo.leann")
|
INDEX_PATH = str(Path(temp_dir) / f"demo_{backend_name}.leann")
|
||||||
|
|
||||||
# Build an index
|
# Build an index
|
||||||
# In CI, use a smaller model to avoid memory issues
|
# In CI, use a smaller model to avoid memory issues
|
||||||
if os.environ.get("CI") == "true":
|
if os.environ.get("CI") == "true":
|
||||||
builder = LeannBuilder(
|
builder = LeannBuilder(
|
||||||
backend_name="hnsw",
|
backend_name=backend_name,
|
||||||
embedding_model="sentence-transformers/all-MiniLM-L6-v2", # Smaller model
|
embedding_model="sentence-transformers/all-MiniLM-L6-v2", # Smaller model
|
||||||
dimensions=384, # Smaller dimensions
|
dimensions=384, # Smaller dimensions
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
builder = LeannBuilder(backend_name="hnsw")
|
builder = LeannBuilder(backend_name=backend_name)
|
||||||
builder.add_text("LEANN saves 97% storage compared to traditional vector databases.")
|
builder.add_text("LEANN saves 97% storage compared to traditional vector databases.")
|
||||||
builder.add_text("Tung Tung Tung Sahur called—they need their banana-crocodile hybrid back")
|
builder.add_text("Tung Tung Tung Sahur called—they need their banana-crocodile hybrid back")
|
||||||
builder.build_index(INDEX_PATH)
|
builder.build_index(INDEX_PATH)
|
||||||
@@ -52,6 +53,9 @@ def test_readme_basic_example():
|
|||||||
# Verify search results
|
# Verify search results
|
||||||
assert len(results) > 0
|
assert len(results) > 0
|
||||||
assert isinstance(results[0], SearchResult)
|
assert isinstance(results[0], SearchResult)
|
||||||
|
assert results[0].score != float(
|
||||||
|
"-inf"
|
||||||
|
), f"should return valid scores, got {results[0].score}"
|
||||||
# The second text about banana-crocodile should be more relevant
|
# The second text about banana-crocodile should be more relevant
|
||||||
assert "banana" in results[0].text or "crocodile" in results[0].text
|
assert "banana" in results[0].text or "crocodile" in results[0].text
|
||||||
|
|
||||||
@@ -110,26 +114,31 @@ def test_backend_options():
|
|||||||
assert len(list(Path(diskann_path).parent.glob(f"{Path(diskann_path).stem}.*"))) > 0
|
assert len(list(Path(diskann_path).parent.glob(f"{Path(diskann_path).stem}.*"))) > 0
|
||||||
|
|
||||||
|
|
||||||
def test_llm_config_simulated():
|
@pytest.mark.parametrize("backend_name", ["hnsw", "diskann"])
|
||||||
"""Test simulated LLM configuration option."""
|
def test_llm_config_simulated(backend_name):
|
||||||
|
"""Test simulated LLM configuration option with both backends."""
|
||||||
# Skip on macOS CI due to MPS environment issues with all-MiniLM-L6-v2
|
# Skip on macOS CI due to MPS environment issues with all-MiniLM-L6-v2
|
||||||
if os.environ.get("CI") == "true" and platform.system() == "Darwin":
|
if os.environ.get("CI") == "true" and platform.system() == "Darwin":
|
||||||
pytest.skip("Skipping on macOS CI due to MPS environment issues with all-MiniLM-L6-v2")
|
pytest.skip("Skipping on macOS CI due to MPS environment issues with all-MiniLM-L6-v2")
|
||||||
|
|
||||||
|
# Skip DiskANN tests in CI due to hardware requirements
|
||||||
|
if os.environ.get("CI") == "true" and backend_name == "diskann":
|
||||||
|
pytest.skip("Skip DiskANN tests in CI - requires specific hardware and large memory")
|
||||||
|
|
||||||
from leann import LeannBuilder, LeannChat
|
from leann import LeannBuilder, LeannChat
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as temp_dir:
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
# Build a simple index
|
# Build a simple index
|
||||||
index_path = str(Path(temp_dir) / "test.leann")
|
index_path = str(Path(temp_dir) / f"test_{backend_name}.leann")
|
||||||
# Use smaller model in CI to avoid memory issues
|
# Use smaller model in CI to avoid memory issues
|
||||||
if os.environ.get("CI") == "true":
|
if os.environ.get("CI") == "true":
|
||||||
builder = LeannBuilder(
|
builder = LeannBuilder(
|
||||||
backend_name="hnsw",
|
backend_name=backend_name,
|
||||||
embedding_model="sentence-transformers/all-MiniLM-L6-v2",
|
embedding_model="sentence-transformers/all-MiniLM-L6-v2",
|
||||||
dimensions=384,
|
dimensions=384,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
builder = LeannBuilder(backend_name="hnsw")
|
builder = LeannBuilder(backend_name=backend_name)
|
||||||
builder.add_text("Test document for LLM testing")
|
builder.add_text("Test document for LLM testing")
|
||||||
builder.build_index(index_path)
|
builder.build_index(index_path)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user