* Add ty type checker to CI and fix type errors
  - Add ty (Astral's fast Python type checker) to GitHub CI workflow
  - Fix type annotations across all RAG apps:
    - Update load_data return types from list[str] to list[dict[str, Any]]
    - Fix base_rag_example.py to properly handle dict format from create_text_chunks
  - Fix type errors in leann-core:
    - chunking_utils.py: Add explicit type annotations
    - cli.py: Fix return type annotations for PDF extraction functions
    - interactive_utils.py: Fix readline import type handling
  - Fix type errors in apps:
    - wechat_history.py: Fix return type annotations
    - document_rag.py, code_rag.py: Replace **kwargs with explicit arguments
  - Add ty configuration to pyproject.toml

  This resolves the bug introduced in PR #157 where create_text_chunks() changed to return list[dict] but callers were not updated.
* Fix remaining ty type errors
  - Fix slack_mcp_reader.py: channel parameter can be None
  - Fix embedding_compute.py: ContextProp type issue
  - Fix searcher_base.py: method override signatures
  - Fix chunking_utils.py: chunk_text assignment
  - Fix slack_rag.py and twitter_rag.py: return types
  - Fix email.py and image_rag.py: method overrides
* Fix multimodal benchmark scripts type errors
  - Fix undefined LeannRetriever -> LeannMultiVector
  - Add proper type casts for HuggingFace Dataset iteration
  - Cast task config values to correct types
  - Add type annotations for dataset row dicts
* Enable ty check for multimodal scripts in CI

  All type errors in multimodal scripts have been fixed, so we can now include them in the CI type checking.
* Fix all test type errors and enable ty check on tests
  - Fix test_basic.py: search() takes str not list
  - Fix test_cli_prompt_template.py: add type: ignore for Mock assignments
  - Fix test_prompt_template_persistence.py: match BaseSearcher.search signature
  - Fix test_prompt_template_e2e.py: add type narrowing asserts after skip
  - Fix test_readme_examples.py: use explicit kwargs instead of **model_args
  - Fix metadata_filter.py: allow Optional[MetadataFilters]
  - Update CI to run ty check on tests
* Format code with ruff
* Format searcher_base.py
This commit is contained in:
@@ -97,17 +97,17 @@ def test_backend_options():
|
||||
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
# Use smaller model in CI to avoid memory issues
|
||||
if os.environ.get("CI") == "true":
|
||||
model_args = {
|
||||
"embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
|
||||
"dimensions": 384,
|
||||
}
|
||||
else:
|
||||
model_args = {}
|
||||
is_ci = os.environ.get("CI") == "true"
|
||||
embedding_model = (
|
||||
"sentence-transformers/all-MiniLM-L6-v2" if is_ci else "facebook/contriever"
|
||||
)
|
||||
dimensions = 384 if is_ci else None
|
||||
|
||||
# Test HNSW backend (as shown in README)
|
||||
hnsw_path = str(Path(temp_dir) / "test_hnsw.leann")
|
||||
builder_hnsw = LeannBuilder(backend_name="hnsw", **model_args)
|
||||
builder_hnsw = LeannBuilder(
|
||||
backend_name="hnsw", embedding_model=embedding_model, dimensions=dimensions
|
||||
)
|
||||
builder_hnsw.add_text("Test document for HNSW backend")
|
||||
builder_hnsw.build_index(hnsw_path)
|
||||
assert Path(hnsw_path).parent.exists()
|
||||
@@ -115,7 +115,9 @@ def test_backend_options():
|
||||
|
||||
# Test DiskANN backend (mentioned as available option)
|
||||
diskann_path = str(Path(temp_dir) / "test_diskann.leann")
|
||||
builder_diskann = LeannBuilder(backend_name="diskann", **model_args)
|
||||
builder_diskann = LeannBuilder(
|
||||
backend_name="diskann", embedding_model=embedding_model, dimensions=dimensions
|
||||
)
|
||||
builder_diskann.add_text("Test document for DiskANN backend")
|
||||
builder_diskann.build_index(diskann_path)
|
||||
assert Path(diskann_path).parent.exists()
|
||||
|
||||
Reference in New Issue
Block a user