Add ty type checker to CI and fix type errors

- Add ty (Astral's fast Python type checker) to GitHub CI workflow
- Fix type annotations across all RAG apps:
  - Update load_data return types from list[str] to list[dict[str, Any]]
  - Fix base_rag_example.py to properly handle dict format from create_text_chunks
- Fix type errors in leann-core:
  - chunking_utils.py: Add explicit type annotations
  - cli.py: Fix return type annotations for PDF extraction functions
  - interactive_utils.py: Fix readline import type handling
- Fix type errors in apps:
  - wechat_history.py: Fix return type annotations
  - document_rag.py, code_rag.py: Replace **kwargs with explicit arguments
- Add ty configuration to pyproject.toml

This resolves the bug introduced in PR #157, where create_text_chunks() was changed to return list[dict] but its callers were not updated.
2025-12-23 09:04:20 +00:00
parent 8a2ea37871
commit d83a463c26
18 changed files with 83 additions and 47 deletions
--- a/apps/document_rag.py
+++ b/apps/document_rag.py
@@ -5,7 +5,7 @@ Supports PDF, TXT, MD, and other document formats.

 import sys
 from pathlib import Path
-from typing import Any, Union
+from typing import Any

 # Add parent directory to path for imports
 sys.path.insert(0, str(Path(__file__).parent))
@@ -52,7 +52,7 @@ class DocumentRAG(BaseRAGExample):
            help="Enable AST-aware chunking for code files in the data directory",
        )

-    async def load_data(self, args) -> list[Union[str, dict[str, Any]]]:
+    async def load_data(self, args) -> list[dict[str, Any]]:
        """Load documents and convert to text chunks."""
        print(f"Loading documents from: {args.data_dir}")
        if args.file_types:
@@ -66,16 +66,12 @@ class DocumentRAG(BaseRAGExample):
            raise ValueError(f"Data directory not found: {args.data_dir}")

        # Load documents
-        reader_kwargs = {
-            "recursive": True,
-            "encoding": "utf-8",
-        }
-        if args.file_types:
-            reader_kwargs["required_exts"] = args.file_types
-
-        documents = SimpleDirectoryReader(args.data_dir, **reader_kwargs).load_data(
-            show_progress=True
-        )
+        documents = SimpleDirectoryReader(
+            args.data_dir,
+            recursive=True,
+            encoding="utf-8",
+            required_exts=args.file_types if args.file_types else None,
+        ).load_data(show_progress=True)

        if not documents:
            print(f"No documents found in {args.data_dir} with extensions {args.file_types}")