Fix remaining ty type errors

- Fix slack_mcp_reader.py channel parameter can be None
- Fix embedding_compute.py ContextProp type issue
- Fix searcher_base.py method override signatures
- Fix chunking_utils.py chunk_text assignment
- Fix slack_rag.py and twitter_rag.py return types
- Fix email.py and image_rag.py method overrides
Author: Andy Lee
Date: 2025-12-23 09:11:09 +00:00
parent d83a463c26
commit de56ab8fa7
8 changed files with 20 additions and 11 deletions

email.py

@@ -127,11 +127,12 @@ class EmlxMboxReader(MboxReader):
     def load_data(
         self,
-        directory: Path,
+        file: Path,  # Note: for EmlxMboxReader, this is actually a directory
         extra_info: dict | None = None,
         fs: AbstractFileSystem | None = None,
     ) -> list[Document]:
         """Parse .emlx files from directory into strings using MboxReader logic."""
+        directory = file  # Rename for clarity - this is a directory of .emlx files
         import os
         import tempfile
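The rename matters because type checkers require an override's parameters to match the base method by name as well as type: code holding the base class may call load_data(file=...) by keyword. A standalone sketch of the rule (classes here are illustrative, not the project's):

from pathlib import Path

class Reader:
    def load_data(self, file: Path) -> list[str]:
        raise NotImplementedError

class DirReader(Reader):
    # Keeping the base parameter name `file` satisfies the checker;
    # the body aliases it to `directory` for readability, as the diff does.
    def load_data(self, file: Path) -> list[str]:
        directory = file  # actually a directory in this subclass
        return sorted(p.name for p in directory.glob("*.emlx"))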

image_rag.py

@@ -169,7 +169,7 @@ class ImageRAG(BaseRAGExample):
         print(f"✅ Processed {len(image_data)} images")
         return image_data

-    async def build_index(self, args, texts: list[str]) -> str:
+    async def build_index(self, args, texts: list[dict[str, Any]]) -> str:
         """Build index using pre-computed CLIP embeddings."""
         from leann.api import LeannBuilder
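Same override rule here: the base class declares build_index over list[dict[str, Any]], so the subclass annotation is widened to match — an override must accept at least everything the base method accepts. A minimal sketch (the base signature is inferred from the diff, not verified against the repo):

import asyncio
from typing import Any

class BaseRAGExample:
    async def build_index(self, args: Any, texts: list[dict[str, Any]]) -> str:
        raise NotImplementedError

class ImageRAG(BaseRAGExample):
    async def build_index(self, args: Any, texts: list[dict[str, Any]]) -> str:
        # Narrowing texts to list[str] here would be flagged by the checker.
        return f"indexed {len(texts)} items"

asyncio.run(ImageRAG().build_index(None, [{"text": "cat.jpg"}]))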

slack_mcp_reader.py

@@ -177,7 +177,9 @@ class SlackMCPReader:
                 break

         # If we get here, all retries failed or it's not a retryable error
-        raise last_exception
+        if last_exception is not None:
+            raise last_exception
+        raise RuntimeError("Unexpected error: no exception captured during retry loop")

     async def fetch_slack_messages(
         self, channel: Optional[str] = None, limit: int = 100
@@ -267,7 +269,10 @@ class SlackMCPReader:
                         messages = json.loads(content["text"])
                     except json.JSONDecodeError:
                         # If not JSON, try to parse as CSV format (Slack MCP server format)
-                        messages = self._parse_csv_messages(content["text"], channel)
+                        text_content = content.get("text", "")
+                        messages = self._parse_csv_messages(
+                            text_content if text_content else "", channel or "unknown"
+                        )
                 else:
                     messages = result["content"]
             else:
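Both hunks are the same kind of fix: ty cannot prove an Optional is non-None at the point of use, so the code narrows it first — with an explicit is-not-None guard before the raise, and with `channel or "unknown"` as a sentinel fallback. The retry pattern in isolation (a generic sketch, not the reader's API):

from typing import Callable, Optional

def call_with_retries(op: Callable[[], str], attempts: int = 3) -> str:
    last_exception: Optional[Exception] = None
    for _ in range(attempts):
        try:
            return op()
        except ConnectionError as e:
            last_exception = e
    # A bare `raise last_exception` fails checking: the variable's type
    # is Optional[Exception]. The guard narrows it to Exception.
    if last_exception is not None:
        raise last_exception
    raise RuntimeError("retry loop ran zero attempts")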

slack_rag.py

@@ -189,7 +189,8 @@ class SlackMCPRAG(BaseRAGExample):
                 print(sample_text)
                 print("-" * 40)

-            return texts
+            # Convert strings to dict format expected by base class
+            return [{"text": text, "metadata": {"source": "slack"}} for text in texts]

         except Exception as e:
             print(f"Error loading Slack data: {e}")

twitter_rag.py

@@ -157,7 +157,8 @@ class TwitterMCPRAG(BaseRAGExample):
                 print(sample_text)
                 print("-" * 50)

-            return texts
+            # Convert strings to dict format expected by base class
+            return [{"text": text, "metadata": {"source": "twitter"}} for text in texts]

         except Exception as e:
             print(f"❌ Error loading Twitter bookmarks: {e}")

chunking_utils.py

@@ -243,7 +243,7 @@ def create_ast_chunks(
         astchunk_metadata: dict[str, Any] = {}

         if hasattr(chunk, "text"):
-            chunk_text = chunk.text
+            chunk_text = str(chunk.text) if chunk.text else None
         elif isinstance(chunk, str):
             chunk_text = chunk
         elif isinstance(chunk, dict):
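The `str(...) if ... else None` normalization keeps chunk_text at one type, str | None, across all branches; previously the hasattr branch bound whatever type chunk.text happened to be, and the checker flagged the mixed assignment. The branching extracted into a sketch (generic Any input; the real chunk objects come from astchunk):

from typing import Any

def extract_chunk_text(chunk: Any) -> str | None:
    if hasattr(chunk, "text"):
        # Coerce to str (or None for falsy values) so every branch
        # agrees on str | None.
        return str(chunk.text) if chunk.text else None
    if isinstance(chunk, str):
        return chunk
    if isinstance(chunk, dict):
        return chunk.get("text")
    return None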

embedding_compute.py

@@ -451,7 +451,8 @@ def compute_embeddings_sentence_transformers(
         # TODO: Haven't tested this yet
         torch.set_num_threads(min(8, os.cpu_count() or 4))
         try:
-            torch.backends.mkldnn.enabled = True
+            # PyTorch's ContextProp type is complex; cast for type checker
+            torch.backends.mkldnn.enabled = True  # type: ignore[assignment]
         except AttributeError:
             pass
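torch.backends.mkldnn.enabled is backed by a ContextProp descriptor rather than a plain module attribute, so assigning a bool trips strict checkers even though it works at runtime; the targeted ignore is the least invasive fix. If the toggle only needs to hold for a region of code, the flags() context manager avoids the assignment entirely (a sketch, assuming a recent PyTorch build with MKL-DNN support):

import torch

with torch.backends.mkldnn.flags(enabled=True):
    x = torch.randn(16, 64)
    _ = x @ x.T  # matmul runs while the MKL-DNN backend flag is set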

searcher_base.py

@@ -56,7 +56,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
         with open(meta_path, encoding="utf-8") as f:
             return json.load(f)

-    def _ensure_server_running(self, passages_source_file: str, port: int, **kwargs) -> int:
+    def _ensure_server_running(self, passages_source_file: str, port: Optional[int], **kwargs) -> int:
         """
         Ensures the embedding server is running if recompute is needed.
         This is a helper for subclasses.
@@ -81,7 +81,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
         }
         server_started, actual_port = self.embedding_server_manager.start_server(
-            port=port,
+            port=port if port is not None else 5557,
             model_name=self.embedding_model,
             embedding_mode=self.embedding_mode,
             passages_file=passages_source_file,
@@ -98,7 +98,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
         self,
         query: str,
         use_server_if_available: bool = True,
-        zmq_port: int = 5557,
+        zmq_port: Optional[int] = None,
         query_template: Optional[str] = None,
     ) -> np.ndarray:
         """