diff --git a/apps/email_data/email.py b/apps/email_data/email.py index cad4062..0bb003a 100644 --- a/apps/email_data/email.py +++ b/apps/email_data/email.py @@ -127,11 +127,12 @@ class EmlxMboxReader(MboxReader): def load_data( self, - directory: Path, + file: Path, # Note: for EmlxMboxReader, this is actually a directory extra_info: dict | None = None, fs: AbstractFileSystem | None = None, ) -> list[Document]: """Parse .emlx files from directory into strings using MboxReader logic.""" + directory = file # Rename for clarity - this is a directory of .emlx files import os import tempfile diff --git a/apps/image_rag.py b/apps/image_rag.py index 2a1d110..8dcd62b 100644 --- a/apps/image_rag.py +++ b/apps/image_rag.py @@ -169,7 +169,7 @@ class ImageRAG(BaseRAGExample): print(f"✅ Processed {len(image_data)} images") return image_data - async def build_index(self, args, texts: list[str]) -> str: + async def build_index(self, args, texts: list[dict[str, Any]]) -> str: """Build index using pre-computed CLIP embeddings.""" from leann.api import LeannBuilder diff --git a/apps/slack_data/slack_mcp_reader.py b/apps/slack_data/slack_mcp_reader.py index 8f24e1d..f1aaf41 100644 --- a/apps/slack_data/slack_mcp_reader.py +++ b/apps/slack_data/slack_mcp_reader.py @@ -177,7 +177,9 @@ class SlackMCPReader: break # If we get here, all retries failed or it's not a retryable error - raise last_exception + if last_exception is not None: + raise last_exception + raise RuntimeError("Unexpected error: no exception captured during retry loop") async def fetch_slack_messages( self, channel: Optional[str] = None, limit: int = 100 @@ -267,7 +269,10 @@ class SlackMCPReader: messages = json.loads(content["text"]) except json.JSONDecodeError: # If not JSON, try to parse as CSV format (Slack MCP server format) - messages = self._parse_csv_messages(content["text"], channel) + text_content = content.get("text", "") + messages = self._parse_csv_messages( + text_content if text_content else "", channel or "unknown" + ) else: messages = result["content"] else: diff --git a/apps/slack_rag.py b/apps/slack_rag.py index cf29aa6..8980457 100644 --- a/apps/slack_rag.py +++ b/apps/slack_rag.py @@ -189,7 +189,8 @@ class SlackMCPRAG(BaseRAGExample): print(sample_text) print("-" * 40) - return texts + # Convert strings to dict format expected by base class + return [{"text": text, "metadata": {"source": "slack"}} for text in texts] except Exception as e: print(f"Error loading Slack data: {e}") diff --git a/apps/twitter_rag.py b/apps/twitter_rag.py index 15abf24..5446a5a 100644 --- a/apps/twitter_rag.py +++ b/apps/twitter_rag.py @@ -157,7 +157,8 @@ class TwitterMCPRAG(BaseRAGExample): print(sample_text) print("-" * 50) - return texts + # Convert strings to dict format expected by base class + return [{"text": text, "metadata": {"source": "twitter"}} for text in texts] except Exception as e: print(f"❌ Error loading Twitter bookmarks: {e}") diff --git a/packages/leann-core/src/leann/chunking_utils.py b/packages/leann-core/src/leann/chunking_utils.py index e7f0a39..aae8761 100644 --- a/packages/leann-core/src/leann/chunking_utils.py +++ b/packages/leann-core/src/leann/chunking_utils.py @@ -243,7 +243,7 @@ def create_ast_chunks( astchunk_metadata: dict[str, Any] = {} if hasattr(chunk, "text"): - chunk_text = chunk.text + chunk_text = str(chunk.text) if chunk.text else None elif isinstance(chunk, str): chunk_text = chunk elif isinstance(chunk, dict): diff --git a/packages/leann-core/src/leann/embedding_compute.py b/packages/leann-core/src/leann/embedding_compute.py index 093a710..eb2a1be 100644 --- a/packages/leann-core/src/leann/embedding_compute.py +++ b/packages/leann-core/src/leann/embedding_compute.py @@ -451,7 +451,8 @@ def compute_embeddings_sentence_transformers( # TODO: Haven't tested this yet torch.set_num_threads(min(8, os.cpu_count() or 4)) try: - torch.backends.mkldnn.enabled = True + # PyTorch's ContextProp type is complex; cast for type checker + torch.backends.mkldnn.enabled = True # type: ignore[assignment] except AttributeError: pass diff --git a/packages/leann-core/src/leann/searcher_base.py b/packages/leann-core/src/leann/searcher_base.py index f8ab71c..8d26f05 100644 --- a/packages/leann-core/src/leann/searcher_base.py +++ b/packages/leann-core/src/leann/searcher_base.py @@ -56,7 +56,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC): with open(meta_path, encoding="utf-8") as f: return json.load(f) - def _ensure_server_running(self, passages_source_file: str, port: int, **kwargs) -> int: + def _ensure_server_running(self, passages_source_file: str, port: Optional[int], **kwargs) -> int: """ Ensures the embedding server is running if recompute is needed. This is a helper for subclasses. @@ -81,7 +81,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC): } server_started, actual_port = self.embedding_server_manager.start_server( - port=port, + port=port if port is not None else 5557, model_name=self.embedding_model, embedding_mode=self.embedding_mode, passages_file=passages_source_file, @@ -98,7 +98,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC): self, query: str, use_server_if_available: bool = True, - zmq_port: int = 5557, + zmq_port: Optional[int] = None, query_template: Optional[str] = None, ) -> np.ndarray: """