Fix remaining ty type errors

- Fix slack_mcp_reader.py channel parameter can be None
- Fix embedding_compute.py ContextProp type issue
- Fix searcher_base.py method override signatures
- Fix chunking_utils.py chunk_text assignment
- Fix slack_rag.py and twitter_rag.py return types
- Fix email.py and image_rag.py method overrides
Author: Andy Lee
Date: 2025-12-23 09:11:09 +00:00
parent d83a463c26
commit de56ab8fa7
8 changed files with 20 additions and 11 deletions

email.py

@@ -127,11 +127,12 @@ class EmlxMboxReader(MboxReader):
     def load_data(
         self,
-        directory: Path,
+        file: Path,  # Note: for EmlxMboxReader, this is actually a directory
         extra_info: dict | None = None,
         fs: AbstractFileSystem | None = None,
     ) -> list[Document]:
         """Parse .emlx files from directory into strings using MboxReader logic."""
+        directory = file  # Rename for clarity - this is a directory of .emlx files
         import os
         import tempfile
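The rename matters because type checkers require an override's parameters to match the base method by name as well as type: code holding the base class may call load_data(file=...) by keyword. A standalone sketch of the rule (classes here are illustrative, not the project's):

from pathlib import Path

class Reader:
    def load_data(self, file: Path) -> list[str]:
        raise NotImplementedError

class DirReader(Reader):
    # Keeping the base parameter name `file` satisfies the checker;
    # the body aliases it to `directory` for readability, as the diff does.
    def load_data(self, file: Path) -> list[str]:
        directory = file  # actually a directory in this subclass
        return sorted(p.name for p in directory.glob("*.emlx"))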

image_rag.py

@@ -169,7 +169,7 @@ class ImageRAG(BaseRAGExample):
         print(f"✅ Processed {len(image_data)} images")
         return image_data

-    async def build_index(self, args, texts: list[str]) -> str:
+    async def build_index(self, args, texts: list[dict[str, Any]]) -> str:
         """Build index using pre-computed CLIP embeddings."""
         from leann.api import LeannBuilder
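Same override rule here: the base class declares build_index over list[dict[str, Any]], so the subclass annotation is widened to match — an override must accept at least everything the base method accepts. A minimal sketch (the base signature is inferred from the diff, not verified against the repo):

import asyncio
from typing import Any

class BaseRAGExample:
    async def build_index(self, args: Any, texts: list[dict[str, Any]]) -> str:
        raise NotImplementedError

class ImageRAG(BaseRAGExample):
    async def build_index(self, args: Any, texts: list[dict[str, Any]]) -> str:
        # Narrowing texts to list[str] here would be flagged by the checker.
        return f"indexed {len(texts)} items"

asyncio.run(ImageRAG().build_index(None, [{"text": "cat.jpg"}]))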

slack_mcp_reader.py

@@ -177,7 +177,9 @@ class SlackMCPReader:
                 break

         # If we get here, all retries failed or it's not a retryable error
-        raise last_exception
+        if last_exception is not None:
+            raise last_exception
+        raise RuntimeError("Unexpected error: no exception captured during retry loop")

     async def fetch_slack_messages(
         self, channel: Optional[str] = None, limit: int = 100
@@ -267,7 +269,10 @@ class SlackMCPReader:
                         messages = json.loads(content["text"])
                     except json.JSONDecodeError:
                         # If not JSON, try to parse as CSV format (Slack MCP server format)
-                        messages = self._parse_csv_messages(content["text"], channel)
+                        text_content = content.get("text", "")
+                        messages = self._parse_csv_messages(
+                            text_content if text_content else "", channel or "unknown"
+                        )
                 else:
                     messages = result["content"]
             else:
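Both hunks are the same kind of fix: ty cannot prove an Optional is non-None at the point of use, so the code narrows it first — with an explicit is-not-None guard before the raise, and with `channel or "unknown"` as a sentinel fallback. The retry pattern in isolation (a generic sketch, not the reader's API):

from typing import Callable, Optional

def call_with_retries(op: Callable[[], str], attempts: int = 3) -> str:
    last_exception: Optional[Exception] = None
    for _ in range(attempts):
        try:
            return op()
        except ConnectionError as e:
            last_exception = e
    # A bare `raise last_exception` fails checking: the variable's type
    # is Optional[Exception]. The guard narrows it to Exception.
    if last_exception is not None:
        raise last_exception
    raise RuntimeError("retry loop ran zero attempts")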

slack_rag.py

@@ -189,7 +189,8 @@ class SlackMCPRAG(BaseRAGExample):
                 print(sample_text)
                 print("-" * 40)

-            return texts
+            # Convert strings to dict format expected by base class
+            return [{"text": text, "metadata": {"source": "slack"}} for text in texts]

         except Exception as e:
             print(f"Error loading Slack data: {e}")

twitter_rag.py

@@ -157,7 +157,8 @@ class TwitterMCPRAG(BaseRAGExample):
                 print(sample_text)
                 print("-" * 50)

-            return texts
+            # Convert strings to dict format expected by base class
+            return [{"text": text, "metadata": {"source": "twitter"}} for text in texts]

         except Exception as e:
             print(f"❌ Error loading Twitter bookmarks: {e}")

chunking_utils.py

@@ -243,7 +243,7 @@ def create_ast_chunks(
         astchunk_metadata: dict[str, Any] = {}

         if hasattr(chunk, "text"):
-            chunk_text = chunk.text
+            chunk_text = str(chunk.text) if chunk.text else None
         elif isinstance(chunk, str):
             chunk_text = chunk
         elif isinstance(chunk, dict):
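The `str(...) if ... else None` normalization keeps chunk_text at one type, str | None, across all branches; previously the hasattr branch bound whatever type chunk.text happened to be, and the checker flagged the mixed assignment. The branching extracted into a sketch (generic Any input; the real chunk objects come from astchunk):

from typing import Any

def extract_chunk_text(chunk: Any) -> str | None:
    if hasattr(chunk, "text"):
        # Coerce to str (or None for falsy values) so every branch
        # agrees on str | None.
        return str(chunk.text) if chunk.text else None
    if isinstance(chunk, str):
        return chunk
    if isinstance(chunk, dict):
        return chunk.get("text")
    return None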

embedding_compute.py

@@ -451,7 +451,8 @@ def compute_embeddings_sentence_transformers(
         # TODO: Haven't tested this yet
         torch.set_num_threads(min(8, os.cpu_count() or 4))
         try:
-            torch.backends.mkldnn.enabled = True
+            # PyTorch's ContextProp type is complex; cast for type checker
+            torch.backends.mkldnn.enabled = True  # type: ignore[assignment]
         except AttributeError:
             pass
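torch.backends.mkldnn.enabled is backed by a ContextProp descriptor rather than a plain module attribute, so assigning a bool trips strict checkers even though it works at runtime; the targeted ignore is the least invasive fix. If the toggle only needs to hold for a region of code, the flags() context manager avoids the assignment entirely (a sketch, assuming a recent PyTorch build with MKL-DNN support):

import torch

with torch.backends.mkldnn.flags(enabled=True):
    x = torch.randn(16, 64)
    _ = x @ x.T  # matmul runs while the MKL-DNN backend flag is set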

searcher_base.py

@@ -56,7 +56,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
         with open(meta_path, encoding="utf-8") as f:
             return json.load(f)

-    def _ensure_server_running(self, passages_source_file: str, port: int, **kwargs) -> int:
+    def _ensure_server_running(self, passages_source_file: str, port: Optional[int], **kwargs) -> int:
         """
         Ensures the embedding server is running if recompute is needed.
         This is a helper for subclasses.
@@ -81,7 +81,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
         }
         server_started, actual_port = self.embedding_server_manager.start_server(
-            port=port,
+            port=port if port is not None else 5557,
             model_name=self.embedding_model,
             embedding_mode=self.embedding_mode,
             passages_file=passages_source_file,
@@ -98,7 +98,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
         self,
         query: str,
         use_server_if_available: bool = True,
-        zmq_port: int = 5557,
+        zmq_port: Optional[int] = None,
         query_template: Optional[str] = None,
     ) -> np.ndarray:
         """