Fix remaining ty type errors
- Fix slack_mcp_reader.py: handle channel parameter that can be None
- Fix embedding_compute.py ContextProp type issue
- Fix searcher_base.py method override signatures
- Fix chunking_utils.py chunk_text assignment
- Fix slack_rag.py and twitter_rag.py return types
- Fix email.py and image_rag.py method overrides
This commit is contained in:
@@ -127,11 +127,12 @@ class EmlxMboxReader(MboxReader):
|
|||||||
|
|
||||||
def load_data(
|
def load_data(
|
||||||
self,
|
self,
|
||||||
directory: Path,
|
file: Path, # Note: for EmlxMboxReader, this is actually a directory
|
||||||
extra_info: dict | None = None,
|
extra_info: dict | None = None,
|
||||||
fs: AbstractFileSystem | None = None,
|
fs: AbstractFileSystem | None = None,
|
||||||
) -> list[Document]:
|
) -> list[Document]:
|
||||||
"""Parse .emlx files from directory into strings using MboxReader logic."""
|
"""Parse .emlx files from directory into strings using MboxReader logic."""
|
||||||
|
directory = file # Rename for clarity - this is a directory of .emlx files
|
||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
|
|||||||
@@ -169,7 +169,7 @@ class ImageRAG(BaseRAGExample):
|
|||||||
print(f"✅ Processed {len(image_data)} images")
|
print(f"✅ Processed {len(image_data)} images")
|
||||||
return image_data
|
return image_data
|
||||||
|
|
||||||
async def build_index(self, args, texts: list[str]) -> str:
|
async def build_index(self, args, texts: list[dict[str, Any]]) -> str:
|
||||||
"""Build index using pre-computed CLIP embeddings."""
|
"""Build index using pre-computed CLIP embeddings."""
|
||||||
from leann.api import LeannBuilder
|
from leann.api import LeannBuilder
|
||||||
|
|
||||||
|
|||||||
@@ -177,7 +177,9 @@ class SlackMCPReader:
|
|||||||
break
|
break
|
||||||
|
|
||||||
# If we get here, all retries failed or it's not a retryable error
|
# If we get here, all retries failed or it's not a retryable error
|
||||||
raise last_exception
|
if last_exception is not None:
|
||||||
|
raise last_exception
|
||||||
|
raise RuntimeError("Unexpected error: no exception captured during retry loop")
|
||||||
|
|
||||||
async def fetch_slack_messages(
|
async def fetch_slack_messages(
|
||||||
self, channel: Optional[str] = None, limit: int = 100
|
self, channel: Optional[str] = None, limit: int = 100
|
||||||
@@ -267,7 +269,10 @@ class SlackMCPReader:
|
|||||||
messages = json.loads(content["text"])
|
messages = json.loads(content["text"])
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
# If not JSON, try to parse as CSV format (Slack MCP server format)
|
# If not JSON, try to parse as CSV format (Slack MCP server format)
|
||||||
messages = self._parse_csv_messages(content["text"], channel)
|
text_content = content.get("text", "")
|
||||||
|
messages = self._parse_csv_messages(
|
||||||
|
text_content if text_content else "", channel or "unknown"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
messages = result["content"]
|
messages = result["content"]
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -189,7 +189,8 @@ class SlackMCPRAG(BaseRAGExample):
|
|||||||
print(sample_text)
|
print(sample_text)
|
||||||
print("-" * 40)
|
print("-" * 40)
|
||||||
|
|
||||||
return texts
|
# Convert strings to dict format expected by base class
|
||||||
|
return [{"text": text, "metadata": {"source": "slack"}} for text in texts]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error loading Slack data: {e}")
|
print(f"Error loading Slack data: {e}")
|
||||||
|
|||||||
@@ -157,7 +157,8 @@ class TwitterMCPRAG(BaseRAGExample):
|
|||||||
print(sample_text)
|
print(sample_text)
|
||||||
print("-" * 50)
|
print("-" * 50)
|
||||||
|
|
||||||
return texts
|
# Convert strings to dict format expected by base class
|
||||||
|
return [{"text": text, "metadata": {"source": "twitter"}} for text in texts]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"❌ Error loading Twitter bookmarks: {e}")
|
print(f"❌ Error loading Twitter bookmarks: {e}")
|
||||||
|
|||||||
@@ -243,7 +243,7 @@ def create_ast_chunks(
|
|||||||
astchunk_metadata: dict[str, Any] = {}
|
astchunk_metadata: dict[str, Any] = {}
|
||||||
|
|
||||||
if hasattr(chunk, "text"):
|
if hasattr(chunk, "text"):
|
||||||
chunk_text = chunk.text
|
chunk_text = str(chunk.text) if chunk.text else None
|
||||||
elif isinstance(chunk, str):
|
elif isinstance(chunk, str):
|
||||||
chunk_text = chunk
|
chunk_text = chunk
|
||||||
elif isinstance(chunk, dict):
|
elif isinstance(chunk, dict):
|
||||||
|
|||||||
@@ -451,7 +451,8 @@ def compute_embeddings_sentence_transformers(
|
|||||||
# TODO: Haven't tested this yet
|
# TODO: Haven't tested this yet
|
||||||
torch.set_num_threads(min(8, os.cpu_count() or 4))
|
torch.set_num_threads(min(8, os.cpu_count() or 4))
|
||||||
try:
|
try:
|
||||||
torch.backends.mkldnn.enabled = True
|
# PyTorch's ContextProp type is complex; cast for type checker
|
||||||
|
torch.backends.mkldnn.enabled = True # type: ignore[assignment]
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|||||||
@@ -56,7 +56,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
|
|||||||
with open(meta_path, encoding="utf-8") as f:
|
with open(meta_path, encoding="utf-8") as f:
|
||||||
return json.load(f)
|
return json.load(f)
|
||||||
|
|
||||||
def _ensure_server_running(self, passages_source_file: str, port: int, **kwargs) -> int:
|
def _ensure_server_running(self, passages_source_file: str, port: Optional[int], **kwargs) -> int:
|
||||||
"""
|
"""
|
||||||
Ensures the embedding server is running if recompute is needed.
|
Ensures the embedding server is running if recompute is needed.
|
||||||
This is a helper for subclasses.
|
This is a helper for subclasses.
|
||||||
@@ -81,7 +81,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
|
|||||||
}
|
}
|
||||||
|
|
||||||
server_started, actual_port = self.embedding_server_manager.start_server(
|
server_started, actual_port = self.embedding_server_manager.start_server(
|
||||||
port=port,
|
port=port if port is not None else 5557,
|
||||||
model_name=self.embedding_model,
|
model_name=self.embedding_model,
|
||||||
embedding_mode=self.embedding_mode,
|
embedding_mode=self.embedding_mode,
|
||||||
passages_file=passages_source_file,
|
passages_file=passages_source_file,
|
||||||
@@ -98,7 +98,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
|
|||||||
self,
|
self,
|
||||||
query: str,
|
query: str,
|
||||||
use_server_if_available: bool = True,
|
use_server_if_available: bool = True,
|
||||||
zmq_port: int = 5557,
|
zmq_port: Optional[int] = None,
|
||||||
query_template: Optional[str] = None,
|
query_template: Optional[str] = None,
|
||||||
) -> np.ndarray:
|
) -> np.ndarray:
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user