fix readme
This commit is contained in:
@@ -40,8 +40,8 @@ Tests DiskANN graph partitioning functionality:
|
||||
|
||||
### Install test dependencies:
|
||||
```bash
|
||||
# Using extras
|
||||
uv pip install -e ".[test]"
|
||||
# Using uv dependency groups (tools only)
|
||||
uv sync --only-group test
|
||||
```
|
||||
|
||||
### Run all tests:
|
||||
|
||||
14
tests/test_cli_ask.py
Normal file
14
tests/test_cli_ask.py
Normal file
@@ -0,0 +1,14 @@
|
||||
from leann.cli import LeannCLI
|
||||
|
||||
|
||||
def test_cli_ask_accepts_positional_query(tmp_path, monkeypatch):
    """The ``ask`` subcommand takes the query as a positional after the index name."""
    # Run from the temporary directory rather than the real working directory.
    monkeypatch.chdir(tmp_path)

    argv = ["ask", "my-docs", "Where are prompts configured?"]
    parsed = LeannCLI().create_parser().parse_args(argv)

    assert parsed.command == "ask"
    assert parsed.index_name == "my-docs"
    assert parsed.query == "Where are prompts configured?"
|
||||
137
tests/test_embedding_server_manager.py
Normal file
137
tests/test_embedding_server_manager.py
Normal file
@@ -0,0 +1,137 @@
|
||||
import json
|
||||
import time
|
||||
|
||||
import pytest
|
||||
from leann.embedding_server_manager import EmbeddingServerManager
|
||||
|
||||
|
||||
class DummyProcess:
    """In-memory stand-in for a ``subprocess.Popen`` handle.

    No real process is involved. The object starts out "running" and flips to
    "exited with status 0" as soon as ``terminate``, ``kill`` or ``wait`` is
    called.
    """

    def __init__(self):
        self.pid = 12345  # arbitrary fixed pid; nothing is ever spawned
        self._terminated = False

    @property
    def returncode(self):
        """Exit status: ``None`` while running, ``0`` once stopped.

        Mirrors ``Popen.returncode`` so code that reads the attribute instead
        of calling ``poll()`` also works against this fake.
        """
        return 0 if self._terminated else None

    def poll(self):
        """Return ``None`` while running, ``0`` after the process was stopped."""
        return self.returncode

    def terminate(self):
        """Mark the fake process as stopped."""
        self._terminated = True

    def kill(self):
        """Mark the fake process as stopped (same effect as ``terminate``)."""
        self._terminated = True

    def wait(self, timeout=None):
        """Mark the fake process as stopped and return exit status ``0``.

        ``timeout`` is accepted for signature compatibility and ignored.
        """
        self._terminated = True
        return 0
|
||||
|
||||
|
||||
@pytest.fixture
def embedding_manager(monkeypatch):
    """Provide an ``EmbeddingServerManager`` with port lookup, start and stop faked.

    Returns:
        A ``(manager, start_calls)`` tuple. ``start_calls`` receives the
        ``config_signature`` keyword of every faked server start, in call
        order, so tests can count starts and inspect the signatures.
    """
    manager = EmbeddingServerManager("leann_backend_hnsw.hnsw_embedding_server")
    start_calls = []

    def _echo_port(start_port):
        # Hand back the requested port unchanged.
        return start_port

    def _record_start(self, port, model_name, embedding_mode, **kwargs):
        # Log the signature and populate the manager with a DummyProcess
        # instead of launching anything.
        signature = kwargs.get("config_signature")
        start_calls.append(signature)
        self.server_process = DummyProcess()
        self.server_port = port
        self._server_config = signature
        return True, port

    def _forget_server(self):
        # Ensure stop_server doesn't try to operate on real subprocesses.
        self.server_process = self.server_port = self._server_config = None

    monkeypatch.setattr(
        "leann.embedding_server_manager._get_available_port",
        _echo_port,
    )
    monkeypatch.setattr(EmbeddingServerManager, "_start_new_server", _record_start)
    monkeypatch.setattr(EmbeddingServerManager, "stop_server", _forget_server)

    return manager, start_calls
|
||||
|
||||
|
||||
def _write_meta(meta_path, passages_name, index_name, total):
    """Write a small index-metadata JSON document to ``meta_path``.

    Args:
        meta_path: Object exposing ``write_text`` (e.g. a ``pathlib.Path``);
            its previous content is replaced.
        passages_name: Stored as ``path`` of the single passage source.
        index_name: Stored as ``index_path`` of the single passage source.
        total: Stored as ``total_passages``.
    """
    passage_source = {
        "type": "jsonl",
        "path": passages_name,
        "index_path": index_name,
    }
    # Key order is kept stable so the serialized text is reproducible.
    metadata = {
        "backend_name": "hnsw",
        "embedding_model": "test-model",
        "embedding_mode": "sentence-transformers",
        "dimensions": 3,
        "backend_kwargs": {},
        "passage_sources": [passage_source],
        "total_passages": total,
    }
    meta_path.write_text(json.dumps(metadata), encoding="utf-8")
|
||||
|
||||
|
||||
def test_server_restarts_when_metadata_changes(tmp_path, embedding_manager):
    """Server is reused while metadata is unchanged and restarted after it changes."""
    manager, start_calls = embedding_manager

    meta_path = tmp_path / "example.meta.json"
    passages_path = tmp_path / "example.passages.jsonl"
    index_path = tmp_path / "example.passages.idx"

    def request_server():
        # Every request in this test uses the same arguments.
        return manager.start_server(
            port=6000,
            model_name="test-model",
            passages_file=str(meta_path),
        )

    passages_path.write_text("first\n", encoding="utf-8")
    index_path.write_bytes(b"index")
    _write_meta(meta_path, passages_path.name, index_path.name, total=1)

    # First request: a start is recorded together with its signature.
    ok, port = request_server()
    assert ok
    assert port == 6000
    assert len(start_calls) == 1

    initial_signature = start_calls[0]["passages_signature"]

    # Nothing changed on disk, so no additional start may be recorded.
    ok, port_again = request_server()
    assert ok
    assert port_again == 6000
    assert len(start_calls) == 1

    # Rewrite passages and metadata to force a signature change.
    time.sleep(0.01)  # Ensure filesystem timestamps move forward
    passages_path.write_text("second\n", encoding="utf-8")
    _write_meta(meta_path, passages_path.name, index_path.name, total=2)

    ok, port_third = request_server()
    assert ok
    assert port_third == 6000
    assert len(start_calls) == 2

    updated_signature = start_calls[1]["passages_signature"]
    assert updated_signature != initial_signature
|
||||
208
tests/test_mcp_integration.py
Normal file
208
tests/test_mcp_integration.py
Normal file
@@ -0,0 +1,208 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script for MCP integration implementations.
|
||||
|
||||
This script tests the basic functionality of the MCP readers and RAG applications
|
||||
without requiring actual MCP servers to be running.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add the parent directory to the path so we can import from apps
|
||||
sys.path.append(str(Path(__file__).parent.parent))
|
||||
|
||||
from apps.slack_data.slack_mcp_reader import SlackMCPReader
|
||||
from apps.slack_rag import SlackMCPRAG
|
||||
from apps.twitter_data.twitter_mcp_reader import TwitterMCPReader
|
||||
from apps.twitter_rag import TwitterMCPRAG
|
||||
|
||||
|
||||
def test_slack_reader_initialization():
    """Check SlackMCPReader defaults and explicit constructor overrides."""
    print("Testing SlackMCPReader initialization...")

    # Only the server command is supplied; everything else is left at defaults.
    default_reader = SlackMCPReader("slack-mcp-server")
    assert default_reader.mcp_server_command == "slack-mcp-server"
    assert default_reader.concatenate_conversations
    assert default_reader.max_messages_per_conversation == 100

    # Every optional setting is overridden.
    overrides = {
        "workspace_name": "test-workspace",
        "concatenate_conversations": False,
        "max_messages_per_conversation": 50,
    }
    custom_reader = SlackMCPReader("custom-slack-server", **overrides)
    assert custom_reader.workspace_name == "test-workspace"
    assert not custom_reader.concatenate_conversations
    assert custom_reader.max_messages_per_conversation == 50

    print("✅ SlackMCPReader initialization tests passed")
|
||||
|
||||
|
||||
def test_twitter_reader_initialization():
    """Check TwitterMCPReader defaults and explicit constructor overrides."""
    print("Testing TwitterMCPReader initialization...")

    # Only the server command is supplied; everything else is left at defaults.
    default_reader = TwitterMCPReader("twitter-mcp-server")
    assert default_reader.mcp_server_command == "twitter-mcp-server"
    assert default_reader.include_tweet_content
    assert default_reader.include_metadata
    assert default_reader.max_bookmarks == 1000

    # Every optional setting is overridden.
    overrides = {
        "username": "testuser",
        "include_tweet_content": False,
        "include_metadata": False,
        "max_bookmarks": 500,
    }
    custom_reader = TwitterMCPReader("custom-twitter-server", **overrides)
    assert custom_reader.username == "testuser"
    assert not custom_reader.include_tweet_content
    assert not custom_reader.include_metadata
    assert custom_reader.max_bookmarks == 500

    print("✅ TwitterMCPReader initialization tests passed")
|
||||
|
||||
|
||||
def test_slack_message_formatting():
    """Check the text SlackMCPReader produces for full and sparse messages."""
    print("Testing Slack message formatting...")

    reader = SlackMCPReader("slack-mcp-server")

    # A message carrying text, user, channel and timestamp.
    full_message = {
        "text": "Hello, world!",
        "user": "john_doe",
        "channel": "general",
        "ts": "1234567890.123456",
    }
    rendered = reader._format_message(full_message)
    for fragment in (
        "Channel: #general",
        "User: john_doe",
        "Message: Hello, world!",
        "Time:",
    ):
        assert fragment in rendered

    # A message with nothing but text must still be rendered.
    rendered = reader._format_message({"text": "Simple message"})
    assert "Message: Simple message" in rendered

    print("✅ Slack message formatting tests passed")
|
||||
|
||||
|
||||
def test_twitter_bookmark_formatting():
    """Check the text TwitterMCPReader produces for full and sparse bookmarks."""
    print("Testing Twitter bookmark formatting...")

    reader = TwitterMCPReader("twitter-mcp-server")

    # A bookmark with author, timestamp, URL and engagement counters.
    full_bookmark = {
        "text": "This is a great article about AI!",
        "author": "ai_researcher",
        "created_at": "2024-01-01T12:00:00Z",
        "url": "https://twitter.com/ai_researcher/status/123456789",
        "likes": 42,
        "retweets": 15,
    }
    rendered = reader._format_bookmark(full_bookmark)
    for fragment in (
        "=== Twitter Bookmark ===",
        "Author: @ai_researcher",
        "Content:",
        "This is a great article about AI!",
        "URL: https://twitter.com",
        "Likes: 42",
        "Retweets: 15",
    ):
        assert fragment in rendered

    # A bookmark with nothing but text.
    rendered = reader._format_bookmark({"text": "Simple tweet"})
    assert "=== Twitter Bookmark ===" in rendered
    assert "Simple tweet" in rendered

    print("✅ Twitter bookmark formatting tests passed")
|
||||
|
||||
|
||||
def test_slack_rag_initialization():
    """Check that SlackMCPRAG constructs with its index name and a parser."""
    print("Testing SlackMCPRAG initialization...")

    rag = SlackMCPRAG()
    assert rag.default_index_name == "slack_messages"
    assert hasattr(rag, "parser")

    print("✅ SlackMCPRAG initialization tests passed")
|
||||
|
||||
|
||||
def test_twitter_rag_initialization():
    """Check that TwitterMCPRAG constructs with its index name and a parser."""
    print("Testing TwitterMCPRAG initialization...")

    rag = TwitterMCPRAG()
    assert rag.default_index_name == "twitter_bookmarks"
    assert hasattr(rag, "parser")

    print("✅ TwitterMCPRAG initialization tests passed")
|
||||
|
||||
|
||||
def test_concatenated_content_creation():
    """Check the combined document built from several Slack messages."""
    print("Testing concatenated content creation...")

    reader = SlackMCPReader("slack-mcp-server", workspace_name="test-workspace")

    conversation = [
        {"text": "First message", "user": "alice", "ts": "1000"},
        {"text": "Second message", "user": "bob", "ts": "2000"},
        {"text": "Third message", "user": "charlie", "ts": "3000"},
    ]
    document = reader._create_concatenated_content(conversation, "general")

    # Header fields first, then the text of every message.
    for fragment in (
        "Slack Channel: #general",
        "Message Count: 3",
        "Workspace: test-workspace",
        "First message",
        "Second message",
        "Third message",
    ):
        assert fragment in document

    print("✅ Concatenated content creation tests passed")
|
||||
|
||||
|
||||
def main():
    """Run every test in this module in order when executed as a script.

    Prints a summary and next steps when all tests pass. On the first
    failure, prints the error and its traceback and exits with status 1.
    """
    print("🧪 Running MCP Integration Tests")
    print("=" * 50)

    tests = (
        test_slack_reader_initialization,
        test_twitter_reader_initialization,
        test_slack_message_formatting,
        test_twitter_bookmark_formatting,
        test_slack_rag_initialization,
        test_twitter_rag_initialization,
        test_concatenated_content_creation,
    )

    try:
        for run_test in tests:
            run_test()

        print("\n" + "=" * 50)
        print("🎉 All tests passed! MCP integration is working correctly.")
        print("\nNext steps:")
        print("1. Install actual MCP servers for Slack and Twitter")
        print("2. Configure API credentials")
        print("3. Test with --test-connection flag")
        print("4. Start indexing your live data!")

    except Exception as e:
        print(f"\n❌ Test failed: {e}")
        # A bare `assert` failure has an empty message, so the line above
        # alone does not say which check failed; the traceback does.
        import traceback

        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
221
tests/test_mcp_standalone.py
Normal file
221
tests/test_mcp_standalone.py
Normal file
@@ -0,0 +1,221 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Standalone test script for MCP integration implementations.
|
||||
|
||||
This script tests the basic functionality of the MCP readers
|
||||
without requiring LEANN core dependencies.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add the parent directory to the path so we can import from apps
|
||||
sys.path.append(str(Path(__file__).parent.parent))
|
||||
|
||||
|
||||
def test_slack_reader_basic():
    """Exercise SlackMCPReader construction and its synchronous formatting helpers."""
    print("Testing SlackMCPReader basic functionality...")

    # Imported here, not at module level, so the import happens inside the test.
    from apps.slack_data.slack_mcp_reader import SlackMCPReader

    reader = SlackMCPReader("slack-mcp-server")
    assert reader.mcp_server_command == "slack-mcp-server"
    assert reader.concatenate_conversations

    # Single-message formatting.
    rendered = reader._format_message(
        {
            "text": "Hello team! How's the project going?",
            "user": "john_doe",
            "channel": "general",
            "ts": "1234567890.123456",
        }
    )
    for fragment in ("Channel: #general", "User: john_doe", "Message: Hello team!"):
        assert fragment in rendered

    # Several messages combined into one document.
    conversation = [
        {"text": "First message", "user": "alice", "ts": "1000"},
        {"text": "Second message", "user": "bob", "ts": "2000"},
    ]
    document = reader._create_concatenated_content(conversation, "dev-team")
    for fragment in (
        "Slack Channel: #dev-team",
        "Message Count: 2",
        "First message",
        "Second message",
    ):
        assert fragment in document

    print("✅ SlackMCPReader basic tests passed")
|
||||
|
||||
|
||||
def test_twitter_reader_basic():
    """Exercise TwitterMCPReader construction and bookmark formatting."""
    print("Testing TwitterMCPReader basic functionality...")

    from apps.twitter_data.twitter_mcp_reader import TwitterMCPReader

    reader = TwitterMCPReader("twitter-mcp-server")
    assert reader.mcp_server_command == "twitter-mcp-server"
    assert reader.include_tweet_content
    assert reader.max_bookmarks == 1000

    # A bookmark with engagement counters, hashtags and mentions.
    rich_bookmark = {
        "text": "Amazing article about the future of AI! Must read for everyone interested in tech.",
        "author": "tech_guru",
        "created_at": "2024-01-15T14:30:00Z",
        "url": "https://twitter.com/tech_guru/status/123456789",
        "likes": 156,
        "retweets": 42,
        "replies": 23,
        "hashtags": ["AI", "tech", "future"],
        "mentions": ["@openai", "@anthropic"],
    }
    rendered = reader._format_bookmark(rich_bookmark)
    for fragment in (
        "=== Twitter Bookmark ===",
        "Author: @tech_guru",
        "Amazing article about the future of AI!",
        "Likes: 156",
        "Retweets: 42",
        "Hashtags: AI, tech, future",
        "Mentions: @openai, @anthropic",
    ):
        assert fragment in rendered

    # A bookmark with only text and author.
    rendered_simple = reader._format_bookmark({"text": "Short tweet", "author": "user123"})
    for fragment in ("=== Twitter Bookmark ===", "Short tweet", "Author: @user123"):
        assert fragment in rendered_simple

    print("✅ TwitterMCPReader basic tests passed")
|
||||
|
||||
|
||||
def test_mcp_request_format():
    """Check that sample JSON-RPC request payloads survive a JSON round trip."""
    print("Testing MCP request formatting...")

    def roundtrip(payload):
        # Serialize and parse back, proving the payload is valid JSON.
        return json.loads(json.dumps(payload))

    # "initialize" request.
    client_info = {"name": "leann-slack-reader", "version": "1.0.0"}
    init_request = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "initialize",
        "params": {
            "protocolVersion": "2024-11-05",
            "capabilities": {},
            "clientInfo": client_info,
        },
    }
    decoded = roundtrip(init_request)
    assert decoded["jsonrpc"] == "2.0"
    assert decoded["method"] == "initialize"
    assert decoded["params"]["protocolVersion"] == "2024-11-05"

    # "tools/list" request.
    list_request = {"jsonrpc": "2.0", "id": 2, "method": "tools/list", "params": {}}
    decoded = roundtrip(list_request)
    assert decoded["method"] == "tools/list"

    print("✅ MCP request formatting tests passed")
|
||||
|
||||
|
||||
def test_data_processing():
    """Exercise Slack concatenation and Twitter formatting with and without metadata."""
    print("Testing data processing capabilities...")

    from apps.slack_data.slack_mcp_reader import SlackMCPReader
    from apps.twitter_data.twitter_mcp_reader import TwitterMCPReader

    # Slack: messages with fractional timestamps combined into one document.
    slack_reader = SlackMCPReader("test-server")
    meeting_messages = [
        {"text": "Meeting in 5 minutes", "user": "alice", "ts": "1000.123"},
        {"text": "On my way!", "user": "bob", "ts": "1001.456"},
        {"text": "Starting now", "user": "charlie", "ts": "1002.789"},
    ]
    document = slack_reader._create_concatenated_content(meeting_messages, "meetings")
    for fragment in ("Meeting in 5 minutes", "On my way!", "Starting now"):
        assert fragment in document

    # Twitter: engagement counters appear when metadata is enabled.
    with_meta = TwitterMCPReader("test-server", include_metadata=True)
    popular_bookmark = {
        "text": "Thread about startup lessons learned 🧵",
        "author": "startup_founder",
        "likes": 1250,
        "retweets": 340,
        "replies": 89,
    }
    rendered = with_meta._format_bookmark(popular_bookmark)
    for fragment in (
        "Thread about startup lessons learned",
        "Likes: 1250",
        "Retweets: 340",
        "Replies: 89",
    ):
        assert fragment in rendered

    # Twitter: the same bookmark without metadata keeps text, drops counters.
    without_meta = TwitterMCPReader("test-server", include_metadata=False)
    rendered_plain = without_meta._format_bookmark(popular_bookmark)
    assert "Thread about startup lessons learned" in rendered_plain
    for label in ("Likes:", "Retweets:"):
        assert label not in rendered_plain

    print("✅ Data processing tests passed")
|
||||
|
||||
|
||||
def main():
    """Run the standalone tests in order and print a summary.

    On the first failure, prints the error and its traceback and exits
    with status 1.
    """
    for banner_line in (
        "🧪 Running MCP Integration Standalone Tests",
        "=" * 60,
        "Testing core functionality without LEANN dependencies...",
    ):
        print(banner_line)
    print()

    suite = (
        test_slack_reader_basic,
        test_twitter_reader_basic,
        test_mcp_request_format,
        test_data_processing,
    )

    try:
        for run_test in suite:
            run_test()

        report = (
            "\n" + "=" * 60,
            "🎉 All standalone tests passed!",
            "\n✨ MCP Integration Summary:",
            "- SlackMCPReader: Ready for Slack message processing",
            "- TwitterMCPReader: Ready for Twitter bookmark processing",
            "- MCP Protocol: Properly formatted JSON-RPC requests",
            "- Data Processing: Handles various message/bookmark formats",
            "\n🚀 Next Steps:",
            "1. Install MCP servers: npm install -g slack-mcp-server twitter-mcp-server",
            "2. Configure API credentials for Slack and Twitter",
            "3. Test connections: python -m apps.slack_rag --test-connection",
            "4. Start indexing live data from your platforms!",
            "\n📖 Documentation:",
            "- Check README.md for detailed setup instructions",
            "- Run examples/mcp_integration_demo.py for usage examples",
            "- Explore apps/slack_rag.py and apps/twitter_rag.py for implementation details",
        )
        for report_line in report:
            print(report_line)

    except Exception as e:
        print(f"\n❌ Test failed: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user