diff --git a/README.md b/README.md index 93032be..c59e738 100755 --- a/README.md +++ b/README.md @@ -777,7 +777,7 @@ Once your iMessage conversations are indexed, you can search with queries like: ### MCP Integration: RAG on Live Data from Any Platform -**NEW!** Connect to live data sources through the Model Context Protocol (MCP). LEANN now supports real-time RAG on platforms like Slack, Twitter, and more through standardized MCP servers. +Connect to live data sources through the Model Context Protocol (MCP). LEANN now supports real-time RAG on platforms like Slack, Twitter, and more through standardized MCP servers. **Key Benefits:** - **Live Data Access**: Fetch real-time data without manual exports @@ -801,18 +801,17 @@ python -m apps.slack_rag \ --query "What did we decide about the product launch?" ``` -**Setup Requirements:** +**📖 Comprehensive Setup Guide**: For detailed setup instructions, troubleshooting common issues (like "users cache is not ready yet"), and advanced configuration options, see our [**Slack Setup Guide**](docs/slack-setup-guide.md). + +**Quick Setup:** 1. Install a Slack MCP server (e.g., `npm install -g slack-mcp-server`) -2. Create a Slack App and get API credentials: - - Go to [api.slack.com/apps](https://api.slack.com/apps) and create a new app - - Under "OAuth & Permissions", add these Bot Token Scopes: `channels:read`, `channels:history`, `groups:read`, `groups:history`, `im:read`, `im:history`, `mpim:read`, `mpim:history` - - Install the app to your workspace and copy the "Bot User OAuth Token" (starts with `xoxb-`) - - Under "App-Level Tokens", create a token with `connections:write` scope (starts with `xapp-`) +2. Create a Slack App and get API credentials (see detailed guide above) +3. Set environment variables: ```bash export SLACK_BOT_TOKEN="xoxb-your-bot-token" - export SLACK_APP_TOKEN="xapp-your-app-token" + export SLACK_APP_TOKEN="xapp-your-app-token" # Optional ``` -3. Test connection with `--test-connection` flag +4. 
Test connection with `--test-connection` flag **Arguments:** - `--mcp-server`: Command to start the Slack MCP server @@ -820,6 +819,8 @@ python -m apps.slack_rag \ - `--channels`: Specific channels to index (optional) - `--concatenate-conversations`: Group messages by channel (default: true) - `--max-messages-per-channel`: Limit messages per channel (default: 100) +- `--max-retries`: Maximum retries for cache sync issues (default: 5) +- `--retry-delay`: Initial delay between retries in seconds (default: 2.0) #### 🐦 Twitter Bookmarks: Your Personal Tweet Library @@ -921,7 +922,7 @@ Want to add support for other platforms? LEANN's MCP integration is designed for ### 🚀 Claude Code Integration: Transform Your Development Workflow!
-NEW!! AST‑Aware Code Chunking
+AST‑Aware Code Chunking
 LEANN features intelligent code chunking that preserves semantic boundaries (functions, classes, methods) for Python, Java, C#, and TypeScript, improving code understanding compared to text-based chunking.
diff --git a/apps/slack_data/slack_mcp_reader.py b/apps/slack_data/slack_mcp_reader.py
index e55820f..7580951 100644
--- a/apps/slack_data/slack_mcp_reader.py
+++ b/apps/slack_data/slack_mcp_reader.py
@@ -29,6 +29,8 @@ class SlackMCPReader:
         workspace_name: Optional[str] = None,
         concatenate_conversations: bool = True,
         max_messages_per_conversation: int = 100,
+        max_retries: int = 5,
+        retry_delay: float = 2.0,
     ):
         """
         Initialize the Slack MCP Reader.
@@ -38,11 +40,15 @@ class SlackMCPReader:
             workspace_name: Optional workspace name to filter messages
             concatenate_conversations: Whether to group messages by channel/thread
             max_messages_per_conversation: Maximum messages to include per conversation
+            max_retries: Maximum number of retries for cache sync errors
+            retry_delay: Initial delay between retries in seconds
         """
         self.mcp_server_command = mcp_server_command
         self.workspace_name = workspace_name
         self.concatenate_conversations = concatenate_conversations
         self.max_messages_per_conversation = max_messages_per_conversation
+        self.max_retries = max_retries
+        self.retry_delay = retry_delay
         self.mcp_process = None
 
     async def start_mcp_server(self):
@@ -110,11 +116,73 @@ class SlackMCPReader:
 
         return response.get("result", {}).get("tools", [])
 
+    def _is_cache_sync_error(self, error: dict) -> bool:
+        """Return True if ``error`` reports that the Slack users cache is still syncing.
+
+        Anything that is not a dict is treated as "not a cache sync error".
+        """
+        if not isinstance(error, dict):
+            return False
+        # Coerce defensively: a missing or null "message" must not raise here.
+        message = str(error.get("message") or "").lower()
+        return "users cache is not ready" in message or "sync process is still running" in message
+
+    async def _retry_with_backoff(self, func, *args, **kwargs):
+        """Await ``func``, retrying with exponential backoff on cache sync errors.
+
+        Args:
+            func: Coroutine function to call.
+            *args: Positional arguments forwarded to ``func``.
+            **kwargs: Keyword arguments forwarded to ``func``.
+
+        Returns:
+            Whatever ``func`` returns on the first successful attempt.
+
+        Raises:
+            Exception: The exception from ``func``, re-raised unchanged when it is not a cache
+                sync error or when ``self.max_retries`` retries have been used up.
+        """
+        import ast
+        import re
+
+        for attempt in range(max(self.max_retries, 0) + 1):
+            try:
+                return await func(*args, **kwargs)
+            except Exception as e:
+                # The payload is the first exception argument or a dict repr in the message.
+                error_dict = {}
+                if e.args and isinstance(e.args[0], dict):
+                    error_dict = e.args[0]
+                elif "Failed to fetch messages" in str(e):
+                    match = re.search(r"'error':\s*(\{[^}]+\})", str(e)) or re.search(
+                        r"Failed to fetch messages:\s*(\{[^}]+\})", str(e)
+                    )
+                    if match:
+                        try:
+                            # literal_eval parses literals only; eval() would execute
+                            # whatever expression the error text happens to contain.
+                            error_dict = ast.literal_eval(match.group(1))
+                        except (ValueError, SyntaxError, TypeError):
+                            pass  # Unparseable payload: treat as a non-retryable error
+
+                if not self._is_cache_sync_error(error_dict):
+                    raise  # Not a cache sync error, don't retry
+                if attempt >= self.max_retries:
+                    logger.warning(
+                        f"Cache sync still not ready after {self.max_retries} retries, giving up"
+                    )
+                    raise
+                delay = self.retry_delay * (2**attempt)  # Exponential backoff
+                logger.info(
+                    f"Cache sync not ready, waiting {delay:.1f}s before retry {attempt + 1}/{self.max_retries}"
+                )
+                await asyncio.sleep(delay)
+
     async def fetch_slack_messages(
         self, channel: Optional[str] = None, limit: int = 100
     ) -> list[dict[str, Any]]:
         """
-        Fetch Slack messages using MCP tools.
+        Fetch Slack messages using MCP tools with retry logic for cache sync issues.
Args: channel: Optional channel name to filter messages @@ -123,6 +191,14 @@ class SlackMCPReader: Returns: List of message dictionaries """ + return await self._retry_with_backoff(self._fetch_slack_messages_impl, channel, limit) + + async def _fetch_slack_messages_impl( + self, channel: Optional[str] = None, limit: int = 100 + ) -> list[dict[str, Any]]: + """ + Internal implementation of fetch_slack_messages without retry logic. + """ # This is a generic implementation - specific MCP servers may have different tool names # Common tool names might be: 'get_messages', 'list_messages', 'fetch_channel_history' diff --git a/apps/slack_rag.py b/apps/slack_rag.py index c3cc0fc..cb145b1 100644 --- a/apps/slack_rag.py +++ b/apps/slack_rag.py @@ -78,6 +78,20 @@ class SlackMCPRAG(BaseRAGExample): help="Test MCP server connection and list available tools without indexing", ) + parser.add_argument( + "--max-retries", + type=int, + default=5, + help="Maximum number of retries for failed operations (default: 5)", + ) + + parser.add_argument( + "--retry-delay", + type=float, + default=2.0, + help="Initial delay between retries in seconds (default: 2.0)", + ) + async def test_mcp_connection(self, args) -> bool: """Test the MCP server connection and display available tools.""" print(f"Testing connection to MCP server: {args.mcp_server}") @@ -88,12 +102,14 @@ class SlackMCPRAG(BaseRAGExample): workspace_name=args.workspace_name, concatenate_conversations=not args.no_concatenate_conversations, max_messages_per_conversation=args.max_messages_per_channel, + max_retries=args.max_retries, + retry_delay=args.retry_delay, ) async with reader: tools = await reader.list_available_tools() - print("\n✅ Successfully connected to MCP server!") + print("Successfully connected to MCP server!") print(f"Available tools ({len(tools)}):") for i, tool in enumerate(tools, 1): @@ -115,7 +131,7 @@ class SlackMCPRAG(BaseRAGExample): return True except Exception as e: - print(f"\n❌ Failed to connect to MCP 
server: {e}") + print(f"Failed to connect to MCP server: {e}") print("\nTroubleshooting tips:") print("1. Make sure the MCP server is installed and accessible") print("2. Check if the server command is correct") @@ -146,18 +162,20 @@ class SlackMCPRAG(BaseRAGExample): workspace_name=args.workspace_name, concatenate_conversations=concatenate, max_messages_per_conversation=args.max_messages_per_channel, + max_retries=args.max_retries, + retry_delay=args.retry_delay, ) texts = await reader.read_slack_data(channels=args.channels) if not texts: - print("❌ No messages found! This could mean:") + print("No messages found! This could mean:") print("- The MCP server couldn't fetch messages") print("- The specified channels don't exist or are empty") print("- Authentication issues with the Slack workspace") return [] - print(f"✅ Successfully loaded {len(texts)} text chunks from Slack") + print(f"Successfully loaded {len(texts)} text chunks from Slack") # Show sample of what was loaded if texts: @@ -170,7 +188,7 @@ class SlackMCPRAG(BaseRAGExample): return texts except Exception as e: - print(f"❌ Error loading Slack data: {e}") + print(f"Error loading Slack data: {e}") print("\nThis might be due to:") print("- MCP server connection issues") print("- Authentication problems") @@ -188,7 +206,7 @@ class SlackMCPRAG(BaseRAGExample): if not success: return print( - "\n🎉 MCP server is working! You can now run without --test-connection to start indexing." + "MCP server is working! You can now run without --test-connection to start indexing." ) return diff --git a/docs/slack-setup-guide.md b/docs/slack-setup-guide.md new file mode 100644 index 0000000..28cb611 --- /dev/null +++ b/docs/slack-setup-guide.md @@ -0,0 +1,274 @@ +# Slack Integration Setup Guide + +This comprehensive guide will walk you through setting up Slack integration with LEANN, including troubleshooting common issues like the "users cache is not ready yet" error. 
+ +## Overview + +LEANN's Slack integration uses MCP (Model Context Protocol) servers to fetch and index your Slack messages for RAG (Retrieval-Augmented Generation). This allows you to search through your Slack conversations using natural language queries. + +## Prerequisites + +1. **Slack Workspace Access**: You need admin or owner permissions in your Slack workspace to create apps and configure OAuth tokens. + +2. **Slack MCP Server**: Install a Slack MCP server (e.g., `slack-mcp-server` via npm) + +3. **LEANN**: Ensure you have LEANN installed and working + +## Step 1: Create a Slack App + +### 1.1 Go to Slack API Dashboard + +1. Visit [https://api.slack.com/apps](https://api.slack.com/apps) +2. Click **"Create New App"** +3. Choose **"From scratch"** +4. Enter your app name (e.g., "LEANN Slack Integration") +5. Select your workspace +6. Click **"Create App"** + +### 1.2 Configure App Permissions + +#### Bot Token Scopes + +1. In your app dashboard, go to **"OAuth & Permissions"** in the left sidebar +2. Scroll down to **"Scopes"** section +3. Under **"Bot Token Scopes"**, click **"Add an OAuth Scope"** +4. Add the following scopes: + - `channels:read` - Read public channel information + - `channels:history` - Read messages in public channels + - `groups:read` - Read private channel information + - `groups:history` - Read messages in private channels + - `im:read` - Read direct message information + - `im:history` - Read direct messages + - `mpim:read` - Read group direct message information + - `mpim:history` - Read group direct messages + - `users:read` - Read user information + - `team:read` - Read workspace information + +#### App-Level Tokens (Optional) + +Some MCP servers may require app-level tokens: + +1. Go to **"Basic Information"** in the left sidebar +2. Scroll down to **"App-Level Tokens"** +3. Click **"Generate Token and Scopes"** +4. Enter a name (e.g., "LEANN Integration") +5. Add the `connections:write` scope +6. Click **"Generate"** +7. 
Copy the token (starts with `xapp-`) + +### 1.3 Install App to Workspace + +1. Go to **"OAuth & Permissions"** in the left sidebar +2. Click **"Install to Workspace"** +3. Review the permissions and click **"Allow"** +4. Copy the **"Bot User OAuth Token"** (starts with `xoxb-`) + +## Step 2: Install Slack MCP Server + +### Option A: Using npm (Recommended) + +```bash +# Install globally +npm install -g slack-mcp-server + +# Or install locally +npm install slack-mcp-server +``` + +### Option B: Using npx (No installation required) + +```bash +# Use directly without installation +npx slack-mcp-server +``` + +## Step 3: Configure Environment Variables + +Create a `.env` file or set environment variables: + +```bash +# Required: Bot User OAuth Token +SLACK_BOT_TOKEN=xoxb-your-bot-token-here + +# Optional: App-Level Token (if your MCP server requires it) +SLACK_APP_TOKEN=xapp-your-app-token-here + +# Optional: Workspace-specific settings +SLACK_WORKSPACE_ID=T1234567890 # Your workspace ID (optional) +``` + +## Step 4: Test the Setup + +### 4.1 Test MCP Server Connection + +```bash +python -m apps.slack_rag \ + --mcp-server "slack-mcp-server" \ + --test-connection \ + --workspace-name "Your Workspace Name" +``` + +This will test the connection and list available tools without indexing any data. + +### 4.2 Index a Specific Channel + +```bash +python -m apps.slack_rag \ + --mcp-server "slack-mcp-server" \ + --workspace-name "Your Workspace Name" \ + --channels general \ + --query "What did we discuss about the project?" +``` + +## Common Issues and Solutions + +### Issue 1: "users cache is not ready yet" Error + +**Problem**: You see this warning: +``` +WARNING - Failed to fetch messages from channel random: Failed to fetch messages: {'code': -32603, 'message': 'users cache is not ready yet, sync process is still running... please wait'} +``` + +**Solution**: This is a common timing issue. The LEANN integration now includes automatic retry logic: + +1. 
**Wait and Retry**: The system will automatically retry with exponential backoff (2s, 4s, 8s, etc.) +2. **Increase Retry Parameters**: If needed, you can customize retry behavior: + ```bash + python -m apps.slack_rag \ + --mcp-server "slack-mcp-server" \ + --max-retries 10 \ + --retry-delay 3.0 \ + --channels general \ + --query "Your query here" + ``` +3. **Keep MCP Server Running**: Start the MCP server separately and keep it running: + ```bash + # Terminal 1: Start MCP server + slack-mcp-server + + # Terminal 2: Run LEANN (it will connect to the running server) + python -m apps.slack_rag --mcp-server "slack-mcp-server" --channels general --query "test" + ``` + +### Issue 2: "No message fetching tool found" + +**Problem**: The MCP server doesn't have the expected tools. + +**Solution**: +1. Check if your MCP server is properly installed and configured +2. Verify your Slack tokens are correct +3. Try a different MCP server implementation +4. Check the MCP server documentation for required configuration + +### Issue 3: Permission Denied Errors + +**Problem**: You get permission errors when trying to access channels. + +**Solutions**: +1. **Check Bot Permissions**: Ensure your bot has been added to the channels you want to access +2. **Verify Token Scopes**: Make sure you have all required scopes configured +3. **Channel Access**: For private channels, the bot needs to be explicitly invited +4. **Workspace Permissions**: Ensure your Slack app has the necessary workspace permissions + +### Issue 4: Empty Results + +**Problem**: No messages are returned even though the channel has messages. + +**Solutions**: +1. **Check Channel Names**: Ensure channel names are correct (without the # symbol) +2. **Verify Bot Access**: Make sure the bot can access the channels +3. **Check Date Ranges**: Some MCP servers have limitations on message history +4. 
**Increase Message Limits**: Try increasing the message limit: + ```bash + python -m apps.slack_rag \ + --mcp-server "slack-mcp-server" \ + --channels general \ + --max-messages-per-channel 1000 \ + --query "test" + ``` + +## Advanced Configuration + +### Custom MCP Server Commands + +If you need to pass additional parameters to your MCP server: + +```bash +python -m apps.slack_rag \ + --mcp-server "slack-mcp-server --token-file /path/to/tokens.json" \ + --workspace-name "Your Workspace" \ + --channels general \ + --query "Your query" +``` + +### Multiple Workspaces + +To work with multiple Slack workspaces, you can: + +1. Create separate apps for each workspace +2. Use different environment variables +3. Run separate instances with different configurations + +### Performance Optimization + +For better performance with large workspaces: + +```bash +python -m apps.slack_rag \ + --mcp-server "slack-mcp-server" \ + --workspace-name "Your Workspace" \ + --max-messages-per-channel 500 \ + --no-concatenate-conversations \ + --query "Your query" +``` + +## Troubleshooting Checklist + +- [ ] Slack app created with proper permissions +- [ ] Bot token (xoxb-) copied correctly +- [ ] App-level token (xapp-) created if needed +- [ ] MCP server installed and accessible +- [ ] Environment variables set correctly +- [ ] Bot invited to relevant channels +- [ ] Channel names specified without # symbol +- [ ] Sufficient retry attempts configured +- [ ] Network connectivity to Slack APIs + +## Getting Help + +If you continue to have issues: + +1. **Check Logs**: Look for detailed error messages in the console output +2. **Test MCP Server**: Use `--test-connection` to verify the MCP server is working +3. **Verify Tokens**: Double-check that your Slack tokens are valid and have the right scopes +4. 
**Community Support**: Reach out to the LEANN community for help + +## Example Commands + +### Basic Usage +```bash +# Test connection +python -m apps.slack_rag --mcp-server "slack-mcp-server" --test-connection + +# Index specific channels +python -m apps.slack_rag \ + --mcp-server "slack-mcp-server" \ + --workspace-name "My Company" \ + --channels general random \ + --query "What did we decide about the project timeline?" +``` + +### Advanced Usage +```bash +# With custom retry settings +python -m apps.slack_rag \ + --mcp-server "slack-mcp-server" \ + --workspace-name "My Company" \ + --channels general \ + --max-retries 10 \ + --retry-delay 5.0 \ + --max-messages-per-channel 2000 \ + --query "Show me all decisions made in the last month" +``` + +This guide should help you get Slack integration working smoothly with LEANN. The built-in retry logic should significantly reduce the "users cache is not ready yet" errors you may encounter.