From 67c5a3e83872b9aeca35e37e068bcc4a54a0013a Mon Sep 17 00:00:00 2001
From: Andy Lee <andylizf@outlook.com>
Date: Sat, 9 Aug 2025 00:28:25 -0700
Subject: [PATCH] fix: remove leann_ask; add index/status/clear tools

---
 packages/leann-core/src/leann/mcp.py | 307 +++++++++++++++++++++++++--
 1 file changed, 291 insertions(+), 16 deletions(-)

diff --git a/packages/leann-core/src/leann/mcp.py b/packages/leann-core/src/leann/mcp.py
index f5a2cae..44fdcf5 100755
--- a/packages/leann-core/src/leann/mcp.py
+++ b/packages/leann-core/src/leann/mcp.py
@@ -24,33 +24,155 @@ def handle_request(request):
             "result": {
                 "tools": [
                     {
-                        "name": "leann_search",
-                        "description": "Search LEANN index",
+                        "name": "leann_index",
+                        "description": """🏗️ Index a codebase for intelligent code search and understanding.
+
+🎯 **When to use**: Before analyzing, modifying, or understanding any codebase
+📁 **What it does**: Creates a semantic search index of code files and documentation
+⚡ **Why it's useful**: Enables fast, intelligent searches like "authentication logic", "error handling patterns", "API endpoints"
+
+This is your first step for any serious codebase work - think of it as giving yourself superpowers to understand and navigate code.""",
                         "inputSchema": {
                             "type": "object",
                             "properties": {
-                                "index_name": {"type": "string"},
-                                "query": {"type": "string"},
-                                "top_k": {"type": "integer", "default": 5},
+                                "index_name": {
+                                    "type": "string",
+                                    "description": "Name for the new index. Use descriptive names like 'my-project' or 'backend-api'.",
+                                },
+                                "docs_path": {
+                                    "type": "string",
+                                    "description": "Path to the directory containing code/documents to index. Can be relative (e.g., './src') or absolute.",
+                                },
+                                "force": {
+                                    "type": "boolean",
+                                    "default": False,
+                                    "description": "Force rebuild of existing index. Use when you want to completely reindex and overwrite existing data.",
+                                },
+                                "backend": {
+                                    "type": "string",
+                                    "enum": ["hnsw", "diskann"],
+                                    "default": "hnsw",
+                                    "description": "Vector index backend: 'hnsw' for balanced performance, 'diskann' for large-scale datasets.",
+                                },
+                                "embedding_model": {
+                                    "type": "string",
+                                    "default": "facebook/contriever",
+                                    "description": "Embedding model to use. Popular options: 'facebook/contriever', 'sentence-transformers/all-MiniLM-L6-v2'",
+                                },
+                                "file_types": {
+                                    "type": "array",
+                                    "items": {"type": "string"},
+                                    "description": "File extensions to include (e.g., ['.py', '.js', '.ts', '.md']). If not specified, uses default supported types.",
+                                },
+                                "ignore_patterns": {
+                                    "type": "array",
+                                    "items": {"type": "string"},
+                                    "default": [],
+                                    "description": "Patterns to ignore during indexing (e.g., ['node_modules', '__pycache__', '*.tmp', 'dist']). Common patterns are automatically ignored.",
+                                },
+                            },
+                            "required": ["index_name", "docs_path"],
+                        },
+                    },
+                    {
+                        "name": "leann_search",
+                        "description": """🔍 Search code using natural language - like having a coding assistant who knows your entire codebase!
+
+🎯 **Perfect for**:
+- "How does authentication work?" → finds auth-related code
+- "Error handling patterns" → locates try-catch blocks and error logic
+- "Database connection setup" → finds DB initialization code
+- "API endpoint definitions" → locates route handlers
+- "Configuration management" → finds config files and usage
+
+💡 **Pro tip**: Use this before making any changes to understand existing patterns and conventions.""",
+                        "inputSchema": {
+                            "type": "object",
+                            "properties": {
+                                "index_name": {
+                                    "type": "string",
+                                    "description": "Name of the LEANN index to search. Use 'leann_list' first to see available indexes.",
+                                },
+                                "query": {
+                                    "type": "string",
+                                    "description": "Search query - can be natural language (e.g., 'how to handle errors') or technical terms (e.g., 'async function definition')",
+                                },
+                                "top_k": {
+                                    "type": "integer",
+                                    "default": 5,
+                                    "minimum": 1,
+                                    "maximum": 20,
+                                    "description": "Number of search results to return. Use 5-10 for focused results, 15-20 for comprehensive exploration.",
+                                },
+                                "complexity": {
+                                    "type": "integer",
+                                    "default": 32,
+                                    "minimum": 16,
+                                    "maximum": 128,
+                                    "description": "Search complexity level. Use 16-32 for fast searches (recommended), 64+ for higher precision when needed.",
+                                },
+                                "search_mode": {
+                                    "type": "string",
+                                    "enum": ["fast", "balanced", "precise"],
+                                    "default": "balanced",
+                                    "description": "Search strategy: 'fast' (~2-5s), 'balanced' (~5-10s), 'precise' (~10-20s). Choose based on time vs accuracy needs.",
+                                },
+                                "recompute_embeddings": {
+                                    "type": "boolean",
+                                    "default": False,
+                                    "description": "Recompute embeddings for maximum accuracy. Enable only when precision is more important than speed.",
+                                },
+                                "file_types": {
+                                    "type": "array",
+                                    "items": {"type": "string"},
+                                    "description": "Filter results by file types (e.g., ['py', 'js', 'ts']). Searches all indexed file types if not specified.",
+                                },
+                                "min_score": {
+                                    "type": "number",
+                                    "minimum": 0.0,
+                                    "maximum": 1.0,
+                                    "default": 0.0,
+                                    "description": "Minimum relevance score threshold (0.0-1.0). Higher values return more relevant but fewer results.",
+                                },
                             },
                             "required": ["index_name", "query"],
                         },
                     },
                     {
-                        "name": "leann_ask",
-                        "description": "Ask question using LEANN RAG",
+                        "name": "leann_status",
+                        "description": "📊 Check the health and stats of your code indexes - like a medical checkup for your codebase knowledge!",
                         "inputSchema": {
                             "type": "object",
                             "properties": {
-                                "index_name": {"type": "string"},
-                                "question": {"type": "string"},
+                                "index_name": {
+                                    "type": "string",
+                                    "description": "Optional: Name of specific index to check. If not provided, shows status of all indexes.",
+                                }
                             },
-                            "required": ["index_name", "question"],
+                        },
+                    },
+                    {
+                        "name": "leann_clear",
+                        "description": "🗑️ Safely delete a code index (with confirmation required). Think of it as 'rm -rf' but for your search indexes - be careful!",
+                        "inputSchema": {
+                            "type": "object",
+                            "properties": {
+                                "index_name": {
+                                    "type": "string",
+                                    "description": "Name of the index to clear/delete.",
+                                },
+                                "confirm": {
+                                    "type": "boolean",
+                                    "default": False,
+                                    "description": "Confirmation flag. Must be set to true to actually perform the deletion.",
+                                },
+                            },
+                            "required": ["index_name"],
                         },
                     },
                     {
                         "name": "leann_list",
-                        "description": "List all LEANN indexes",
+                        "description": "📋 Show all your indexed codebases - your personal code library! Use this to see what's available for search.",
                         "inputSchema": {"type": "object", "properties": {}},
                     },
                 ]
@@ -62,20 +184,173 @@ def handle_request(request):
         args = request["params"].get("arguments", {})
 
         try:
-            if tool_name == "leann_search":
+            if tool_name == "leann_index":
+                # Validate required parameters
+                if not args.get("index_name") or not args.get("docs_path"):
+                    return {
+                        "jsonrpc": "2.0",
+                        "id": request.get("id"),
+                        "result": {
+                            "content": [
+                                {
+                                    "type": "text",
+                                    "text": "Error: Both index_name and docs_path are required",
+                                }
+                            ]
+                        },
+                    }
+
+                # Validate docs_path exists
+                import os
+
+                docs_path = args["docs_path"]
+                if not os.path.exists(docs_path):
+                    return {
+                        "jsonrpc": "2.0",
+                        "id": request.get("id"),
+                        "result": {
+                            "content": [
+                                {
+                                    "type": "text",
+                                    "text": f"Error: Path '{docs_path}' does not exist",
+                                }
+                            ]
+                        },
+                    }
+
+                # Build index command
+                cmd = [
+                    "leann",
+                    "build",
+                    args["index_name"],
+                    "--docs",
+                    docs_path,
+                    "--backend",
+                    args.get("backend", "hnsw"),
+                    "--embedding-model",
+                    args.get("embedding_model", "facebook/contriever"),
+                ]
+
+                # Add force flag if specified
+                if args.get("force", False):
+                    cmd.append("--force")
+
+                # Add file types if specified (now as array)
+                file_types = args.get("file_types")
+                if file_types and isinstance(file_types, list):
+                    cmd.extend(["--file-types", ",".join(file_types)])
+
+                # Add ignore patterns if specified
+                ignore_patterns = args.get("ignore_patterns", [])
+                if ignore_patterns and isinstance(ignore_patterns, list):
+                    # For now, pass as comma-separated string - CLI can be enhanced later
+                    cmd.extend(["--ignore", ",".join(ignore_patterns)])
+                result = subprocess.run(cmd, capture_output=True, text=True)
+
+            elif tool_name == "leann_search":
+                # Validate required parameters
+                if not args.get("index_name") or not args.get("query"):
+                    return {
+                        "jsonrpc": "2.0",
+                        "id": request.get("id"),
+                        "result": {
+                            "content": [
+                                {
+                                    "type": "text",
+                                    "text": "Error: Both index_name and query are required",
+                                }
+                            ]
+                        },
+                    }
+
+                # Build command with enhanced parameters
                 cmd = [
                     "leann",
                     "search",
                     args["index_name"],
                     args["query"],
-                    "--recompute-embeddings",
                     f"--top-k={args.get('top_k', 5)}",
                 ]
+
+                # Handle search mode mapping to set complexity and beam width
+                search_mode = args.get("search_mode", "balanced")
+                if search_mode == "fast":
+                    cmd.extend(["--complexity=16", "--beam-width=1"])
+                elif search_mode == "precise":
+                    cmd.extend(["--complexity=64", "--beam-width=2"])
+                else:  # balanced mode
+                    complexity = args.get("complexity", 32)
+                    cmd.append(f"--complexity={complexity}")
+
+                # Handle recompute embeddings
+                if args.get("recompute_embeddings", False):
+                    cmd.append("--recompute-embeddings")
+
+                # Handle file types filtering
+                file_types = args.get("file_types")
+                if file_types and isinstance(file_types, list):
+                    # Validate file extensions
+                    valid_extensions = []
+                    for ext in file_types:
+                        if isinstance(ext, str) and ext.strip():
+                            clean_ext = ext.strip()
+                            if not clean_ext.startswith("."):
+                                clean_ext = "." + clean_ext
+                            valid_extensions.append(clean_ext)
+
+                    if valid_extensions:
+                        cmd.extend(["--filter-extensions", ",".join(valid_extensions)])
+
                 result = subprocess.run(cmd, capture_output=True, text=True)
 
-            elif tool_name == "leann_ask":
-                cmd = f'echo "{args["question"]}" | leann ask {args["index_name"]} --recompute-embeddings --llm ollama --model qwen3:8b'
-                result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+                # Handle min_score filtering in post-processing if needed
+                min_score = args.get("min_score", 0.0)
+                if min_score > 0.0 and result.returncode == 0:
+                    # NOTE: min_score is currently a no-op placeholder. To honor it,
+                    # the CLI would need to return structured data for filtering
+                    pass
+
+            elif tool_name == "leann_status":
+                if args.get("index_name"):
+                    # Specific index requested - for now this just runs `leann list`; no per-index filtering yet
+                    result = subprocess.run(["leann", "list"], capture_output=True, text=True)
+                    # We could enhance this to show more detailed status per index
+                else:
+                    # Show all indexes status
+                    result = subprocess.run(["leann", "list"], capture_output=True, text=True)
+
+            elif tool_name == "leann_clear":
+                index_name = args["index_name"]
+                confirm = args.get("confirm", False)
+
+                if not confirm:
+                    return {
+                        "jsonrpc": "2.0",
+                        "id": request.get("id"),
+                        "result": {
+                            "content": [
+                                {
+                                    "type": "text",
+                                    "text": f"Warning: This will permanently delete index '{index_name}'. To proceed, call this tool again with confirm=true.",
+                                }
+                            ]
+                        },
+                    }
+
+                # For clearing, we need to implement this in the CLI
+                # For now, we'll return a message explaining the limitation
+                return {
+                    "jsonrpc": "2.0",
+                    "id": request.get("id"),
+                    "result": {
+                        "content": [
+                            {
+                                "type": "text",
+                                "text": f"Clear functionality for index '{index_name}' is not yet implemented in CLI. You can manually delete the index files in .leann/indexes/{index_name}/",
+                            }
+                        ]
+                    },
+                }
 
             elif tool_name == "leann_list":
                 result = subprocess.run(["leann", "list"], capture_output=True, text=True)