diff --git a/README.md b/README.md index 5fa5248..09d811c 100755 --- a/README.md +++ b/README.md @@ -18,6 +18,8 @@ LEANN achieves this through *graph-based selective recomputation* with *high-deg **Ready to RAG Everything?** Transform your laptop into a personal AI assistant that can search your **[file system](#-personal-data-manager-process-any-documents-pdf-txt-md)**, **[emails](#-your-personal-email-secretary-rag-on-apple-mail)**, **[browser history](#-time-machine-for-the-web-rag-your-entire-browser-history)**, **[chat history](#-wechat-detective-unlock-your-golden-memories)**, or external knowledge bases (i.e., 60M documents) - all on your laptop, with zero cloud costs and complete privacy. +> **🚀 NEW: Claude Code Integration!** LEANN now provides native MCP integration for Claude Code users. Index your codebase and get intelligent code assistance directly in Claude Code. [Setup Guide →](packages/leann-mcp/README.md) + ## Why LEANN? @@ -427,7 +429,7 @@ source .venv/bin/activate leann --help ``` -**To make it globally available (recommended for daily use):** +**To make it globally available:** ```bash # Install the LEANN CLI globally using uv tool uv tool install leann @@ -436,12 +438,17 @@ uv tool install leann leann --help ``` +> **Note**: Global installation is required for Claude Code integration. The `leann_mcp` server depends on the globally available `leann` command.
+ ### Usage Examples ```bash -# Build an index from documents +# Build an index from current directory (default) +leann build my-docs + +# Or from specific directory leann build my-docs --docs ./documents # Search your documents diff --git a/assets/claude_code_leann.png b/assets/claude_code_leann.png new file mode 100644 index 0000000..12894ef Binary files /dev/null and b/assets/claude_code_leann.png differ diff --git a/packages/leann-core/pyproject.toml b/packages/leann-core/pyproject.toml index d526154..ca041f0 100644 --- a/packages/leann-core/pyproject.toml +++ b/packages/leann-core/pyproject.toml @@ -44,6 +44,7 @@ colab = [ [project.scripts] leann = "leann.cli:main" +leann_mcp = "leann.mcp:main" [tool.setuptools.packages.find] where = ["src"] diff --git a/packages/leann-core/src/leann/cli.py b/packages/leann-core/src/leann/cli.py index 61ccbb5..7f9a09e 100644 --- a/packages/leann-core/src/leann/cli.py +++ b/packages/leann-core/src/leann/cli.py @@ -41,13 +41,23 @@ def extract_pdf_text_with_pdfplumber(file_path: str) -> str: class LeannCLI: def __init__(self): - self.indexes_dir = Path.home() / ".leann" / "indexes" + # Always use project-local .leann directory (like .git) + self.indexes_dir = Path.cwd() / ".leann" / "indexes" self.indexes_dir.mkdir(parents=True, exist_ok=True) + # Default parser for documents self.node_parser = SentenceSplitter( chunk_size=256, chunk_overlap=128, separator=" ", paragraph_separator="\n\n" ) + # Code-optimized parser + self.code_parser = SentenceSplitter( + chunk_size=512, # Larger chunks for code context + chunk_overlap=50, # Less overlap to preserve function boundaries + separator="\n", # Split by lines for code + paragraph_separator="\n\n", # Preserve logical code blocks + ) + def get_index_path(self, index_name: str) -> str: index_dir = self.indexes_dir / index_name return str(index_dir / "documents.leann") @@ -76,7 +86,9 @@ Examples: # Build command build_parser = subparsers.add_parser("build", help="Build document index") 
def register_project_dir(self):
    """Record the current working directory in the global project registry.

    The registry is a JSON list of directory paths stored at
    ``~/.leann/projects.json``. A missing, unreadable, or malformed registry
    (including valid JSON that is not a list) is treated as empty instead of
    crashing, and the file is only rewritten when the current directory is
    not already listed.
    """
    import json

    registry_path = Path.home() / ".leann" / "projects.json"
    registry_path.parent.mkdir(parents=True, exist_ok=True)

    current_dir = str(Path.cwd())

    # Load the existing registry, keeping only well-formed string entries.
    projects = []
    if registry_path.exists():
        try:
            with open(registry_path, encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            loaded = []
        if isinstance(loaded, list):
            projects = [entry for entry in loaded if isinstance(entry, str)]

    if current_dir in projects:
        return

    projects.append(current_dir)
    with open(registry_path, "w", encoding="utf-8") as f:
        json.dump(projects, f, indent=2)


def list_indexes(self):
    """Print every LEANN index found in registered projects and the cwd.

    Projects come from ``~/.leann/projects.json`` plus the current working
    directory. A project is only shown when its ``.leann/indexes`` directory
    contains at least one index sub-directory, so an empty (auto-created)
    ``.leann/indexes`` still produces the "No indexes found" hint.
    """
    import json

    print("Stored LEANN indexes:")

    # Read the global registry defensively: bad content means "no projects".
    registry_path = Path.home() / ".leann" / "projects.json"
    registered = []
    if registry_path.exists():
        try:
            with open(registry_path, encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError):
            loaded = []
        if isinstance(loaded, list):
            registered = [entry for entry in loaded if isinstance(entry, str)]

    current_dir = Path.cwd()

    # The current project is considered even when it was never registered.
    candidates = [Path(entry) for entry in registered]
    if current_dir not in candidates:
        candidates.append(current_dir)

    # Keep only projects that really contain indexes, without duplicates.
    projects_with_indexes = []
    seen = set()
    for project_path in candidates:
        if project_path in seen:
            continue
        seen.add(project_path)

        indexes_dir = project_path / ".leann" / "indexes"
        if not indexes_dir.is_dir():
            continue

        index_dirs = sorted(d for d in indexes_dir.iterdir() if d.is_dir())
        if index_dirs:
            projects_with_indexes.append((project_path, index_dirs))

    if not projects_with_indexes:
        print("No indexes found. Use 'leann build <name> --docs <dir>' to create one.")
        return

    total_indexes = 0
    for project_path, index_dirs in projects_with_indexes:
        # Show project header
        if project_path == current_dir:
            print(f"\n📁 Current project ({project_path}):")
        else:
            print(f"\n📂 {project_path}:")

        for index_dir in index_dirs:
            total_indexes += 1
            meta_file = index_dir / "documents.leann.meta.json"
            is_built = meta_file.exists()
            status = "✓" if is_built else "✗"

            print(f"  {total_indexes}. {index_dir.name} [{status}]")
            if is_built:
                size_mb = sum(
                    f.stat().st_size for f in index_dir.iterdir() if f.is_file()
                ) / (1024 * 1024)
                print(f"     Size: {size_mb:.1f} MB")

    print(f"\nTotal: {total_indexes} indexes across {len(projects_with_indexes)} projects")

    # Usage examples only make sense for indexes reachable from the cwd.
    current_index_dirs = next(
        (dirs for path, dirs in projects_with_indexes if path == current_dir), None
    )
    if current_index_dirs:
        example_name = current_index_dirs[0].name
        print("\nUsage (current project only):")
        print(f'  leann search {example_name} "your query"')
        print(f"  leann ask {example_name} --interactive")
".kt", + ".scala", + ".r", + ".sql", + ".sh", + ".bash", + ".zsh", + ".fish", + ".ps1", + ".bat", + ".json", + ".yaml", + ".yml", + ".xml", + ".toml", + ".ini", + ".cfg", + ".conf", + ".html", + ".css", + ".scss", + ".less", + ".vue", + ".svelte", + ".ipynb", + ".R", + ".jl", + } + for doc in documents: - nodes = self.node_parser.get_nodes_from_documents([doc]) + # Check if this is a code file based on source path + source_path = doc.metadata.get("source", "") + is_code_file = any(source_path.endswith(ext) for ext in code_file_exts) + + # Use appropriate parser based on file type + parser = self.code_parser if is_code_file else self.node_parser + nodes = parser.get_nodes_from_documents([doc]) + for node in nodes: all_texts.append(node.get_content()) @@ -219,6 +411,8 @@ Examples: index_dir = self.indexes_dir / index_name index_path = self.get_index_path(index_name) + print(f"šŸ“‚ Indexing: {Path(docs_dir).resolve()}") + if index_dir.exists() and not args.force: print(f"Index '{index_name}' already exists. 
# JSON-RPC 2.0 reserved error codes.
_PARSE_ERROR = -32700
_INVALID_REQUEST = -32600
_METHOD_NOT_FOUND = -32601
_INVALID_PARAMS = -32602
_INTERNAL_ERROR = -32603

# Tool catalogue advertised through "tools/list".
_TOOLS = [
    {
        "name": "leann_search",
        "description": "Search LEANN index",
        "inputSchema": {
            "type": "object",
            "properties": {
                "index_name": {"type": "string"},
                "query": {"type": "string"},
                "top_k": {"type": "integer", "default": 5},
            },
            "required": ["index_name", "query"],
        },
    },
    {
        "name": "leann_ask",
        "description": "Ask question using LEANN RAG",
        "inputSchema": {
            "type": "object",
            "properties": {
                "index_name": {"type": "string"},
                "question": {"type": "string"},
            },
            "required": ["index_name", "question"],
        },
    },
    {
        "name": "leann_list",
        "description": "List all LEANN indexes",
        "inputSchema": {"type": "object", "properties": {}},
    },
]


def _error_response(request_id, code, message):
    """Build a JSON-RPC 2.0 error response object."""
    return {
        "jsonrpc": "2.0",
        "id": request_id,
        "error": {"code": code, "message": message},
    }


def _build_command(tool_name, args):
    """Translate a tool call into ``(argv, stdin_text)`` for the ``leann`` CLI.

    User-supplied values are passed as separate argv entries or via stdin and
    never interpolated into a shell string.

    Raises:
        KeyError: a required argument is missing.
        TypeError, ValueError: ``top_k`` is not an integer, or the tool name
            is unknown.
    """
    if tool_name == "leann_search":
        argv = [
            "leann",
            "search",
            str(args["index_name"]),
            str(args["query"]),
            "--recompute-embeddings",
            f"--top-k={int(args.get('top_k', 5))}",
        ]
        return argv, None

    if tool_name == "leann_ask":
        argv = [
            "leann",
            "ask",
            str(args["index_name"]),
            "--recompute-embeddings",
            "--llm",
            "ollama",
            "--model",
            "qwen3:8b",
        ]
        # The question is fed on stdin, followed by a newline, then EOF.
        return argv, f"{args['question']}\n"

    if tool_name == "leann_list":
        return ["leann", "list"], None

    raise ValueError(f"Unknown tool: {tool_name}")


def _call_tool(request_id, params):
    """Execute a "tools/call" request and wrap the CLI output as a tool result."""
    if not isinstance(params, dict) or "name" not in params:
        return _error_response(request_id, _INVALID_PARAMS, "tools/call requires params.name")

    args = params.get("arguments") or {}
    if not isinstance(args, dict):
        return _error_response(request_id, _INVALID_PARAMS, "params.arguments must be an object")

    try:
        argv, stdin_text = _build_command(params["name"], args)
    except KeyError as e:
        return _error_response(
            request_id, _INVALID_PARAMS, f"Missing required argument: {e.args[0]}"
        )
    except (TypeError, ValueError) as e:
        return _error_response(request_id, _INVALID_PARAMS, str(e))

    # Never let the child inherit our stdin: it is the JSON-RPC channel.
    if stdin_text is None:
        stdin_kwargs = {"stdin": subprocess.DEVNULL}
    else:
        stdin_kwargs = {"input": stdin_text}

    try:
        # Run in the server's own working directory so project-local
        # ``.leann`` indexes are resolved relative to where it was launched.
        result = subprocess.run(argv, capture_output=True, text=True, **stdin_kwargs)
    except OSError as e:
        # Typically: the ``leann`` executable is not on PATH.
        return _error_response(request_id, _INTERNAL_ERROR, f"Failed to run {argv[0]!r}: {e}")

    succeeded = result.returncode == 0
    return {
        "jsonrpc": "2.0",
        "id": request_id,
        "result": {
            "content": [
                {
                    "type": "text",
                    "text": result.stdout if succeeded else f"Error: {result.stderr}",
                }
            ],
            "isError": not succeeded,
        },
    }


def handle_request(request):
    """Handle one decoded JSON-RPC request and return the response dict.

    Supports ``initialize``, ``tools/list`` and ``tools/call``. Returns
    ``None`` for unrecognised notifications (messages without an ``id``), and
    a "method not found" error for unrecognised requests that carry an ``id``.
    """
    if not isinstance(request, dict):
        return _error_response(None, _INVALID_REQUEST, "Request must be a JSON object")

    method = request.get("method")
    request_id = request.get("id")

    if method == "initialize":
        return {
            "jsonrpc": "2.0",
            "id": request_id,
            "result": {
                "capabilities": {"tools": {}},
                "protocolVersion": "2024-11-05",
                "serverInfo": {"name": "leann-mcp", "version": "1.0.0"},
            },
        }

    if method == "tools/list":
        return {"jsonrpc": "2.0", "id": request_id, "result": {"tools": _TOOLS}}

    if method == "tools/call":
        return _call_tool(request_id, request.get("params"))

    if "id" not in request:
        # Notifications (e.g. "notifications/initialized") get no reply.
        return None
    return _error_response(request_id, _METHOD_NOT_FOUND, f"Method not found: {method}")


def main():
    """Serve newline-delimited JSON-RPC requests from stdin until EOF."""
    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue

        try:
            request = json.loads(line)
        except ValueError as e:
            response = _error_response(None, _PARSE_ERROR, f"Parse error: {e}")
        else:
            try:
                response = handle_request(request)
            except Exception as e:  # last-resort boundary: keep the server alive
                request_id = request.get("id") if isinstance(request, dict) else None
                response = _error_response(request_id, _INTERNAL_ERROR, str(e))

        if response is not None:
            print(json.dumps(response), flush=True)


if __name__ == "__main__":
    main()
-This package provides a Model Context Protocol (MCP) server that integrates LEANN's vector search and RAG capabilities directly into Claude Code, enabling intelligent code analysis, documentation Q&A, and knowledge-driven development. +## Prerequisites -## 🚀 Quick Start - -### 1. Install +First, install LEANN CLI globally: ```bash -# Install dependencies -pip install leann mcp - -# Clone or download this package -git clone https://github.com/yichuan-w/LEANN.git -cd LEANN-RAG/packages/leann-mcp +uv tool install leann ``` -### 2. Configure Claude Code +This makes the `leann` command available system-wide, which `leann_mcp` requires. -Add to your `~/.claude/mcp.json`: +## Quick Setup -```json -{ - "mcpServers": { - "leann-rag": { - "command": "python", - "args": ["/absolute/path/to/leann_mcp_server.py"] - } - } -} -``` - -### 3. Start Using +Add the LEANN MCP server to Claude Code: ```bash +claude mcp add leann-server -- leann_mcp +``` + +## Available Tools + +- **`leann_list`** - List available indexes across all projects +- **`leann_search`** - Search code and documents with semantic queries +- **`leann_ask`** - Ask questions and get AI-powered answers from your codebase + +## Quick Start + +```bash +# Build an index for your project +leann build my-project + # Start Claude Code claude - -# In Claude, use LEANN tools: -# "Build an index from my codebase and help me understand the architecture" ``` -## 🛠️ Available Tools - -### `leann_build` -Build a vector index from documents or code -```python -leann_build( - index_name="my-project", - data_path="./src", - backend="hnsw", # or "diskann" - embedding_model="facebook/contriever" -) +Then in Claude Code: +``` +Help me understand this codebase. List available indexes and search for authentication patterns. ``` -### `leann_search` -Search through an index for relevant passages -```python -leann_search( - query="authentication middleware", - index_name="my-project", - top_k=10, - complexity=64 -) +

+ LEANN in Claude Code +

+ + +## How It Works + +- **`leann`** - Core CLI tool for indexing and searching (installed globally) +- **`leann_mcp`** - MCP server that wraps `leann` commands for Claude Code integration +- Claude Code calls `leann_mcp`, which executes `leann` commands and returns results + +## File Support + +Python, JavaScript, TypeScript, Java, Go, Rust, SQL, YAML, JSON, and 30+ more file types. + +## Storage + +- Project indexes in `.leann/` directory (like `.git`) +- Global project registry at `~/.leann/projects.json` +- Multi-project support built-in + +## Removing + +```bash +claude mcp remove leann-server ``` - -### `leann_ask` -Ask questions using RAG with LLM responses -```python -leann_ask( - question="How does user authentication work?", - index_name="my-project", - llm_config={"type": "ollama", "model": "qwen3:7b"} -) -``` - -### `leann_list_indexes` -List all available indexes - -### `leann_delete_index` -Delete an index (with confirmation) - -## 💡 Use Cases - -### 📚 **Code Understanding** -``` -"Build an index from my codebase and explain the authentication flow" -``` - -### 🔍 **Smart Code Search** -``` -"Search for error handling patterns in our API endpoints" -``` - -### 📖 **Documentation Q&A** -``` -"Create an index from our docs and answer: What are the deployment requirements?"
-``` - -### 🏗️ **Architecture Analysis** -``` -"Analyze our system architecture and suggest improvements" -``` - -### 🔧 **Development Assistance** -``` -"Based on existing code patterns, help me implement user permissions" -``` - -## 🎯 Key Features - -- **🔌 Zero-Config Integration**: Works out of the box with Claude Code -- **🧠 Smart Indexing**: Automatically handles multiple file formats -- **⚡ High Performance**: LEANN's 97% storage savings + fast search -- **🔄 Real-Time**: Build and query indexes during development -- **🎨 Flexible**: Support for multiple backends and embedding models -- **💬 Conversational**: Natural language interface for complex queries - -## 📁 Project Structure - -``` -packages/leann-mcp/ -├── leann_mcp_server.py # Main MCP server implementation -├── requirements.txt # Python dependencies -├── package.json # NPM package metadata -├── claude-config-examples/ # Configuration examples -│ ├── claude-mcp-config.json # Basic Claude configuration -│ └── usage-examples.md # Detailed usage examples -└── README.md # This file -``` - -## 🔧 Advanced Configuration - -### Custom Index Directory -```python -# In your environment or server config -DEFAULT_CONFIG = { - "indexes_dir": "/custom/path/to/indexes", - "embedding_model": "BAAI/bge-base-en-v1.5", - "backend": "diskann" -} -``` - -### Hook Integration -Automatically reindex when files change: - -```json -{ - "hooks": { - "PostToolUse": [ - { - "matcher": "Write.*\\.(py|js|ts)$", - "hooks": [{"type": "mcp_call", "server": "leann-rag", "tool": "leann_build"}] - } - ] - } -} -``` - -### Sub-Agent Templates -Create specialized RAG agents in `.claude/agents/`: - -```markdown ---- -name: code-analyst -description: Code analysis using LEANN RAG -tools: leann_build, leann_search, leann_ask ---- - -You are a senior code analyst with access to LEANN RAG. -When analyzing code, always: -1. Build indexes of relevant code sections -2.
Search for patterns and anti-patterns -3. Provide evidence-based recommendations -``` - -## 🚀 Performance & Scaling - -- **Small Projects** (<1K files): Use HNSW backend -- **Large Codebases** (>10K files): Use DiskANN backend -- **Memory Usage**: ~100MB per index (vs ~10GB traditional) -- **Build Time**: 2-5 minutes for typical project -- **Search Time**: <100ms for most queries - -## 🤝 Contributing - -This MCP server is part of the larger LEANN project. See the main README for contribution guidelines. - -## 📄 License - -MIT License - see the main LEANN project for details. - -## 🔗 Links - -- [LEANN Main Project](../../README.md) -- [Claude Code Documentation](https://docs.anthropic.com/claude/docs/claude-code) -- [MCP Specification](https://modelcontextprotocol.io/) -- [Usage Examples](claude-config-examples/usage-examples.md) - ---- - -**Built with ❤️ by the LEANN team for the Claude Code community**