Merge pull request #19 from yichuan-w/feature/claude-code-research
Feature/claude code research
This commit is contained in:
Submodule packages/leann-backend-diskann/third_party/DiskANN updated: af2a26481e...67a2611ad1
@@ -44,6 +44,7 @@ colab = [
|
||||
|
||||
[project.scripts]
|
||||
leann = "leann.cli:main"
|
||||
leann_mcp = "leann.mcp:main"
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["src"]
|
||||
|
||||
@@ -41,13 +41,23 @@ def extract_pdf_text_with_pdfplumber(file_path: str) -> str:
|
||||
|
||||
class LeannCLI:
|
||||
def __init__(self):
|
||||
self.indexes_dir = Path.home() / ".leann" / "indexes"
|
||||
# Always use project-local .leann directory (like .git)
|
||||
self.indexes_dir = Path.cwd() / ".leann" / "indexes"
|
||||
self.indexes_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Default parser for documents
|
||||
self.node_parser = SentenceSplitter(
|
||||
chunk_size=256, chunk_overlap=128, separator=" ", paragraph_separator="\n\n"
|
||||
)
|
||||
|
||||
# Code-optimized parser
|
||||
self.code_parser = SentenceSplitter(
|
||||
chunk_size=512, # Larger chunks for code context
|
||||
chunk_overlap=50, # Less overlap to preserve function boundaries
|
||||
separator="\n", # Split by lines for code
|
||||
paragraph_separator="\n\n", # Preserve logical code blocks
|
||||
)
|
||||
|
||||
def get_index_path(self, index_name: str) -> str:
|
||||
index_dir = self.indexes_dir / index_name
|
||||
return str(index_dir / "documents.leann")
|
||||
@@ -76,7 +86,9 @@ Examples:
|
||||
# Build command
|
||||
build_parser = subparsers.add_parser("build", help="Build document index")
|
||||
build_parser.add_argument("index_name", help="Index name")
|
||||
build_parser.add_argument("--docs", type=str, required=True, help="Documents directory")
|
||||
build_parser.add_argument(
|
||||
"--docs", type=str, default=".", help="Documents directory (default: current directory)"
|
||||
)
|
||||
build_parser.add_argument(
|
||||
"--backend", type=str, default="hnsw", choices=["hnsw", "diskann"]
|
||||
)
|
||||
@@ -138,37 +150,109 @@ Examples:
|
||||
|
||||
return parser
|
||||
|
||||
def register_project_dir(self):
|
||||
"""Register current project directory in global registry"""
|
||||
global_registry = Path.home() / ".leann" / "projects.json"
|
||||
global_registry.parent.mkdir(exist_ok=True)
|
||||
|
||||
current_dir = str(Path.cwd())
|
||||
|
||||
# Load existing registry
|
||||
projects = []
|
||||
if global_registry.exists():
|
||||
try:
|
||||
import json
|
||||
|
||||
with open(global_registry) as f:
|
||||
projects = json.load(f)
|
||||
except Exception:
|
||||
projects = []
|
||||
|
||||
# Add current directory if not already present
|
||||
if current_dir not in projects:
|
||||
projects.append(current_dir)
|
||||
|
||||
# Save registry
|
||||
import json
|
||||
|
||||
with open(global_registry, "w") as f:
|
||||
json.dump(projects, f, indent=2)
|
||||
|
||||
def list_indexes(self):
|
||||
print("Stored LEANN indexes:")
|
||||
|
||||
if not self.indexes_dir.exists():
|
||||
# Get all project directories with .leann
|
||||
global_registry = Path.home() / ".leann" / "projects.json"
|
||||
all_projects = []
|
||||
|
||||
if global_registry.exists():
|
||||
try:
|
||||
import json
|
||||
|
||||
with open(global_registry) as f:
|
||||
all_projects = json.load(f)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Filter to only existing directories with .leann
|
||||
valid_projects = []
|
||||
for project_dir in all_projects:
|
||||
project_path = Path(project_dir)
|
||||
if project_path.exists() and (project_path / ".leann" / "indexes").exists():
|
||||
valid_projects.append(project_path)
|
||||
|
||||
# Add current project if it has .leann but not in registry
|
||||
current_path = Path.cwd()
|
||||
if (current_path / ".leann" / "indexes").exists() and current_path not in valid_projects:
|
||||
valid_projects.append(current_path)
|
||||
|
||||
if not valid_projects:
|
||||
print("No indexes found. Use 'leann build <name> --docs <dir>' to create one.")
|
||||
return
|
||||
|
||||
index_dirs = [d for d in self.indexes_dir.iterdir() if d.is_dir()]
|
||||
total_indexes = 0
|
||||
current_dir = Path.cwd()
|
||||
|
||||
if not index_dirs:
|
||||
print("No indexes found. Use 'leann build <name> --docs <dir>' to create one.")
|
||||
return
|
||||
for project_path in valid_projects:
|
||||
indexes_dir = project_path / ".leann" / "indexes"
|
||||
if not indexes_dir.exists():
|
||||
continue
|
||||
|
||||
print(f"Found {len(index_dirs)} indexes:")
|
||||
for i, index_dir in enumerate(index_dirs, 1):
|
||||
index_name = index_dir.name
|
||||
status = "✓" if self.index_exists(index_name) else "✗"
|
||||
index_dirs = [d for d in indexes_dir.iterdir() if d.is_dir()]
|
||||
if not index_dirs:
|
||||
continue
|
||||
|
||||
print(f" {i}. {index_name} [{status}]")
|
||||
if self.index_exists(index_name):
|
||||
index_dir / "documents.leann.meta.json"
|
||||
size_mb = sum(f.stat().st_size for f in index_dir.iterdir() if f.is_file()) / (
|
||||
1024 * 1024
|
||||
)
|
||||
print(f" Size: {size_mb:.1f} MB")
|
||||
# Show project header
|
||||
if project_path == current_dir:
|
||||
print(f"\n📁 Current project ({project_path}):")
|
||||
else:
|
||||
print(f"\n📂 {project_path}:")
|
||||
|
||||
if index_dirs:
|
||||
example_name = index_dirs[0].name
|
||||
print("\nUsage:")
|
||||
print(f' leann search {example_name} "your query"')
|
||||
print(f" leann ask {example_name} --interactive")
|
||||
for index_dir in index_dirs:
|
||||
total_indexes += 1
|
||||
index_name = index_dir.name
|
||||
meta_file = index_dir / "documents.leann.meta.json"
|
||||
status = "✓" if meta_file.exists() else "✗"
|
||||
|
||||
print(f" {total_indexes}. {index_name} [{status}]")
|
||||
if status == "✓":
|
||||
size_mb = sum(f.stat().st_size for f in index_dir.iterdir() if f.is_file()) / (
|
||||
1024 * 1024
|
||||
)
|
||||
print(f" Size: {size_mb:.1f} MB")
|
||||
|
||||
if total_indexes > 0:
|
||||
print(f"\nTotal: {total_indexes} indexes across {len(valid_projects)} projects")
|
||||
print("\nUsage (current project only):")
|
||||
|
||||
# Show example from current project
|
||||
current_indexes_dir = current_dir / ".leann" / "indexes"
|
||||
if current_indexes_dir.exists():
|
||||
current_index_dirs = [d for d in current_indexes_dir.iterdir() if d.is_dir()]
|
||||
if current_index_dirs:
|
||||
example_name = current_index_dirs[0].name
|
||||
print(f' leann search {example_name} "your query"')
|
||||
print(f" leann ask {example_name} --interactive")
|
||||
|
||||
def load_documents(self, docs_dir: str):
|
||||
print(f"Loading documents from {docs_dir}...")
|
||||
@@ -203,17 +287,125 @@ Examples:
|
||||
documents.extend(default_docs)
|
||||
|
||||
# Load other file types with default reader
|
||||
code_extensions = [
|
||||
# Original document types
|
||||
".txt",
|
||||
".md",
|
||||
".docx",
|
||||
# Code files for Claude Code integration
|
||||
".py",
|
||||
".js",
|
||||
".ts",
|
||||
".jsx",
|
||||
".tsx",
|
||||
".java",
|
||||
".cpp",
|
||||
".c",
|
||||
".h",
|
||||
".hpp",
|
||||
".cs",
|
||||
".go",
|
||||
".rs",
|
||||
".rb",
|
||||
".php",
|
||||
".swift",
|
||||
".kt",
|
||||
".scala",
|
||||
".r",
|
||||
".sql",
|
||||
".sh",
|
||||
".bash",
|
||||
".zsh",
|
||||
".fish",
|
||||
".ps1",
|
||||
".bat",
|
||||
# Config and markup files
|
||||
".json",
|
||||
".yaml",
|
||||
".yml",
|
||||
".xml",
|
||||
".toml",
|
||||
".ini",
|
||||
".cfg",
|
||||
".conf",
|
||||
".html",
|
||||
".css",
|
||||
".scss",
|
||||
".less",
|
||||
".vue",
|
||||
".svelte",
|
||||
# Data science
|
||||
".ipynb",
|
||||
".R",
|
||||
".py",
|
||||
".jl",
|
||||
]
|
||||
other_docs = SimpleDirectoryReader(
|
||||
docs_dir,
|
||||
recursive=True,
|
||||
encoding="utf-8",
|
||||
required_exts=[".txt", ".md", ".docx"],
|
||||
required_exts=code_extensions,
|
||||
).load_data(show_progress=True)
|
||||
documents.extend(other_docs)
|
||||
|
||||
all_texts = []
|
||||
|
||||
# Define code file extensions for intelligent chunking
|
||||
code_file_exts = {
|
||||
".py",
|
||||
".js",
|
||||
".ts",
|
||||
".jsx",
|
||||
".tsx",
|
||||
".java",
|
||||
".cpp",
|
||||
".c",
|
||||
".h",
|
||||
".hpp",
|
||||
".cs",
|
||||
".go",
|
||||
".rs",
|
||||
".rb",
|
||||
".php",
|
||||
".swift",
|
||||
".kt",
|
||||
".scala",
|
||||
".r",
|
||||
".sql",
|
||||
".sh",
|
||||
".bash",
|
||||
".zsh",
|
||||
".fish",
|
||||
".ps1",
|
||||
".bat",
|
||||
".json",
|
||||
".yaml",
|
||||
".yml",
|
||||
".xml",
|
||||
".toml",
|
||||
".ini",
|
||||
".cfg",
|
||||
".conf",
|
||||
".html",
|
||||
".css",
|
||||
".scss",
|
||||
".less",
|
||||
".vue",
|
||||
".svelte",
|
||||
".ipynb",
|
||||
".R",
|
||||
".jl",
|
||||
}
|
||||
|
||||
for doc in documents:
|
||||
nodes = self.node_parser.get_nodes_from_documents([doc])
|
||||
# Check if this is a code file based on source path
|
||||
source_path = doc.metadata.get("source", "")
|
||||
is_code_file = any(source_path.endswith(ext) for ext in code_file_exts)
|
||||
|
||||
# Use appropriate parser based on file type
|
||||
parser = self.code_parser if is_code_file else self.node_parser
|
||||
nodes = parser.get_nodes_from_documents([doc])
|
||||
|
||||
for node in nodes:
|
||||
all_texts.append(node.get_content())
|
||||
|
||||
@@ -226,6 +418,8 @@ Examples:
|
||||
index_dir = self.indexes_dir / index_name
|
||||
index_path = self.get_index_path(index_name)
|
||||
|
||||
print(f"📂 Indexing: {Path(docs_dir).resolve()}")
|
||||
|
||||
if index_dir.exists() and not args.force:
|
||||
print(f"Index '{index_name}' already exists. Use --force to rebuild.")
|
||||
return
|
||||
@@ -255,6 +449,9 @@ Examples:
|
||||
builder.build_index(index_path)
|
||||
print(f"Index built at {index_path}")
|
||||
|
||||
# Register this project directory in global registry
|
||||
self.register_project_dir()
|
||||
|
||||
async def search_documents(self, args):
|
||||
index_name = args.index_name
|
||||
query = args.query
|
||||
|
||||
134
packages/leann-core/src/leann/mcp.py
Executable file
134
packages/leann-core/src/leann/mcp.py
Executable file
@@ -0,0 +1,134 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
def handle_request(request):
|
||||
if request.get("method") == "initialize":
|
||||
return {
|
||||
"jsonrpc": "2.0",
|
||||
"id": request.get("id"),
|
||||
"result": {
|
||||
"capabilities": {"tools": {}},
|
||||
"protocolVersion": "2024-11-05",
|
||||
"serverInfo": {"name": "leann-mcp", "version": "1.0.0"},
|
||||
},
|
||||
}
|
||||
|
||||
elif request.get("method") == "tools/list":
|
||||
return {
|
||||
"jsonrpc": "2.0",
|
||||
"id": request.get("id"),
|
||||
"result": {
|
||||
"tools": [
|
||||
{
|
||||
"name": "leann_search",
|
||||
"description": "Search LEANN index",
|
||||
"inputSchema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"index_name": {"type": "string"},
|
||||
"query": {"type": "string"},
|
||||
"top_k": {"type": "integer", "default": 5},
|
||||
},
|
||||
"required": ["index_name", "query"],
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "leann_ask",
|
||||
"description": "Ask question using LEANN RAG",
|
||||
"inputSchema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"index_name": {"type": "string"},
|
||||
"question": {"type": "string"},
|
||||
},
|
||||
"required": ["index_name", "question"],
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "leann_list",
|
||||
"description": "List all LEANN indexes",
|
||||
"inputSchema": {"type": "object", "properties": {}},
|
||||
},
|
||||
]
|
||||
},
|
||||
}
|
||||
|
||||
elif request.get("method") == "tools/call":
|
||||
tool_name = request["params"]["name"]
|
||||
args = request["params"].get("arguments", {})
|
||||
|
||||
# Set working directory and environment
|
||||
env = os.environ.copy()
|
||||
cwd = "/Users/andyl/Projects/LEANN-RAG"
|
||||
|
||||
try:
|
||||
if tool_name == "leann_search":
|
||||
cmd = [
|
||||
"leann",
|
||||
"search",
|
||||
args["index_name"],
|
||||
args["query"],
|
||||
"--recompute-embeddings",
|
||||
f"--top-k={args.get('top_k', 5)}",
|
||||
]
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, cwd=cwd, env=env)
|
||||
|
||||
elif tool_name == "leann_ask":
|
||||
cmd = f'echo "{args["question"]}" | leann ask {args["index_name"]} --recompute-embeddings --llm ollama --model qwen3:8b'
|
||||
result = subprocess.run(
|
||||
cmd, shell=True, capture_output=True, text=True, cwd=cwd, env=env
|
||||
)
|
||||
|
||||
elif tool_name == "leann_list":
|
||||
result = subprocess.run(
|
||||
["leann", "list"], capture_output=True, text=True, cwd=cwd, env=env
|
||||
)
|
||||
|
||||
return {
|
||||
"jsonrpc": "2.0",
|
||||
"id": request.get("id"),
|
||||
"result": {
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": result.stdout
|
||||
if result.returncode == 0
|
||||
else f"Error: {result.stderr}",
|
||||
}
|
||||
]
|
||||
},
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
return {
|
||||
"jsonrpc": "2.0",
|
||||
"id": request.get("id"),
|
||||
"error": {"code": -1, "message": str(e)},
|
||||
}
|
||||
|
||||
|
||||
def main():
|
||||
for line in sys.stdin:
|
||||
try:
|
||||
request = json.loads(line.strip())
|
||||
response = handle_request(request)
|
||||
if response:
|
||||
print(json.dumps(response))
|
||||
sys.stdout.flush()
|
||||
except Exception as e:
|
||||
error_response = {
|
||||
"jsonrpc": "2.0",
|
||||
"id": None,
|
||||
"error": {"code": -1, "message": str(e)},
|
||||
}
|
||||
print(json.dumps(error_response))
|
||||
sys.stdout.flush()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
69
packages/leann-mcp/README.md
Normal file
69
packages/leann-mcp/README.md
Normal file
@@ -0,0 +1,69 @@
|
||||
# LEANN Claude Code Integration
|
||||
|
||||
Intelligent code assistance using LEANN's vector search directly in Claude Code.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
First, install LEANN CLI globally:
|
||||
|
||||
```bash
|
||||
uv tool install leann
|
||||
```
|
||||
|
||||
This makes the `leann` command available system-wide, which `leann_mcp` requires.
|
||||
|
||||
## Quick Setup
|
||||
|
||||
Add the LEANN MCP server to Claude Code:
|
||||
|
||||
```bash
|
||||
claude mcp add leann-server -- leann_mcp
|
||||
```
|
||||
|
||||
## Available Tools
|
||||
|
||||
- **`leann_list`** - List available indexes across all projects
|
||||
- **`leann_search`** - Search code and documents with semantic queries
|
||||
- **`leann_ask`** - Ask questions and get AI-powered answers from your codebase
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# Build an index for your project
|
||||
leann build my-project
|
||||
|
||||
# Start Claude Code
|
||||
claude
|
||||
```
|
||||
|
||||
Then in Claude Code:
|
||||
```
|
||||
Help me understand this codebase. List available indexes and search for authentication patterns.
|
||||
```
|
||||
|
||||
<p align="center">
|
||||
<img src="../../assets/claude_code_leann.png" alt="LEANN in Claude Code" width="80%">
|
||||
</p>
|
||||
|
||||
|
||||
## How It Works
|
||||
|
||||
- **`leann`** - Core CLI tool for indexing and searching (installed globally)
|
||||
- **`leann_mcp`** - MCP server that wraps `leann` commands for Claude Code integration
|
||||
- Claude Code calls `leann_mcp`, which executes `leann` commands and returns results
|
||||
|
||||
## File Support
|
||||
|
||||
Python, JavaScript, TypeScript, Java, Go, Rust, SQL, YAML, JSON, and 30+ more file types.
|
||||
|
||||
## Storage
|
||||
|
||||
- Project indexes in `.leann/` directory (like `.git`)
|
||||
- Global project registry at `~/.leann/projects.json`
|
||||
- Multi-project support built-in
|
||||
|
||||
## Removing
|
||||
|
||||
```bash
|
||||
claude mcp remove leann-server
|
||||
```
|
||||
Reference in New Issue
Block a user