From 8b9c2be8c921dd966dc23d55a055a951f27672fe Mon Sep 17 00:00:00 2001
From: Andy Lee <andylizf@outlook.com>
Date: Sat, 9 Aug 2025 20:37:17 -0700
Subject: [PATCH] Feat/claude code refine (#24)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat: Add Ollama embedding support for local embedding models

* docs: Add clear documentation for Ollama embedding usage

* fix: remove leann_ask

* docs: remove ollama embedding extra instructions

* simplify MCP interface for Claude Code

- Remove unnecessary search parameters: search_mode, recompute_embeddings, file_types, min_score
- Remove leann_clear tool (not needed for Claude Code workflow)
- Streamline search to only use: query, index_name, top_k, complexity
- Keep core tools: leann_index, leann_search, leann_status, leann_list

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* remove leann_index from MCP interface

Users should use CLI command 'leann build' to create indexes first.
MCP now only provides search functionality:
- leann_search: search existing indexes
- leann_status: check index health
- leann_list: list available indexes

This separates index creation (CLI) from search (Claude Code).

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* improve CLI with auto project name and .gitignore support

- Make index_name optional, auto-use current directory name
- Read .gitignore patterns and respect them during indexing
- Add _read_gitignore_patterns() to parse .gitignore files
- Add _should_exclude_file() for pattern matching
- Apply exclusion patterns to both PDF and general file processing
- Show helpful messages about gitignore usage

Now users can simply run: leann build
And it will use project name + respect .gitignore patterns.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

---------

Co-authored-by: Claude <noreply@anthropic.com>
---
 README.md                            |   1 +
 packages/leann-core/src/leann/cli.py | 129 ++++++++++++++++++++++-----
 packages/leann-core/src/leann/mcp.py |  79 +++++++++++++---
 3 files changed, 171 insertions(+), 38 deletions(-)
diff --git a/README.md b/README.md
index 40c07ec..748b252 100755
--- a/README.md
+++ b/README.md
@@ -97,6 +97,7 @@ uv sync
 
 </details>
 
+
 ## Quick Start
 
 Our declarative API makes RAG as easy as writing a config file.
diff --git a/packages/leann-core/src/leann/cli.py b/packages/leann-core/src/leann/cli.py
index f307204..5171afa 100644
--- a/packages/leann-core/src/leann/cli.py
+++ b/packages/leann-core/src/leann/cli.py
@@ -86,7 +86,9 @@ Examples:
 
         # Build command
         build_parser = subparsers.add_parser("build", help="Build document index")
-        build_parser.add_argument("index_name", help="Index name")
+        build_parser.add_argument(
+            "index_name", nargs="?", help="Index name (default: current directory name)"
+        )
         build_parser.add_argument(
             "--docs", type=str, default=".", help="Documents directory (default: current directory)"
         )
@@ -201,6 +203,63 @@ Examples:
         with open(global_registry, "w") as f:
             json.dump(projects, f, indent=2)
 
+    def _read_gitignore_patterns(self, docs_dir: str) -> list[str]:
+        """Return exclusion patterns: built-in essentials plus docs_dir/.gitignore entries.
+
+        Blank lines, comments and "!" negations are skipped (negations must not be
+        turned into exclusions); one leading "/" is dropped from each pattern.
+        A missing .gitignore or a read error is reported with print(), never raised.
+        """
+        # Always excluded, regardless of what .gitignore says.
+        essential_patterns = [".git", ".DS_Store"]
+        patterns = list(essential_patterns)
+
+        gitignore_path = Path(docs_dir) / ".gitignore"
+        if not gitignore_path.exists():
+            print("📋 No .gitignore found, using minimal exclusion patterns")
+            return patterns
+
+        try:
+            # utf-8-sig drops a leading BOM so it cannot corrupt the first pattern.
+            with open(gitignore_path, encoding="utf-8-sig") as f:
+                for line in f:
+                    line = line.strip()
+                    # Skip blanks, comments and negations (not supported as exclusions).
+                    if not line or line.startswith(("#", "!")):
+                        continue
+                    patterns.append(line.removeprefix("/"))
+            print(
+                f"📋 Loaded {len(patterns) - len(essential_patterns)} patterns from .gitignore"
+            )
+        except (OSError, UnicodeError) as e:
+            print(f"Warning: Could not read .gitignore: {e}")
+
+        return patterns
+
+    def _should_exclude_file(self, relative_path: Path, exclude_patterns: list[str]) -> bool:
+        """Return True if relative_path matches any gitignore-style exclusion pattern.
+
+        Supported gitignore subset: a pattern excludes the path if it equals the path
+        or a leading directory of it, or fnmatch-matches the whole path or (when
+        slash-free) any single component. Trailing "/" is ignored; "!" is skipped.
+        """
+        import fnmatch
+
+        # as_posix() keeps "/" as the separator on Windows, matching .gitignore syntax.
+        path_str = Path(relative_path).as_posix()
+        components = path_str.split("/")
+        for raw_pattern in exclude_patterns:
+            pattern = raw_pattern.rstrip("/")
+            if not pattern or pattern.startswith("!"):
+                continue
+            if path_str == pattern or path_str.startswith(pattern + "/"):
+                return True
+            if fnmatch.fnmatch(path_str, pattern):
+                return True
+            if "/" not in pattern and any(fnmatch.fnmatch(c, pattern) for c in components):
+                return True
+        return False
+
     def list_indexes(self):
         print("Stored LEANN indexes:")
 
@@ -282,34 +341,49 @@ Examples:
         if custom_file_types:
             print(f"Using custom file types: {custom_file_types}")
 
-        # Try to use better PDF parsers first
+        # Read .gitignore patterns first
+        exclude_patterns = self._read_gitignore_patterns(docs_dir)
+
+        # Try to use better PDF parsers first, but only if PDFs are requested
         documents = []
         docs_path = Path(docs_dir)
 
-        for file_path in docs_path.rglob("*.pdf"):
-            print(f"Processing PDF: {file_path}")
+        # Check if we should process PDFs
+        should_process_pdfs = custom_file_types is None or ".pdf" in custom_file_types
 
-            # Try PyMuPDF first (best quality)
-            text = extract_pdf_text_with_pymupdf(str(file_path))
-            if text is None:
-                # Try pdfplumber
-                text = extract_pdf_text_with_pdfplumber(str(file_path))
+        if should_process_pdfs:
+            for file_path in docs_path.rglob("*.pdf"):
+                # Check if file matches any exclude pattern
+                relative_path = file_path.relative_to(docs_path)
+                if self._should_exclude_file(relative_path, exclude_patterns):
+                    continue
 
-            if text:
-                # Create a simple document structure
-                from llama_index.core import Document
+                print(f"Processing PDF: {file_path}")
 
-                doc = Document(text=text, metadata={"source": str(file_path)})
-                documents.append(doc)
-            else:
-                # Fallback to default reader
-                print(f"Using default reader for {file_path}")
-                default_docs = SimpleDirectoryReader(
-                    str(file_path.parent),
-                    filename_as_id=True,
-                    required_exts=[file_path.suffix],
-                ).load_data()
-                documents.extend(default_docs)
+                # Try PyMuPDF first (best quality)
+                text = extract_pdf_text_with_pymupdf(str(file_path))
+                if text is None:
+                    # Try pdfplumber
+                    text = extract_pdf_text_with_pdfplumber(str(file_path))
+
+                if text:
+                    # Create a simple document structure
+                    from llama_index.core import Document
+
+                    doc = Document(text=text, metadata={"source": str(file_path)})
+                    documents.append(doc)
+                else:
+                    # Fallback to default reader
+                    print(f"Using default reader for {file_path}")
+                    try:
+                        default_docs = SimpleDirectoryReader(
+                            str(file_path.parent),
+                            filename_as_id=True,
+                            required_exts=[file_path.suffix],
+                        ).load_data()
+                        documents.extend(default_docs)
+                    except Exception as e:
+                        print(f"Warning: Could not process {file_path}: {e}")
 
         # Load other file types with default reader
         if custom_file_types:
@@ -380,6 +454,7 @@ Examples:
                 recursive=True,
                 encoding="utf-8",
                 required_exts=code_extensions,
+                exclude=exclude_patterns,
             ).load_data(show_progress=True)
             documents.extend(other_docs)
         except ValueError as e:
@@ -454,7 +529,13 @@ Examples:
 
     async def build_index(self, args):
         docs_dir = args.docs
-        index_name = args.index_name
+        # Use current directory name if index_name not provided
+        if args.index_name:
+            index_name = args.index_name
+        else:
+            index_name = Path.cwd().name
+            print(f"Using current directory name as index: '{index_name}'")
+
         index_dir = self.indexes_dir / index_name
         index_path = self.get_index_path(index_name)
 
diff --git a/packages/leann-core/src/leann/mcp.py b/packages/leann-core/src/leann/mcp.py
index f5a2cae..92f79e3 100755
--- a/packages/leann-core/src/leann/mcp.py
+++ b/packages/leann-core/src/leann/mcp.py
@@ -25,32 +25,61 @@ def handle_request(request):
                 "tools": [
                     {
                         "name": "leann_search",
-                        "description": "Search LEANN index",
+                        "description": """🔍 Search code using natural language - like having a coding assistant who knows your entire codebase!
+
+🎯 **Perfect for**:
+- "How does authentication work?" → finds auth-related code
+- "Error handling patterns" → locates try-catch blocks and error logic
+- "Database connection setup" → finds DB initialization code
+- "API endpoint definitions" → locates route handlers
+- "Configuration management" → finds config files and usage
+
+💡 **Pro tip**: Use this before making any changes to understand existing patterns and conventions.""",
                         "inputSchema": {
                             "type": "object",
                             "properties": {
-                                "index_name": {"type": "string"},
-                                "query": {"type": "string"},
-                                "top_k": {"type": "integer", "default": 5},
+                                "index_name": {
+                                    "type": "string",
+                                    "description": "Name of the LEANN index to search. Use 'leann_list' first to see available indexes.",
+                                },
+                                "query": {
+                                    "type": "string",
+                                    "description": "Search query - can be natural language (e.g., 'how to handle errors') or technical terms (e.g., 'async function definition')",
+                                },
+                                "top_k": {
+                                    "type": "integer",
+                                    "default": 5,
+                                    "minimum": 1,
+                                    "maximum": 20,
+                                    "description": "Number of search results to return. Use 5-10 for focused results, 15-20 for comprehensive exploration.",
+                                },
+                                "complexity": {
+                                    "type": "integer",
+                                    "default": 32,
+                                    "minimum": 16,
+                                    "maximum": 128,
+                                    "description": "Search complexity level. Use 16-32 for fast searches (recommended), 64+ for higher precision when needed.",
+                                },
                             },
                             "required": ["index_name", "query"],
                         },
                     },
                     {
-                        "name": "leann_ask",
-                        "description": "Ask question using LEANN RAG",
+                        "name": "leann_status",
+                        "description": "📊 Check the health and stats of your code indexes - like a medical checkup for your codebase knowledge!",
                         "inputSchema": {
                             "type": "object",
                             "properties": {
-                                "index_name": {"type": "string"},
-                                "question": {"type": "string"},
+                                "index_name": {
+                                    "type": "string",
+                                    "description": "Optional: Name of specific index to check. If not provided, shows status of all indexes.",
+                                }
                             },
-                            "required": ["index_name", "question"],
                         },
                     },
                     {
                         "name": "leann_list",
-                        "description": "List all LEANN indexes",
+                        "description": "📋 Show all your indexed codebases - your personal code library! Use this to see what's available for search.",
                         "inputSchema": {"type": "object", "properties": {}},
                     },
                 ]
@@ -63,19 +92,41 @@ def handle_request(request):
 
         try:
             if tool_name == "leann_search":
+                # Validate required parameters
+                if not args.get("index_name") or not args.get("query"):
+                    return {
+                        "jsonrpc": "2.0",
+                        "id": request.get("id"),
+                        "result": {
+                            "content": [
+                                {
+                                    "type": "text",
+                                    "text": "Error: Both index_name and query are required",
+                                }
+                            ]
+                        },
+                    }
+
+                # Build simplified command
                 cmd = [
                     "leann",
                     "search",
                     args["index_name"],
                     args["query"],
-                    "--recompute-embeddings",
                     f"--top-k={args.get('top_k', 5)}",
+                    f"--complexity={args.get('complexity', 32)}",
                 ]
+
                 result = subprocess.run(cmd, capture_output=True, text=True)
 
-            elif tool_name == "leann_ask":
-                cmd = f'echo "{args["question"]}" | leann ask {args["index_name"]} --recompute-embeddings --llm ollama --model qwen3:8b'
-                result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+            elif tool_name == "leann_status":
+                if args.get("index_name"):
+                    # Check specific index status - for now, we'll use leann list and filter
+                    result = subprocess.run(["leann", "list"], capture_output=True, text=True)
+                    # We could enhance this to show more detailed status per index
+                else:
+                    # Show all indexes status
+                    result = subprocess.run(["leann", "list"], capture_output=True, text=True)
 
             elif tool_name == "leann_list":
                 result = subprocess.run(["leann", "list"], capture_output=True, text=True)