diff --git a/README.md b/README.md
index 1eb5410..279a37a 100755
--- a/README.md
+++ b/README.md
@@ -484,6 +484,9 @@ leann ask my-docs --interactive
# List all your indexes
leann list
+
+# Remove an index
+leann remove my-docs
```
**Key CLI features:**
@@ -496,7 +499,7 @@ leann list
š Click to expand: Complete CLI Reference
-You can use `leann --help`, or `leann build --help`, `leann search --help`, `leann ask --help` to get the complete CLI reference.
+You can use `leann --help`, or `leann build --help`, `leann search --help`, `leann ask --help`, `leann list --help`, `leann remove --help` to get the complete CLI reference.
**Build Command:**
```bash
@@ -534,6 +537,31 @@ Options:
--top-k N Retrieval count (default: 20)
```
+**List Command:**
+```bash
+leann list
+
+# Lists all indexes across all projects with status indicators:
+# ā
- Index is complete and ready to use
+# ā - Index is incomplete or corrupted
+# š - CLI-created index (in .leann/indexes/)
+# š - App-created index (*.leann.meta.json files)
+```
+
+**Remove Command:**
+```bash
+leann remove INDEX_NAME [OPTIONS]
+
+Options:
+ --force, -f Force removal without confirmation
+
+# Smart removal: automatically finds and safely removes indexes
+# - Shows all matching indexes across projects
+# - Requires confirmation for cross-project removal
+# - Interactive selection when multiple matches found
+# - Supports both CLI and app-created indexes
+```
+
## šļø Architecture & How It Works
diff --git a/apps/base_rag_example.py b/apps/base_rag_example.py
index 9fbb7d2..8d539d3 100644
--- a/apps/base_rag_example.py
+++ b/apps/base_rag_example.py
@@ -10,6 +10,7 @@ from typing import Any
import dotenv
from leann.api import LeannBuilder, LeannChat
+from leann.registry import register_project_directory
from llama_index.core.node_parser import SentenceSplitter
dotenv.load_dotenv()
@@ -214,6 +215,11 @@ class BaseRAGExample(ABC):
builder.build_index(index_path)
print(f"Index saved to: {index_path}")
+ # Register project directory so leann list can discover this index
+ # The index is saved as args.index_dir/index_name.leann
+ # We want to register the current working directory where the app is run
+ register_project_directory(Path.cwd())
+
return index_path
async def run_interactive_chat(self, args, index_path: str):
diff --git a/packages/leann-core/src/leann/cli.py b/packages/leann-core/src/leann/cli.py
index 96787fa..021f7d6 100644
--- a/packages/leann-core/src/leann/cli.py
+++ b/packages/leann-core/src/leann/cli.py
@@ -8,6 +8,7 @@ from llama_index.core.node_parser import SentenceSplitter
from tqdm import tqdm
from .api import LeannBuilder, LeannChat, LeannSearcher
+from .registry import register_project_directory
def extract_pdf_text_with_pymupdf(file_path: str) -> str:
@@ -84,6 +85,7 @@ Examples:
leann search my-docs "query" # Search in my-docs index
leann ask my-docs "question" # Ask my-docs index
leann list # List all stored indexes
+ leann remove my-docs # Remove an index (local first, then global)
""",
)
@@ -251,35 +253,18 @@ Examples:
# List command
subparsers.add_parser("list", help="List all indexes")
+ # Remove command
+ remove_parser = subparsers.add_parser("remove", help="Remove an index")
+ remove_parser.add_argument("index_name", help="Index name to remove")
+ remove_parser.add_argument(
+ "--force", "-f", action="store_true", help="Force removal without confirmation"
+ )
+
return parser
def register_project_dir(self):
"""Register current project directory in global registry"""
- global_registry = Path.home() / ".leann" / "projects.json"
- global_registry.parent.mkdir(exist_ok=True)
-
- current_dir = str(Path.cwd())
-
- # Load existing registry
- projects = []
- if global_registry.exists():
- try:
- import json
-
- with open(global_registry) as f:
- projects = json.load(f)
- except Exception:
- projects = []
-
- # Add current directory if not already present
- if current_dir not in projects:
- projects.append(current_dir)
-
- # Save registry
- import json
-
- with open(global_registry, "w") as f:
- json.dump(projects, f, indent=2)
+ register_project_directory()
def _build_gitignore_parser(self, docs_dir: str):
"""Build gitignore parser using gitignore-parser library."""
@@ -339,8 +324,6 @@ Examples:
return False
def list_indexes(self):
- print("Stored LEANN indexes:")
-
# Get all project directories with .leann
global_registry = Path.home() / ".leann" / "projects.json"
all_projects = []
@@ -366,55 +349,320 @@ Examples:
if (current_path / ".leann" / "indexes").exists() and current_path not in valid_projects:
valid_projects.append(current_path)
- if not valid_projects:
- print(
- "No indexes found. Use 'leann build --docs [ ...]' to create one."
- )
- return
-
- total_indexes = 0
- current_dir = Path.cwd()
+ # Separate current and other projects
+ other_projects = []
for project_path in valid_projects:
- indexes_dir = project_path / ".leann" / "indexes"
- if not indexes_dir.exists():
- continue
+ if project_path != current_path:
+ other_projects.append(project_path)
- index_dirs = [d for d in indexes_dir.iterdir() if d.is_dir()]
- if not index_dirs:
- continue
+ print("š LEANN Indexes")
+ print("=" * 50)
- # Show project header
- if project_path == current_dir:
- print(f"\nš Current project ({project_path}):")
- else:
- print(f"\nš {project_path}:")
+ total_indexes = 0
+ current_indexes_count = 0
- for index_dir in index_dirs:
+ # Show current project first (most important)
+ print("\nš Current Project")
+ print(f" {current_path}")
+ print(" " + "ā" * 45)
+
+ current_indexes = self._discover_indexes_in_project(current_path)
+ if current_indexes:
+ for idx in current_indexes:
total_indexes += 1
- index_name = index_dir.name
- meta_file = index_dir / "documents.leann.meta.json"
- status = "ā" if meta_file.exists() else "ā"
+ current_indexes_count += 1
+ type_icon = "š" if idx["type"] == "cli" else "š"
+ print(f" {current_indexes_count}. {type_icon} {idx['name']} {idx['status']}")
+ if idx["size_mb"] > 0:
+ print(f" š¦ Size: {idx['size_mb']:.1f} MB")
+ else:
+ print(" š No indexes in current project")
- print(f" {total_indexes}. {index_name} [{status}]")
- if status == "ā":
- size_mb = sum(f.stat().st_size for f in index_dir.iterdir() if f.is_file()) / (
- 1024 * 1024
+ # Show other projects (reference information)
+ if other_projects:
+ print("\n\nšļø Other Projects")
+ print(" " + "ā" * 45)
+
+ for project_path in other_projects:
+ project_indexes = self._discover_indexes_in_project(project_path)
+ if not project_indexes:
+ continue
+
+ print(f"\n š {project_path.name}")
+ print(f" {project_path}")
+
+ for idx in project_indexes:
+ total_indexes += 1
+ type_icon = "š" if idx["type"] == "cli" else "š"
+ print(f" ⢠{type_icon} {idx['name']} {idx['status']}")
+ if idx["size_mb"] > 0:
+ print(f" š¦ {idx['size_mb']:.1f} MB")
+
+ # Summary and usage info
+ print("\n" + "=" * 50)
+ if total_indexes == 0:
+ print("š” Get started:")
+ print(" leann build my-docs --docs ./documents")
+ else:
+ projects_count = len(
+ [
+ p
+ for p in valid_projects
+ if (p / ".leann" / "indexes").exists()
+ and list((p / ".leann" / "indexes").iterdir())
+ ]
+ )
+ print(f"š Total: {total_indexes} indexes across {projects_count} projects")
+
+ if current_indexes_count > 0:
+ print("\nš« Quick start (current project):")
+ # Get first index from current project for example
+ current_indexes_dir = current_path / ".leann" / "indexes"
+ if current_indexes_dir.exists():
+ current_index_dirs = [d for d in current_indexes_dir.iterdir() if d.is_dir()]
+ if current_index_dirs:
+ example_name = current_index_dirs[0].name
+ print(f' leann search {example_name} "your query"')
+ print(f" leann ask {example_name} --interactive")
+ else:
+ print("\nš” Create your first index:")
+ print(" leann build my-docs --docs ./documents")
+
+ def _discover_indexes_in_project(self, project_path: Path):
+ """Discover all indexes in a project directory (both CLI and apps formats)"""
+ indexes = []
+
+ # 1. CLI format: .leann/indexes/index_name/
+ cli_indexes_dir = project_path / ".leann" / "indexes"
+ if cli_indexes_dir.exists():
+ for index_dir in cli_indexes_dir.iterdir():
+ if index_dir.is_dir():
+ meta_file = index_dir / "documents.leann.meta.json"
+ status = "ā
" if meta_file.exists() else "ā"
+
+ size_mb = 0
+ if meta_file.exists():
+ try:
+ size_mb = sum(
+ f.stat().st_size for f in index_dir.iterdir() if f.is_file()
+ ) / (1024 * 1024)
+ except (OSError, PermissionError):
+ pass
+
+ indexes.append(
+ {
+ "name": index_dir.name,
+ "type": "cli",
+ "status": status,
+ "size_mb": size_mb,
+ "path": index_dir,
+ }
)
- print(f" Size: {size_mb:.1f} MB")
- if total_indexes > 0:
- print(f"\nTotal: {total_indexes} indexes across {len(valid_projects)} projects")
- print("\nUsage (current project only):")
+ # 2. Apps format: *.leann.meta.json files anywhere in the project
+ for meta_file in project_path.rglob("*.leann.meta.json"):
+ if meta_file.is_file():
+ # Extract index name from filename (remove .leann.meta.json extension)
+ index_name = meta_file.name.replace(".leann.meta.json", "")
- # Show example from current project
- current_indexes_dir = current_dir / ".leann" / "indexes"
- if current_indexes_dir.exists():
- current_index_dirs = [d for d in current_indexes_dir.iterdir() if d.is_dir()]
- if current_index_dirs:
- example_name = current_index_dirs[0].name
- print(f' leann search {example_name} "your query"')
- print(f" leann ask {example_name} --interactive")
+ # Apps indexes are considered complete if the .leann.meta.json file exists
+ status = "ā
"
+
+ # Calculate total size of all related files
+ size_mb = 0
+ try:
+ index_dir = meta_file.parent
+ for related_file in index_dir.glob(f"{index_name}.leann*"):
+ size_mb += related_file.stat().st_size / (1024 * 1024)
+ except (OSError, PermissionError):
+ pass
+
+ indexes.append(
+ {
+ "name": index_name,
+ "type": "app",
+ "status": status,
+ "size_mb": size_mb,
+ "path": meta_file,
+ }
+ )
+
+ return indexes
+
+ def remove_index(self, index_name: str, force: bool = False):
+ """Safely remove an index - always show all matches for transparency"""
+
+ # Always do a comprehensive search for safety
+ print(f"š Searching for all indexes named '{index_name}'...")
+ all_matches = self._find_all_matching_indexes(index_name)
+
+ if not all_matches:
+ print(f"ā Index '{index_name}' not found in any project.")
+ return False
+
+ if len(all_matches) == 1:
+ return self._remove_single_match(all_matches[0], index_name, force)
+ else:
+ return self._remove_from_multiple_matches(all_matches, index_name, force)
+
+ def _find_all_matching_indexes(self, index_name: str):
+ """Find all indexes with the given name across all projects"""
+ matches = []
+
+ # Get all registered projects
+ global_registry = Path.home() / ".leann" / "projects.json"
+ all_projects = []
+
+ if global_registry.exists():
+ try:
+ import json
+
+ with open(global_registry) as f:
+ all_projects = json.load(f)
+ except Exception:
+ pass
+
+ # Always include current project
+ current_path = Path.cwd()
+ if str(current_path) not in all_projects:
+ all_projects.append(str(current_path))
+
+ # Search across all projects
+ for project_dir in all_projects:
+ project_path = Path(project_dir)
+ if not project_path.exists():
+ continue
+
+ index_dir = project_path / ".leann" / "indexes" / index_name
+ if index_dir.exists():
+ is_current = project_path == current_path
+ matches.append(
+ {"project_path": project_path, "index_dir": index_dir, "is_current": is_current}
+ )
+
+ # Sort: current project first, then by project name
+ matches.sort(key=lambda x: (not x["is_current"], x["project_path"].name))
+ return matches
+
+ def _remove_single_match(self, match, index_name: str, force: bool):
+ """Handle removal when only one match is found"""
+ project_path = match["project_path"]
+ index_dir = match["index_dir"]
+ is_current = match["is_current"]
+
+ if is_current:
+ location_info = "current project"
+ emoji = "š "
+ else:
+ location_info = f"other project '{project_path.name}'"
+ emoji = "š"
+
+ print(f"ā
Found 1 index named '{index_name}':")
+ print(f" {emoji} Location: {location_info}")
+ print(f" š Path: {project_path}")
+
+ if not force:
+ if not is_current:
+ print("\nā ļø CROSS-PROJECT REMOVAL!")
+ print(" This will delete the index from another project.")
+
+ response = input(f" ā Confirm removal from {location_info}? (y/N): ").strip().lower()
+ if response not in ["y", "yes"]:
+ print(" ā Removal cancelled.")
+ return False
+
+ return self._delete_index_directory(
+ index_dir, index_name, project_path if not is_current else None
+ )
+
+ def _remove_from_multiple_matches(self, matches, index_name: str, force: bool):
+ """Handle removal when multiple matches are found"""
+
+ print(f"ā ļø Found {len(matches)} indexes named '{index_name}':")
+ print(" " + "ā" * 50)
+
+ for i, match in enumerate(matches, 1):
+ project_path = match["project_path"]
+ is_current = match["is_current"]
+
+ if is_current:
+ print(f" {i}. š Current project")
+ print(f" š {project_path}")
+ else:
+ print(f" {i}. š {project_path.name}")
+ print(f" š {project_path}")
+
+ # Show size info
+ try:
+ size_mb = sum(
+ f.stat().st_size for f in match["index_dir"].iterdir() if f.is_file()
+ ) / (1024 * 1024)
+ print(f" š¦ Size: {size_mb:.1f} MB")
+ except (OSError, PermissionError):
+ pass
+
+ print(" " + "ā" * 50)
+
+ if force:
+ print(" ā Multiple matches found, but --force specified.")
+ print(" Please run without --force to choose which one to remove.")
+ return False
+
+ try:
+ choice = input(
+ f" ā Which one to remove? (1-{len(matches)}, or 'c' to cancel): "
+ ).strip()
+ if choice.lower() == "c":
+ print(" ā Removal cancelled.")
+ return False
+
+ choice_idx = int(choice) - 1
+ if 0 <= choice_idx < len(matches):
+ selected_match = matches[choice_idx]
+ project_path = selected_match["project_path"]
+ index_dir = selected_match["index_dir"]
+ is_current = selected_match["is_current"]
+
+ location = "current project" if is_current else f"'{project_path.name}' project"
+ print(f" šÆ Selected: Remove from {location}")
+
+ # Final confirmation for safety
+ confirm = input(
+ f" ā FINAL CONFIRMATION - Type '{index_name}' to proceed: "
+ ).strip()
+ if confirm != index_name:
+ print(" ā Confirmation failed. Removal cancelled.")
+ return False
+
+ return self._delete_index_directory(
+ index_dir, index_name, project_path if not is_current else None
+ )
+ else:
+ print(" ā Invalid choice. Removal cancelled.")
+ return False
+
+ except (ValueError, KeyboardInterrupt):
+ print("\n ā Invalid input. Removal cancelled.")
+ return False
+
+ def _delete_index_directory(
+ self, index_dir: Path, index_name: str, project_path: Path | None = None
+ ):
+ """Actually delete the index directory"""
+ try:
+ import shutil
+
+ shutil.rmtree(index_dir)
+
+ if project_path:
+ print(f"ā
Index '{index_name}' removed from {project_path.name}")
+ else:
+ print(f"ā
Index '{index_name}' removed successfully")
+ return True
+ except Exception as e:
+ print(f"ā Error removing index '{index_name}': {e}")
+ return False
def load_documents(
self,
@@ -942,6 +1190,8 @@ Examples:
if args.command == "list":
self.list_indexes()
+ elif args.command == "remove":
+ self.remove_index(args.index_name, args.force)
elif args.command == "build":
await self.build_index(args)
elif args.command == "search":
@@ -953,10 +1203,15 @@ Examples:
def main():
+ import logging
+
import dotenv
dotenv.load_dotenv()
+ # Set clean logging for CLI usage
+ logging.getLogger().setLevel(logging.WARNING) # Only show warnings and errors
+
cli = LeannCLI()
asyncio.run(cli.run())
diff --git a/packages/leann-core/src/leann/registry.py b/packages/leann-core/src/leann/registry.py
index 054ef23..25d2235 100644
--- a/packages/leann-core/src/leann/registry.py
+++ b/packages/leann-core/src/leann/registry.py
@@ -2,11 +2,17 @@
import importlib
import importlib.metadata
+import json
+import logging
+from pathlib import Path
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from leann.interface import LeannBackendFactoryInterface
+# Set up logger for this module
+logger = logging.getLogger(__name__)
+
BACKEND_REGISTRY: dict[str, "LeannBackendFactoryInterface"] = {}
@@ -14,7 +20,7 @@ def register_backend(name: str):
"""A decorator to register a new backend class."""
def decorator(cls):
- print(f"INFO: Registering backend '{name}'")
+ logger.debug(f"Registering backend '{name}'")
BACKEND_REGISTRY[name] = cls
return cls
@@ -39,3 +45,54 @@ def autodiscover_backends():
# print(f"WARN: Could not import backend module '{backend_module_name}': {e}")
pass
# print("INFO: Backend auto-discovery finished.")
+
+
+def register_project_directory(project_dir: str | Path | None = None):
+ """
+ Register a project directory in the global LEANN registry.
+
+ This allows `leann list` to discover indexes created by apps or other tools.
+
+ Args:
+ project_dir: Directory to register. If None, uses current working directory.
+ """
+ if project_dir is None:
+ project_dir = Path.cwd()
+ else:
+ project_dir = Path(project_dir)
+
+ # Only register directories that have some kind of LEANN content
+ # Either .leann/indexes/ (CLI format) or *.leann.meta.json files (apps format)
+ has_cli_indexes = (project_dir / ".leann" / "indexes").exists()
+ has_app_indexes = any(project_dir.rglob("*.leann.meta.json"))
+
+ if not (has_cli_indexes or has_app_indexes):
+ # Don't register if there are no LEANN indexes
+ return
+
+ global_registry = Path.home() / ".leann" / "projects.json"
+ global_registry.parent.mkdir(exist_ok=True)
+
+ project_str = str(project_dir.resolve())
+
+ # Load existing registry
+ projects = []
+ if global_registry.exists():
+ try:
+ with open(global_registry) as f:
+ projects = json.load(f)
+ except Exception:
+ logger.debug("Could not load existing project registry")
+ projects = []
+
+ # Add project if not already present
+ if project_str not in projects:
+ projects.append(project_str)
+
+ # Save updated registry
+ try:
+ with open(global_registry, "w") as f:
+ json.dump(projects, f, indent=2)
+ logger.debug(f"Registered project directory: {project_str}")
+ except Exception as e:
+ logger.warning(f"Could not save project registry: {e}")