diff --git a/packages/leann-core/pyproject.toml b/packages/leann-core/pyproject.toml
index 7f64793..08d2b4e 100644
--- a/packages/leann-core/pyproject.toml
+++ b/packages/leann-core/pyproject.toml
@@ -15,5 +15,8 @@ dependencies = [
     "tqdm>=4.60.0"
 ]
 
+[project.scripts]
+leann = "leann.cli:main"
+
 [tool.setuptools.packages.find]
 where = ["src"]
\ No newline at end of file
diff --git a/packages/leann-core/src/leann/cli.py b/packages/leann-core/src/leann/cli.py
new file mode 100644
index 0000000..2dbb46b
--- /dev/null
+++ b/packages/leann-core/src/leann/cli.py
@@ -0,0 +1,257 @@
+#!/usr/bin/env python3
+import argparse
+import asyncio
+import sys
+from pathlib import Path
+from typing import Optional
+import os
+
+from llama_index.core import SimpleDirectoryReader
+from llama_index.core.node_parser import SentenceSplitter
+
+from .api import LeannBuilder, LeannSearcher, LeannChat
+
+
+class LeannCLI:
+    def __init__(self):
+        self.indexes_dir = Path.home() / ".leann" / "indexes"
+        self.indexes_dir.mkdir(parents=True, exist_ok=True)
+
+        self.node_parser = SentenceSplitter(
+            chunk_size=256, chunk_overlap=128, separator=" ", paragraph_separator="\n\n"
+        )
+
+    def get_index_path(self, index_name: str) -> str:
+        index_dir = self.indexes_dir / index_name
+        return str(index_dir / "documents.leann")
+
+    def index_exists(self, index_name: str) -> bool:
+        index_dir = self.indexes_dir / index_name
+        meta_file = index_dir / "documents.leann.meta.json"
+        return meta_file.exists()
+
+    def create_parser(self) -> argparse.ArgumentParser:
+        parser = argparse.ArgumentParser(
+            prog="leann",
+            description="LEANN - Local Enhanced AI Navigation",
+            formatter_class=argparse.RawDescriptionHelpFormatter,
+            epilog="""
+Examples:
+  leann build my-docs --docs ./documents    # Build index named my-docs
+  leann search my-docs "query"              # Search in my-docs index
+  leann ask my-docs                         # Ask a question against my-docs
+  leann list                                # List all stored indexes
+            """
+        )
+
+        subparsers = parser.add_subparsers(dest="command", help="Available commands")
+
+        # Build command
+        build_parser = subparsers.add_parser("build", help="Build document index")
+        build_parser.add_argument("index_name", help="Index name")
+        build_parser.add_argument("--docs", type=str, required=True, help="Documents directory")
+        build_parser.add_argument("--backend", type=str, default="hnsw", choices=["hnsw", "diskann"])
+        build_parser.add_argument("--embedding-model", type=str, default="facebook/contriever")
+        build_parser.add_argument("--force", "-f", action="store_true", help="Force rebuild")
+
+        # Search command
+        search_parser = subparsers.add_parser("search", help="Search documents")
+        search_parser.add_argument("index_name", help="Index name")
+        search_parser.add_argument("query", help="Search query")
+        search_parser.add_argument("--top-k", type=int, default=5)
+
+        # Ask command
+        ask_parser = subparsers.add_parser("ask", help="Ask questions")
+        ask_parser.add_argument("index_name", help="Index name")
+        ask_parser.add_argument("--llm", type=str, default="ollama", choices=["simulated", "ollama", "hf", "openai"])
+        ask_parser.add_argument("--model", type=str, default="qwen3:8b")
+        ask_parser.add_argument("--host", type=str, default="http://localhost:11434")
+        ask_parser.add_argument("--interactive", "-i", action="store_true")
+
+        # List command
+        list_parser = subparsers.add_parser("list", help="List all indexes")
+
+        return parser
+
+    def list_indexes(self):
+        print("Stored LEANN indexes:")
+
+        if not self.indexes_dir.exists():
+            print("No indexes found. Use 'leann build <name> --docs <dir>' to create one.")
+            return
+
+        index_dirs = [d for d in self.indexes_dir.iterdir() if d.is_dir()]
+
+        if not index_dirs:
+            print("No indexes found. Use 'leann build <name> --docs <dir>' to create one.")
+            return
+
+        print(f"Found {len(index_dirs)} indexes:")
+        for i, index_dir in enumerate(index_dirs, 1):
+            index_name = index_dir.name
+            status = "✓" if self.index_exists(index_name) else "✗"
+
+            print(f"  {i}. {index_name} [{status}]")
+            if self.index_exists(index_name):
+                meta_file = index_dir / "documents.leann.meta.json"
+                size_mb = sum(f.stat().st_size for f in index_dir.iterdir() if f.is_file()) / (1024 * 1024)
+                print(f"     Size: {size_mb:.1f} MB")
+
+        if index_dirs:
+            example_name = index_dirs[0].name
+            print("\nUsage:")
+            print(f"  leann search {example_name} \"your query\"")
+            print(f"  leann ask {example_name} --interactive")
+
+    def load_documents(self, docs_dir: str):
+        print(f"Loading documents from {docs_dir}...")
+
+        documents = SimpleDirectoryReader(
+            docs_dir,
+            recursive=True,
+            encoding="utf-8",
+            required_exts=[".pdf", ".txt", ".md", ".docx"],
+        ).load_data(show_progress=True)
+
+        all_texts = []
+        for doc in documents:
+            nodes = self.node_parser.get_nodes_from_documents([doc])
+            for node in nodes:
+                all_texts.append(node.get_content())
+
+        print(f"Loaded {len(documents)} documents, {len(all_texts)} chunks")
+        return all_texts
+
+    async def build_index(self, args):
+        docs_dir = args.docs
+        index_name = args.index_name
+        index_dir = self.indexes_dir / index_name
+        index_path = self.get_index_path(index_name)
+
+        if index_dir.exists() and not args.force:
+            print(f"Index '{index_name}' already exists. Use --force to rebuild.")
+            return
+
+        all_texts = self.load_documents(docs_dir)
+        if not all_texts:
+            print("No documents found")
+            return
+
+        index_dir.mkdir(parents=True, exist_ok=True)
+
+        print(f"Building index '{index_name}' with {args.backend} backend...")
+
+        builder = LeannBuilder(
+            backend_name=args.backend,
+            embedding_model=args.embedding_model,
+            graph_degree=32,
+            complexity=64,
+            is_compact=True,
+            is_recompute=True,
+            num_threads=1,
+        )
+
+        for chunk_text in all_texts:
+            builder.add_text(chunk_text)
+
+        builder.build_index(index_path)
+        print(f"Index built at {index_path}")
+
+    async def search_documents(self, args):
+        index_name = args.index_name
+        query = args.query
+        index_path = self.get_index_path(index_name)
+
+        if not self.index_exists(index_name):
+            print(f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir>' to create it.")
+            return
+
+        searcher = LeannSearcher(index_path=index_path)
+        results = searcher.search(query, top_k=args.top_k)
+
+        print(f"Search results for '{query}' (top {len(results)}):")
+        for i, result in enumerate(results, 1):
+            print(f"{i}. Score: {result.score:.3f}")
+            print(f"   {result.text[:200]}...")
+            print()
+
+    async def ask_questions(self, args):
+        index_name = args.index_name
+        index_path = self.get_index_path(index_name)
+
+        if not self.index_exists(index_name):
+            print(f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir>' to create it.")
+            return
+
+        print(f"Starting chat with index '{index_name}'...")
+        print(f"Using {args.model} ({args.llm})")
+
+        llm_config = {"type": args.llm, "model": args.model}
+        if args.llm == "ollama":
+            llm_config["host"] = args.host
+
+        chat = LeannChat(index_path=index_path, llm_config=llm_config)
+
+        if args.interactive:
+            print("LEANN Assistant ready! Type 'quit' to exit")
Type 'quit' to exit") + print("=" * 40) + + while True: + user_input = input("\nYou: ").strip() + if user_input.lower() in ['quit', 'exit', 'q']: + print("Goodbye!") + break + + if not user_input: + continue + + response = chat.ask( + user_input, + top_k=20, + recompute_beighbor_embeddings=True, + complexity=32 + ) + print(f"LEANN: {response}") + else: + query = input("Enter your question: ").strip() + if query: + response = chat.ask( + query, + top_k=20, + recompute_beighbor_embeddings=True, + complexity=32 + ) + print(f"LEANN: {response}") + + async def run(self, args=None): + parser = self.create_parser() + + if args is None: + args = parser.parse_args() + + if not args.command: + parser.print_help() + return + + if args.command == "list": + self.list_indexes() + elif args.command == "build": + await self.build_index(args) + elif args.command == "search": + await self.search_documents(args) + elif args.command == "ask": + await self.ask_questions(args) + else: + parser.print_help() + + +def main(): + import dotenv + dotenv.load_dotenv() + + cli = LeannCLI() + asyncio.run(cli.run()) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/packages/leann-core/src/leann/registry.py b/packages/leann-core/src/leann/registry.py index bda797a..043a784 100644 --- a/packages/leann-core/src/leann/registry.py +++ b/packages/leann-core/src/leann/registry.py @@ -7,30 +7,37 @@ import importlib.metadata if TYPE_CHECKING: from leann.interface import LeannBackendFactoryInterface -BACKEND_REGISTRY: Dict[str, 'LeannBackendFactoryInterface'] = {} +BACKEND_REGISTRY: Dict[str, "LeannBackendFactoryInterface"] = {} + def register_backend(name: str): """A decorator to register a new backend class.""" + def decorator(cls): print(f"INFO: Registering backend '{name}'") BACKEND_REGISTRY[name] = cls return cls + return decorator + def autodiscover_backends(): """Automatically discovers and imports all 'leann-backend-*' packages.""" - print("INFO: Starting backend auto-discovery...") + # print("INFO: Starting backend auto-discovery...") discovered_backends = [] for dist in importlib.metadata.distributions(): - dist_name = dist.metadata['name'] - if dist_name.startswith('leann-backend-'): - backend_module_name = dist_name.replace('-', '_') + dist_name = dist.metadata["name"] + if dist_name.startswith("leann-backend-"): + backend_module_name = dist_name.replace("-", "_") discovered_backends.append(backend_module_name) - - for backend_module_name in sorted(discovered_backends): # sort for deterministic loading + + for backend_module_name in sorted( + discovered_backends + ): # sort for deterministic loading try: importlib.import_module(backend_module_name) # Registration message is printed by the decorator except ImportError as e: - print(f"WARN: Could not import backend module '{backend_module_name}': {e}") - print("INFO: Backend auto-discovery finished.") \ No newline at end of file + # print(f"WARN: Could not import backend module '{backend_module_name}': {e}") + pass + # print("INFO: Backend auto-discovery finished.")