feat: cli tool
This commit is contained in:
@@ -15,5 +15,8 @@ dependencies = [
|
|||||||
"tqdm>=4.60.0"
|
"tqdm>=4.60.0"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
leann = "leann.cli:main"
|
||||||
|
|
||||||
[tool.setuptools.packages.find]
|
[tool.setuptools.packages.find]
|
||||||
where = ["src"]
|
where = ["src"]
|
||||||
257
packages/leann-core/src/leann/cli.py
Normal file
257
packages/leann-core/src/leann/cli.py
Normal file
@@ -0,0 +1,257 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import argparse
|
||||||
|
import asyncio
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
import os
|
||||||
|
|
||||||
|
from llama_index.core import SimpleDirectoryReader
|
||||||
|
from llama_index.core.node_parser import SentenceSplitter
|
||||||
|
|
||||||
|
from .api import LeannBuilder, LeannSearcher, LeannChat
|
||||||
|
|
||||||
|
|
||||||
|
class LeannCLI:
    """Command-line front end for building, listing, searching and querying LEANN indexes.

    Every index lives in its own directory under ``~/.leann/indexes/<index_name>/``.
    An index counts as "built" once its ``documents.leann.meta.json`` file exists.
    """

    # File names used inside each index directory.
    _INDEX_FILE = "documents.leann"
    _META_FILE = "documents.leann.meta.json"
    # Words that end an interactive ``ask`` session.
    _QUIT_WORDS = ("quit", "exit", "q")
    # Maximum number of characters of each search hit that is printed.
    _SNIPPET_CHARS = 200
    _NO_INDEXES_HINT = "No indexes found. Use 'leann build <name> --docs <dir>' to create one."

    def __init__(self):
        """Create the index root directory if needed and set up the text chunker."""
        self.indexes_dir = Path.home() / ".leann" / "indexes"
        self.indexes_dir.mkdir(parents=True, exist_ok=True)

        self.node_parser = SentenceSplitter(
            chunk_size=256, chunk_overlap=128, separator=" ", paragraph_separator="\n\n"
        )

    def get_index_path(self, index_name: str) -> str:
        """Return the path (as ``str``) of the index file for ``index_name``."""
        return str(self.indexes_dir / index_name / self._INDEX_FILE)

    def index_exists(self, index_name: str) -> bool:
        """Return ``True`` when the metadata file of ``index_name`` is present."""
        return (self.indexes_dir / index_name / self._META_FILE).exists()

    def create_parser(self) -> argparse.ArgumentParser:
        """Build the ``leann`` argument parser with its build/search/ask/list sub-commands."""
        parser = argparse.ArgumentParser(
            prog="leann",
            description="LEANN - Local Enhanced AI Navigation",
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog="""
Examples:
  leann build my-docs --docs ./documents    # Build index named my-docs
  leann search my-docs "query"              # Search in my-docs index
  leann ask my-docs "question"              # Ask my-docs index
  leann list                                # List all stored indexes
""",
        )

        subparsers = parser.add_subparsers(dest="command", help="Available commands")

        # Build command
        build_parser = subparsers.add_parser("build", help="Build document index")
        build_parser.add_argument("index_name", help="Index name")
        build_parser.add_argument("--docs", type=str, required=True, help="Documents directory")
        build_parser.add_argument(
            "--backend", type=str, default="hnsw", choices=["hnsw", "diskann"]
        )
        build_parser.add_argument("--embedding-model", type=str, default="facebook/contriever")
        build_parser.add_argument("--force", "-f", action="store_true", help="Force rebuild")

        # Search command
        search_parser = subparsers.add_parser("search", help="Search documents")
        search_parser.add_argument("index_name", help="Index name")
        search_parser.add_argument("query", help="Search query")
        search_parser.add_argument("--top-k", type=int, default=5)

        # Ask command
        ask_parser = subparsers.add_parser("ask", help="Ask questions")
        ask_parser.add_argument("index_name", help="Index name")
        ask_parser.add_argument(
            "--llm", type=str, default="ollama", choices=["simulated", "ollama", "hf", "openai"]
        )
        ask_parser.add_argument("--model", type=str, default="qwen3:8b")
        ask_parser.add_argument("--host", type=str, default="http://localhost:11434")
        ask_parser.add_argument("--interactive", "-i", action="store_true")

        # List command (takes no arguments, so the sub-parser object is not kept)
        subparsers.add_parser("list", help="List all indexes")

        return parser

    def list_indexes(self):
        """Print every index directory with its build status and on-disk size."""
        print("Stored LEANN indexes:")

        if not self.indexes_dir.exists():
            print(self._NO_INDEXES_HINT)
            return

        # Sorted so the listing (and the usage example below) is stable between runs;
        # ``iterdir`` yields entries in arbitrary order.
        index_dirs = sorted(
            (entry for entry in self.indexes_dir.iterdir() if entry.is_dir()),
            key=lambda entry: entry.name,
        )
        if not index_dirs:
            print(self._NO_INDEXES_HINT)
            return

        print(f"Found {len(index_dirs)} indexes:")
        for position, index_dir in enumerate(index_dirs, 1):
            index_name = index_dir.name
            is_built = self.index_exists(index_name)
            status = "✓" if is_built else "✗"

            print(f"  {position}. {index_name} [{status}]")
            if is_built:
                # Only top-level files are counted; sub-directories are ignored.
                total_bytes = sum(
                    item.stat().st_size for item in index_dir.iterdir() if item.is_file()
                )
                print(f"     Size: {total_bytes / (1024 * 1024):.1f} MB")

        example_name = index_dirs[0].name
        print("\nUsage:")
        print(f'  leann search {example_name} "your query"')
        print(f"  leann ask {example_name} --interactive")

    def load_documents(self, docs_dir: str):
        """Read the documents below ``docs_dir`` and return their text chunks.

        Files with the extensions .pdf, .txt, .md and .docx are read recursively and
        split with ``self.node_parser``. Returns a list of chunk strings.
        """
        print(f"Loading documents from {docs_dir}...")

        documents = SimpleDirectoryReader(
            docs_dir,
            recursive=True,
            encoding="utf-8",
            required_exts=[".pdf", ".txt", ".md", ".docx"],
        ).load_data(show_progress=True)

        all_texts = []
        for doc in documents:
            nodes = self.node_parser.get_nodes_from_documents([doc])
            all_texts.extend(node.get_content() for node in nodes)

        print(f"Loaded {len(documents)} documents, {len(all_texts)} chunks")
        return all_texts

    async def build_index(self, args):
        """Build the index ``args.index_name`` from the documents in ``args.docs``.

        Reads ``args.docs``, ``args.index_name``, ``args.force``, ``args.backend`` and
        ``args.embedding_model``. Prints a message and returns without building when
        the index is already built (and ``--force`` is not given), when the documents
        directory is missing, or when no text chunks were produced.
        """
        docs_dir = args.docs
        index_name = args.index_name
        index_dir = self.indexes_dir / index_name
        index_path = self.get_index_path(index_name)

        # Use the same "built" test as search/ask/list: a directory left behind by a
        # failed build must not block a retry.
        if self.index_exists(index_name) and not args.force:
            print(f"Index '{index_name}' already exists. Use --force to rebuild.")
            return

        # Report a bad --docs path up front instead of failing inside the reader.
        if not Path(docs_dir).is_dir():
            print(f"Documents directory '{docs_dir}' not found.")
            return

        all_texts = self.load_documents(docs_dir)
        if not all_texts:
            print("No documents found")
            return

        index_dir.mkdir(parents=True, exist_ok=True)

        print(f"Building index '{index_name}' with {args.backend} backend...")

        # NOTE(review): the tuning values below are fixed by this CLI; their exact
        # meaning is defined by LeannBuilder, which is not visible from this file.
        builder = LeannBuilder(
            backend_name=args.backend,
            embedding_model=args.embedding_model,
            graph_degree=32,
            complexity=64,
            is_compact=True,
            is_recompute=True,
            num_threads=1,
        )

        for chunk_text in all_texts:
            builder.add_text(chunk_text)

        builder.build_index(index_path)
        print(f"Index built at {index_path}")

    async def search_documents(self, args):
        """Run ``args.query`` against ``args.index_name`` and print the top hits."""
        index_name = args.index_name
        query = args.query

        if not self.index_exists(index_name):
            print(
                f"Index '{index_name}' not found. "
                f"Use 'leann build {index_name} --docs <dir>' to create it."
            )
            return

        searcher = LeannSearcher(index_path=self.get_index_path(index_name))
        results = searcher.search(query, top_k=args.top_k)

        print(f"Search results for '{query}' (top {len(results)}):")
        for rank, result in enumerate(results, 1):
            text = result.text
            # Only mark the snippet as shortened when it really was cut.
            ellipsis = "..." if len(text) > self._SNIPPET_CHARS else ""
            print(f"{rank}. Score: {result.score:.3f}")
            print(f"   {text[:self._SNIPPET_CHARS]}{ellipsis}")
            print()

    @staticmethod
    def _prompt(message):
        """Read one stripped line from stdin; return ``None`` on end of input or interrupt."""
        try:
            return input(message).strip()
        except (EOFError, KeyboardInterrupt):
            print()
            return None

    @staticmethod
    def _ask(chat, question):
        """Send ``question`` to ``chat`` with the retrieval settings used by this CLI."""
        # NOTE(review): the keyword spelling 'recompute_beighbor_embeddings' is kept
        # exactly as committed; presumably it matches LeannChat.ask's signature —
        # confirm against the API before renaming.
        return chat.ask(
            question,
            top_k=20,
            recompute_beighbor_embeddings=True,
            complexity=32,
        )

    async def ask_questions(self, args):
        """Answer one question, or run an interactive chat, against ``args.index_name``.

        Reads ``args.index_name``, ``args.llm``, ``args.model``, ``args.host`` and
        ``args.interactive``. The question text is always read from stdin.
        """
        index_name = args.index_name

        if not self.index_exists(index_name):
            print(
                f"Index '{index_name}' not found. "
                f"Use 'leann build {index_name} --docs <dir>' to create it."
            )
            return

        print(f"Starting chat with index '{index_name}'...")
        print(f"Using {args.model} ({args.llm})")

        llm_config = {"type": args.llm, "model": args.model}
        if args.llm == "ollama":
            # The host is only passed along for the ollama backend.
            llm_config["host"] = args.host

        chat = LeannChat(index_path=self.get_index_path(index_name), llm_config=llm_config)

        if not args.interactive:
            query = self._prompt("Enter your question: ")
            if query:
                print(f"LEANN: {self._ask(chat, query)}")
            return

        print("LEANN Assistant ready! Type 'quit' to exit")
        print("=" * 40)

        while True:
            user_input = self._prompt("\nYou: ")
            # End of input is treated like an explicit quit instead of a traceback.
            if user_input is None or user_input.lower() in self._QUIT_WORDS:
                print("Goodbye!")
                break

            if not user_input:
                continue

            print(f"LEANN: {self._ask(chat, user_input)}")

    async def run(self, args=None):
        """Parse the command line (unless ``args`` is given) and run the chosen command.

        ``args`` may be an already parsed namespace; when it is ``None`` the
        arguments are taken from ``sys.argv``. Help is printed when no command,
        or an unknown one, is selected.
        """
        parser = self.create_parser()

        if args is None:
            args = parser.parse_args()

        async_handlers = {
            "build": self.build_index,
            "search": self.search_documents,
            "ask": self.ask_questions,
        }

        if args.command == "list":
            self.list_indexes()
        elif args.command in async_handlers:
            await async_handlers[args.command](args)
        else:
            parser.print_help()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Entry point of the ``leann`` console script.

    Calls python-dotenv's ``load_dotenv()`` first, then drives ``LeannCLI.run``
    to completion on a fresh asyncio event loop.
    """
    # Imported lazily, exactly as before, so importing this module does not
    # itself require python-dotenv.
    from dotenv import load_dotenv

    load_dotenv()
    asyncio.run(LeannCLI().run())


if __name__ == "__main__":
    main()
|
||||||
@@ -7,30 +7,37 @@ import importlib.metadata
|
|||||||
if TYPE_CHECKING:
    from leann.interface import LeannBackendFactoryInterface

# Maps a backend name to the class registered under that name by ``register_backend``.
BACKEND_REGISTRY: Dict[str, "LeannBackendFactoryInterface"] = {}


def register_backend(name: str):
    """Return a class decorator that registers the decorated class as backend ``name``.

    The decorated class is stored in ``BACKEND_REGISTRY[name]`` (replacing any
    earlier entry with the same name) and returned unchanged.

    The registration is reported through the ``logging`` module at INFO level
    rather than printed, so merely importing a backend does not write to stdout.
    """
    import logging

    logger = logging.getLogger(__name__)

    def decorator(cls):
        logger.info("Registering backend '%s'", name)
        BACKEND_REGISTRY[name] = cls
        return cls

    return decorator
|
||||||
|
|
||||||
|
|
||||||
def autodiscover_backends():
    """Import every installed distribution whose name starts with ``leann-backend-``.

    The module name is derived from the distribution name by replacing ``-`` with
    ``_``. Modules are imported in sorted order so loading is deterministic.
    Importing a backend module is expected to run its own registration code.

    A backend that fails to import is skipped; the failure is reported through the
    ``logging`` module at WARNING level instead of being silently discarded.
    Returns ``None``.
    """
    import logging

    logger = logging.getLogger(__name__)
    prefix = "leann-backend-"

    # A set collapses distributions that are reported more than once.
    discovered_backends = set()
    for dist in importlib.metadata.distributions():
        dist_name = dist.metadata["name"]
        # A damaged installation can yield metadata without a Name field (None).
        if dist_name and dist_name.startswith(prefix):
            discovered_backends.add(dist_name.replace("-", "_"))

    for backend_module_name in sorted(discovered_backends):  # sort for deterministic loading
        try:
            importlib.import_module(backend_module_name)
        except ImportError as exc:
            logger.warning(
                "Could not import backend module '%s': %s", backend_module_name, exc
            )
|
||||||
|
|||||||
Reference in New Issue
Block a user