docs: cli
This commit is contained in:
65
README.md
65
README.md
@@ -294,6 +294,71 @@ Once the index is built, you can ask questions like:
|
|||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
|
## 🖥️ Command Line Interface
|
||||||
|
|
||||||
|
LEANN includes a powerful CLI for document processing and search. It is well suited to quick document indexing and interactive chat.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Build an index from documents
|
||||||
|
leann build my-docs --docs ./documents
|
||||||
|
|
||||||
|
# Search your documents
|
||||||
|
leann search my-docs "machine learning concepts"
|
||||||
|
|
||||||
|
# Interactive chat with your documents
|
||||||
|
leann ask my-docs --interactive
|
||||||
|
|
||||||
|
# List all your indexes
|
||||||
|
leann list
|
||||||
|
```
|
||||||
|
|
||||||
|
**Key CLI features:**
|
||||||
|
- Auto-detects document formats (PDF, TXT, MD, DOCX)
|
||||||
|
- Smart text chunking with overlap
|
||||||
|
- Multiple LLM providers (Ollama, OpenAI, HuggingFace)
|
||||||
|
- Organized index storage in `~/.leann/indexes/`
|
||||||
|
- Support for advanced search parameters
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary><strong>📋 Click to expand: Complete CLI Reference</strong></summary>
|
||||||
|
|
||||||
|
**Build Command:**
|
||||||
|
```bash
|
||||||
|
leann build INDEX_NAME --docs DIRECTORY [OPTIONS]
|
||||||
|
|
||||||
|
Options:
|
||||||
|
--backend {hnsw,diskann} Backend to use (default: hnsw)
|
||||||
|
--embedding-model MODEL Embedding model (default: facebook/contriever)
|
||||||
|
--graph-degree N Graph degree (default: 32)
|
||||||
|
--complexity N Build complexity (default: 64)
|
||||||
|
--force, -f Force a rebuild of an existing index
|
||||||
|
--compact Use compact storage (default: true)
|
||||||
|
--recompute Enable recomputation (default: true)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Search Command:**
|
||||||
|
```bash
|
||||||
|
leann search INDEX_NAME QUERY [OPTIONS]
|
||||||
|
|
||||||
|
Options:
|
||||||
|
--top-k N Number of results (default: 5)
|
||||||
|
--complexity N Search complexity (default: 64)
|
||||||
|
--recompute-embeddings Use recomputation for highest accuracy
|
||||||
|
--pruning-strategy {global,local,proportional} Pruning strategy (default: global)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Ask Command:**
|
||||||
|
```bash
|
||||||
|
leann ask INDEX_NAME [OPTIONS]
|
||||||
|
|
||||||
|
Options:
|
||||||
|
--llm {simulated,ollama,hf,openai} LLM provider (default: ollama)
|
||||||
|
--model MODEL Model name (default: qwen3:8b)
|
||||||
|
--interactive, -i Interactive chat mode
|
||||||
|
--top-k N Retrieval count (default: 20)
|
||||||
|
```
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
## 🏗️ Architecture & How It Works
|
## 🏗️ Architecture & How It Works
|
||||||
|
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
import argparse
|
import argparse
|
||||||
import asyncio
|
import asyncio
|
||||||
import sys
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
|
||||||
import os
|
|
||||||
|
|
||||||
from llama_index.core import SimpleDirectoryReader
|
from llama_index.core import SimpleDirectoryReader
|
||||||
from llama_index.core.node_parser import SentenceSplitter
|
from llama_index.core.node_parser import SentenceSplitter
|
||||||
@@ -41,7 +37,7 @@ Examples:
|
|||||||
leann search my-docs "query" # Search in my-docs index
|
leann search my-docs "query" # Search in my-docs index
|
||||||
leann ask my-docs "question" # Ask my-docs index
|
leann ask my-docs "question" # Ask my-docs index
|
||||||
leann list # List all stored indexes
|
leann list # List all stored indexes
|
||||||
"""
|
""",
|
||||||
)
|
)
|
||||||
|
|
||||||
subparsers = parser.add_subparsers(dest="command", help="Available commands")
|
subparsers = parser.add_subparsers(dest="command", help="Available commands")
|
||||||
@@ -49,10 +45,18 @@ Examples:
|
|||||||
# Build command
|
# Build command
|
||||||
build_parser = subparsers.add_parser("build", help="Build document index")
|
build_parser = subparsers.add_parser("build", help="Build document index")
|
||||||
build_parser.add_argument("index_name", help="Index name")
|
build_parser.add_argument("index_name", help="Index name")
|
||||||
build_parser.add_argument("--docs", type=str, required=True, help="Documents directory")
|
build_parser.add_argument(
|
||||||
build_parser.add_argument("--backend", type=str, default="hnsw", choices=["hnsw", "diskann"])
|
"--docs", type=str, required=True, help="Documents directory"
|
||||||
build_parser.add_argument("--embedding-model", type=str, default="facebook/contriever")
|
)
|
||||||
build_parser.add_argument("--force", "-f", action="store_true", help="Force rebuild")
|
build_parser.add_argument(
|
||||||
|
"--backend", type=str, default="hnsw", choices=["hnsw", "diskann"]
|
||||||
|
)
|
||||||
|
build_parser.add_argument(
|
||||||
|
"--embedding-model", type=str, default="facebook/contriever"
|
||||||
|
)
|
||||||
|
build_parser.add_argument(
|
||||||
|
"--force", "-f", action="store_true", help="Force rebuild"
|
||||||
|
)
|
||||||
build_parser.add_argument("--graph-degree", type=int, default=32)
|
build_parser.add_argument("--graph-degree", type=int, default=32)
|
||||||
build_parser.add_argument("--complexity", type=int, default=64)
|
build_parser.add_argument("--complexity", type=int, default=64)
|
||||||
build_parser.add_argument("--num-threads", type=int, default=1)
|
build_parser.add_argument("--num-threads", type=int, default=1)
|
||||||
@@ -68,12 +72,21 @@ Examples:
|
|||||||
search_parser.add_argument("--beam-width", type=int, default=1)
|
search_parser.add_argument("--beam-width", type=int, default=1)
|
||||||
search_parser.add_argument("--prune-ratio", type=float, default=0.0)
|
search_parser.add_argument("--prune-ratio", type=float, default=0.0)
|
||||||
search_parser.add_argument("--recompute-embeddings", action="store_true")
|
search_parser.add_argument("--recompute-embeddings", action="store_true")
|
||||||
search_parser.add_argument("--pruning-strategy", choices=["global", "local", "proportional"], default="global")
|
search_parser.add_argument(
|
||||||
|
"--pruning-strategy",
|
||||||
|
choices=["global", "local", "proportional"],
|
||||||
|
default="global",
|
||||||
|
)
|
||||||
|
|
||||||
# Ask command
|
# Ask command
|
||||||
ask_parser = subparsers.add_parser("ask", help="Ask questions")
|
ask_parser = subparsers.add_parser("ask", help="Ask questions")
|
||||||
ask_parser.add_argument("index_name", help="Index name")
|
ask_parser.add_argument("index_name", help="Index name")
|
||||||
ask_parser.add_argument("--llm", type=str, default="ollama", choices=["simulated", "ollama", "hf", "openai"])
|
ask_parser.add_argument(
|
||||||
|
"--llm",
|
||||||
|
type=str,
|
||||||
|
default="ollama",
|
||||||
|
choices=["simulated", "ollama", "hf", "openai"],
|
||||||
|
)
|
||||||
ask_parser.add_argument("--model", type=str, default="qwen3:8b")
|
ask_parser.add_argument("--model", type=str, default="qwen3:8b")
|
||||||
ask_parser.add_argument("--host", type=str, default="http://localhost:11434")
|
ask_parser.add_argument("--host", type=str, default="http://localhost:11434")
|
||||||
ask_parser.add_argument("--interactive", "-i", action="store_true")
|
ask_parser.add_argument("--interactive", "-i", action="store_true")
|
||||||
@@ -82,7 +95,11 @@ Examples:
|
|||||||
ask_parser.add_argument("--beam-width", type=int, default=1)
|
ask_parser.add_argument("--beam-width", type=int, default=1)
|
||||||
ask_parser.add_argument("--prune-ratio", type=float, default=0.0)
|
ask_parser.add_argument("--prune-ratio", type=float, default=0.0)
|
||||||
ask_parser.add_argument("--recompute-embeddings", action="store_true")
|
ask_parser.add_argument("--recompute-embeddings", action="store_true")
|
||||||
ask_parser.add_argument("--pruning-strategy", choices=["global", "local", "proportional"], default="global")
|
ask_parser.add_argument(
|
||||||
|
"--pruning-strategy",
|
||||||
|
choices=["global", "local", "proportional"],
|
||||||
|
default="global",
|
||||||
|
)
|
||||||
|
|
||||||
# List command
|
# List command
|
||||||
list_parser = subparsers.add_parser("list", help="List all indexes")
|
list_parser = subparsers.add_parser("list", help="List all indexes")
|
||||||
@@ -93,13 +110,17 @@ Examples:
|
|||||||
print("Stored LEANN indexes:")
|
print("Stored LEANN indexes:")
|
||||||
|
|
||||||
if not self.indexes_dir.exists():
|
if not self.indexes_dir.exists():
|
||||||
print("No indexes found. Use 'leann build <name> --docs <dir>' to create one.")
|
print(
|
||||||
|
"No indexes found. Use 'leann build <name> --docs <dir>' to create one."
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
index_dirs = [d for d in self.indexes_dir.iterdir() if d.is_dir()]
|
index_dirs = [d for d in self.indexes_dir.iterdir() if d.is_dir()]
|
||||||
|
|
||||||
if not index_dirs:
|
if not index_dirs:
|
||||||
print("No indexes found. Use 'leann build <name> --docs <dir>' to create one.")
|
print(
|
||||||
|
"No indexes found. Use 'leann build <name> --docs <dir>' to create one."
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
print(f"Found {len(index_dirs)} indexes:")
|
print(f"Found {len(index_dirs)} indexes:")
|
||||||
@@ -110,13 +131,15 @@ Examples:
|
|||||||
print(f" {i}. {index_name} [{status}]")
|
print(f" {i}. {index_name} [{status}]")
|
||||||
if self.index_exists(index_name):
|
if self.index_exists(index_name):
|
||||||
meta_file = index_dir / "documents.leann.meta.json"
|
meta_file = index_dir / "documents.leann.meta.json"
|
||||||
size_mb = sum(f.stat().st_size for f in index_dir.iterdir() if f.is_file()) / (1024 * 1024)
|
size_mb = sum(
|
||||||
|
f.stat().st_size for f in index_dir.iterdir() if f.is_file()
|
||||||
|
) / (1024 * 1024)
|
||||||
print(f" Size: {size_mb:.1f} MB")
|
print(f" Size: {size_mb:.1f} MB")
|
||||||
|
|
||||||
if index_dirs:
|
if index_dirs:
|
||||||
example_name = index_dirs[0].name
|
example_name = index_dirs[0].name
|
||||||
print(f"\nUsage:")
|
print(f"\nUsage:")
|
||||||
print(f" leann search {example_name} \"your query\"")
|
print(f' leann search {example_name} "your query"')
|
||||||
print(f" leann ask {example_name} --interactive")
|
print(f" leann ask {example_name} --interactive")
|
||||||
|
|
||||||
def load_documents(self, docs_dir: str):
|
def load_documents(self, docs_dir: str):
|
||||||
@@ -179,7 +202,9 @@ Examples:
|
|||||||
index_path = self.get_index_path(index_name)
|
index_path = self.get_index_path(index_name)
|
||||||
|
|
||||||
if not self.index_exists(index_name):
|
if not self.index_exists(index_name):
|
||||||
print(f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir>' to create it.")
|
print(
|
||||||
|
f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir>' to create it."
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
searcher = LeannSearcher(index_path=index_path)
|
searcher = LeannSearcher(index_path=index_path)
|
||||||
@@ -190,7 +215,7 @@ Examples:
|
|||||||
beam_width=args.beam_width,
|
beam_width=args.beam_width,
|
||||||
prune_ratio=args.prune_ratio,
|
prune_ratio=args.prune_ratio,
|
||||||
recompute_embeddings=args.recompute_embeddings,
|
recompute_embeddings=args.recompute_embeddings,
|
||||||
pruning_strategy=args.pruning_strategy
|
pruning_strategy=args.pruning_strategy,
|
||||||
)
|
)
|
||||||
|
|
||||||
print(f"Search results for '{query}' (top {len(results)}):")
|
print(f"Search results for '{query}' (top {len(results)}):")
|
||||||
@@ -204,7 +229,9 @@ Examples:
|
|||||||
index_path = self.get_index_path(index_name)
|
index_path = self.get_index_path(index_name)
|
||||||
|
|
||||||
if not self.index_exists(index_name):
|
if not self.index_exists(index_name):
|
||||||
print(f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir>' to create it.")
|
print(
|
||||||
|
f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir>' to create it."
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
print(f"Starting chat with index '{index_name}'...")
|
print(f"Starting chat with index '{index_name}'...")
|
||||||
@@ -222,7 +249,7 @@ Examples:
|
|||||||
|
|
||||||
while True:
|
while True:
|
||||||
user_input = input("\nYou: ").strip()
|
user_input = input("\nYou: ").strip()
|
||||||
if user_input.lower() in ['quit', 'exit', 'q']:
|
if user_input.lower() in ["quit", "exit", "q"]:
|
||||||
print("Goodbye!")
|
print("Goodbye!")
|
||||||
break
|
break
|
||||||
|
|
||||||
@@ -236,7 +263,7 @@ Examples:
|
|||||||
beam_width=args.beam_width,
|
beam_width=args.beam_width,
|
||||||
prune_ratio=args.prune_ratio,
|
prune_ratio=args.prune_ratio,
|
||||||
recompute_embeddings=args.recompute_embeddings,
|
recompute_embeddings=args.recompute_embeddings,
|
||||||
pruning_strategy=args.pruning_strategy
|
pruning_strategy=args.pruning_strategy,
|
||||||
)
|
)
|
||||||
print(f"LEANN: {response}")
|
print(f"LEANN: {response}")
|
||||||
else:
|
else:
|
||||||
@@ -249,7 +276,7 @@ Examples:
|
|||||||
beam_width=args.beam_width,
|
beam_width=args.beam_width,
|
||||||
prune_ratio=args.prune_ratio,
|
prune_ratio=args.prune_ratio,
|
||||||
recompute_embeddings=args.recompute_embeddings,
|
recompute_embeddings=args.recompute_embeddings,
|
||||||
pruning_strategy=args.pruning_strategy
|
pruning_strategy=args.pruning_strategy,
|
||||||
)
|
)
|
||||||
print(f"LEANN: {response}")
|
print(f"LEANN: {response}")
|
||||||
|
|
||||||
@@ -277,6 +304,7 @@ Examples:
|
|||||||
|
|
||||||
def main():
|
def main():
|
||||||
import dotenv
|
import dotenv
|
||||||
|
|
||||||
dotenv.load_dotenv()
|
dotenv.load_dotenv()
|
||||||
|
|
||||||
cli = LeannCLI()
|
cli = LeannCLI()
|
||||||
|
|||||||
Reference in New Issue
Block a user