docs: cli

2025-07-21 23:48:40 -07:00
parent 870a443446
commit 71e5f1774c
2 changed files with 166 additions and 73 deletions
--- a/README.md
+++ b/README.md
@@ -294,6 +294,71 @@ Once the index is built, you can ask questions like:
 </details>
 ## 🖥️ Command Line Interface
 LEANN includes a powerful CLI for document processing and search, well suited to quick document indexing and interactive chat.
 ```bash
 # Build an index from documents
 leann build my-docs --docs ./documents
 # Search your documents
 leann search my-docs "machine learning concepts"
 # Interactive chat with your documents
 leann ask my-docs --interactive
 # List all your indexes
 leann list
 ```
 **Key CLI features:**
 - Auto-detects document formats (PDF, TXT, MD, DOCX)
 - Smart text chunking with overlap
 - Multiple LLM providers (Ollama, OpenAI, HuggingFace)
 - Organized index storage in `~/.leann/indexes/`
 - Support for advanced search parameters
 <details>
 <summary><strong>📋 Click to expand: Complete CLI Reference</strong></summary>
 **Build Command:**
 ```bash
 leann build INDEX_NAME --docs DIRECTORY [OPTIONS]
 Options:
  --backend {hnsw,diskann}    Backend to use (default: hnsw)
  --embedding-model MODEL     Embedding model (default: facebook/contriever)
  --graph-degree N            Graph degree (default: 32)
  --complexity N              Build complexity (default: 64)
  --force, -f                 Force rebuild existing index
  --compact                   Use compact storage (default: true)
  --recompute                 Enable recomputation (default: true)
 ```
 **Search Command:**
 ```bash
 leann search INDEX_NAME QUERY [OPTIONS]
 Options:
  --top-k N                   Number of results (default: 5)
  --complexity N              Search complexity (default: 64)
  --recompute-embeddings      Use recomputation for highest accuracy
  --pruning-strategy {global,local,proportional}  Pruning strategy (default: global)
 ```
 **Ask Command:**
 ```bash
 leann ask INDEX_NAME [OPTIONS]
 Options:
  --llm {simulated,ollama,hf,openai}  LLM provider (default: ollama)
  --model MODEL               Model name (default: qwen3:8b)
  --interactive, -i           Interactive chat mode
  --top-k N                   Retrieval count (default: 20)
 ```
 </details>
 ## 🏗️ Architecture & How It Works
--- a/packages/leann-core/src/leann/cli.py
+++ b/packages/leann-core/src/leann/cli.py
@@ -1,10 +1,6 @@
 #!/usr/bin/env python3
 import argparse
 import asyncio
 import sys
 from pathlib import Path
 from typing import Optional
 import os
 from llama_index.core import SimpleDirectoryReader
 from llama_index.core.node_parser import SentenceSplitter
@@ -41,7 +37,7 @@ Examples:
  leann search my-docs "query"             # Search in my-docs index
  leann ask my-docs "question"             # Ask my-docs index
  leann list                              # List all stored indexes
-            """
+            """,
        )
        subparsers = parser.add_subparsers(dest="command", help="Available commands")
@@ -49,10 +45,18 @@ Examples:
        # Build command
        build_parser = subparsers.add_parser("build", help="Build document index")
        build_parser.add_argument("index_name", help="Index name")
-        build_parser.add_argument("--docs", type=str, required=True, help="Documents directory")
+        build_parser.add_argument(
-        build_parser.add_argument("--backend", type=str, default="hnsw", choices=["hnsw", "diskann"])
+            "--docs", type=str, required=True, help="Documents directory"
-        build_parser.add_argument("--embedding-model", type=str, default="facebook/contriever")
+        )
-        build_parser.add_argument("--force", "-f", action="store_true", help="Force rebuild")
+        build_parser.add_argument(
            "--backend", type=str, default="hnsw", choices=["hnsw", "diskann"]
        )
        build_parser.add_argument(
            "--embedding-model", type=str, default="facebook/contriever"
        )
        build_parser.add_argument(
            "--force", "-f", action="store_true", help="Force rebuild"
        )
        build_parser.add_argument("--graph-degree", type=int, default=32)
        build_parser.add_argument("--complexity", type=int, default=64)
        build_parser.add_argument("--num-threads", type=int, default=1)
@@ -68,12 +72,21 @@ Examples:
        search_parser.add_argument("--beam-width", type=int, default=1)
        search_parser.add_argument("--prune-ratio", type=float, default=0.0)
        search_parser.add_argument("--recompute-embeddings", action="store_true")
-        search_parser.add_argument("--pruning-strategy", choices=["global", "local", "proportional"], default="global")
+        search_parser.add_argument(
            "--pruning-strategy",
            choices=["global", "local", "proportional"],
            default="global",
        )
        # Ask command
        ask_parser = subparsers.add_parser("ask", help="Ask questions")
        ask_parser.add_argument("index_name", help="Index name")
-        ask_parser.add_argument("--llm", type=str, default="ollama", choices=["simulated", "ollama", "hf", "openai"])
+        ask_parser.add_argument(
            "--llm",
            type=str,
            default="ollama",
            choices=["simulated", "ollama", "hf", "openai"],
        )
        ask_parser.add_argument("--model", type=str, default="qwen3:8b")
        ask_parser.add_argument("--host", type=str, default="http://localhost:11434")
        ask_parser.add_argument("--interactive", "-i", action="store_true")
@@ -82,7 +95,11 @@ Examples:
        ask_parser.add_argument("--beam-width", type=int, default=1)
        ask_parser.add_argument("--prune-ratio", type=float, default=0.0)
        ask_parser.add_argument("--recompute-embeddings", action="store_true")
-        ask_parser.add_argument("--pruning-strategy", choices=["global", "local", "proportional"], default="global")
+        ask_parser.add_argument(
            "--pruning-strategy",
            choices=["global", "local", "proportional"],
            default="global",
        )
        # List command
        list_parser = subparsers.add_parser("list", help="List all indexes")
@@ -93,13 +110,17 @@ Examples:
        print("Stored LEANN indexes:")
        if not self.indexes_dir.exists():
-            print("No indexes found. Use 'leann build <name> --docs <dir>' to create one.")
+            print(
                "No indexes found. Use 'leann build <name> --docs <dir>' to create one."
            )
            return
        index_dirs = [d for d in self.indexes_dir.iterdir() if d.is_dir()]
        if not index_dirs:
-            print("No indexes found. Use 'leann build <name> --docs <dir>' to create one.")
+            print(
                "No indexes found. Use 'leann build <name> --docs <dir>' to create one."
            )
            return
        print(f"Found {len(index_dirs)} indexes:")
@@ -110,13 +131,15 @@ Examples:
            print(f"  {i}. {index_name} [{status}]")
            if self.index_exists(index_name):
                meta_file = index_dir / "documents.leann.meta.json"
-                size_mb = sum(f.stat().st_size for f in index_dir.iterdir() if f.is_file()) / (1024 * 1024)
+                size_mb = sum(
                    f.stat().st_size for f in index_dir.iterdir() if f.is_file()
                ) / (1024 * 1024)
                print(f"     Size: {size_mb:.1f} MB")
        if index_dirs:
            example_name = index_dirs[0].name
            print(f"\nUsage:")
-            print(f"  leann search {example_name} \"your query\"")
+            print(f'  leann search {example_name} "your query"')
            print(f"  leann ask {example_name} --interactive")
    def load_documents(self, docs_dir: str):
@@ -179,7 +202,9 @@ Examples:
        index_path = self.get_index_path(index_name)
        if not self.index_exists(index_name):
-            print(f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir>' to create it.")
+            print(
                f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir>' to create it."
            )
            return
        searcher = LeannSearcher(index_path=index_path)
@@ -190,7 +215,7 @@ Examples:
            beam_width=args.beam_width,
            prune_ratio=args.prune_ratio,
            recompute_embeddings=args.recompute_embeddings,
-            pruning_strategy=args.pruning_strategy
+            pruning_strategy=args.pruning_strategy,
        )
        print(f"Search results for '{query}' (top {len(results)}):")
@@ -204,7 +229,9 @@ Examples:
        index_path = self.get_index_path(index_name)
        if not self.index_exists(index_name):
-            print(f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir>' to create it.")
+            print(
                f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir>' to create it."
            )
            return
        print(f"Starting chat with index '{index_name}'...")
@@ -222,7 +249,7 @@ Examples:
            while True:
                user_input = input("\nYou: ").strip()
-                if user_input.lower() in ['quit', 'exit', 'q']:
+                if user_input.lower() in ["quit", "exit", "q"]:
                    print("Goodbye!")
                    break
@@ -236,7 +263,7 @@ Examples:
                    beam_width=args.beam_width,
                    prune_ratio=args.prune_ratio,
                    recompute_embeddings=args.recompute_embeddings,
-                    pruning_strategy=args.pruning_strategy
+                    pruning_strategy=args.pruning_strategy,
                )
                print(f"LEANN: {response}")
        else:
@@ -249,7 +276,7 @@ Examples:
                    beam_width=args.beam_width,
                    prune_ratio=args.prune_ratio,
                    recompute_embeddings=args.recompute_embeddings,
-                    pruning_strategy=args.pruning_strategy
+                    pruning_strategy=args.pruning_strategy,
                )
                print(f"LEANN: {response}")
@@ -277,6 +304,7 @@ Examples:
 def main():
    import dotenv
    dotenv.load_dotenv()
    cli = LeannCLI()