Docs/Core: Low-Resource Setups, SkyPilot Option, and No-Recompute (#45)
* docs: add SkyPilot template and instructions for running embeddings/index build on cloud GPU * docs: add low-resource note in README; point to config guide; suggest OpenAI embeddings, SkyPilot remote build, and --no-recompute * docs: consolidate low-resource guidance into config guide; README points to it * cli: add --no-recompute and --no-recompute-embeddings flags; docs: clarify HNSW requires --no-compact when disabling recompute * docs: dedupe recomputation guidance; keep single Low-resource setups section * sky: expand leann-build.yaml with configurable params and flags (backend, recompute, compact, embedding options) * hnsw: auto-disable compact when --no-recompute is used; docs: expand SkyPilot with -e overrides and copy-back example * docs+sky: simplify SkyPilot flow (auto-build on launch, rsync copy-back); clarify HNSW auto non-compact when no-recompute * feat: auto compact for hnsw when recompute * reader: non-destructive portability (relative hints + fallback); fix comments; sky: refine yaml * cli: unify flags to --recompute/--no-recompute for build/search/ask; docs: update references * chore: remove * hnsw: move pruned/no-recompute assertion into backend; api: drop global assertion; docs: will adjust after benchmarking * cli: use argparse.BooleanOptionalAction for paired flags (--recompute/--compact) across build/search/ask * docs: a real example on recompute * benchmarks: fix and extend HNSW+DiskANN recompute vs no-recompute; docs: add fresh numbers and DiskANN notes * benchmarks: unify HNSW & DiskANN into one clean script; isolate groups, fixed ports, warm-up, param complexity * docs: diskann recompute * core: auto-cleanup for LeannSearcher/LeannChat (__enter__/__exit__/__del__); ensure server terminate/kill robustness; benchmarks: use searcher.cleanup(); docs: suggest uv run * fix: hang on warnings * docs: boolean flags * docs: leann help
This commit is contained in:
@@ -441,9 +441,14 @@ class DiskannSearcher(BaseSearcher):
|
||||
else: # "global"
|
||||
use_global_pruning = True
|
||||
|
||||
# Perform search with suppressed C++ output based on log level
|
||||
use_deferred_fetch = kwargs.get("USE_DEFERRED_FETCH", True)
|
||||
recompute_neighors = False
|
||||
# Strategy:
|
||||
# - Traversal always uses PQ distances
|
||||
# - If recompute_embeddings=True, do a single final rerank via deferred fetch
|
||||
# (fetch embeddings for the final candidate set only)
|
||||
# - Do not recompute neighbor distances along the path
|
||||
use_deferred_fetch = True if recompute_embeddings else False
|
||||
recompute_neighors = False # Expected typo. For backward compatibility.
|
||||
|
||||
with suppress_cpp_output_if_needed():
|
||||
labels, distances = self._index.batch_search(
|
||||
query,
|
||||
|
||||
@@ -54,12 +54,13 @@ class HNSWBuilder(LeannBackendBuilderInterface):
|
||||
self.efConstruction = self.build_params.setdefault("efConstruction", 200)
|
||||
self.distance_metric = self.build_params.setdefault("distance_metric", "mips")
|
||||
self.dimensions = self.build_params.get("dimensions")
|
||||
if not self.is_recompute:
|
||||
if self.is_compact:
|
||||
# TODO: support this case @andy
|
||||
raise ValueError(
|
||||
"is_recompute is False, but is_compact is True. This is not compatible now. change is compact to False and you can use the original HNSW index."
|
||||
)
|
||||
if not self.is_recompute and self.is_compact:
|
||||
# Auto-correct: non-recompute requires non-compact storage for HNSW
|
||||
logger.warning(
|
||||
"is_recompute=False requires non-compact HNSW. Forcing is_compact=False."
|
||||
)
|
||||
self.is_compact = False
|
||||
self.build_params["is_compact"] = False
|
||||
|
||||
def build(self, data: np.ndarray, ids: list[str], index_path: str, **kwargs):
|
||||
from . import faiss # type: ignore
|
||||
@@ -184,9 +185,11 @@ class HNSWSearcher(BaseSearcher):
|
||||
"""
|
||||
from . import faiss # type: ignore
|
||||
|
||||
if not recompute_embeddings:
|
||||
if self.is_pruned:
|
||||
raise RuntimeError("Recompute is required for pruned index.")
|
||||
if not recompute_embeddings and self.is_pruned:
|
||||
raise RuntimeError(
|
||||
"Recompute is required for pruned/compact HNSW index. "
|
||||
"Re-run search with --recompute, or rebuild with --no-recompute and --no-compact."
|
||||
)
|
||||
if recompute_embeddings:
|
||||
if zmq_port is None:
|
||||
raise ValueError("zmq_port must be provided if recompute_embeddings is True")
|
||||
|
||||
@@ -204,6 +204,18 @@ class LeannBuilder:
|
||||
**backend_kwargs,
|
||||
):
|
||||
self.backend_name = backend_name
|
||||
# Normalize incompatible combinations early (for consistent metadata)
|
||||
if backend_name == "hnsw":
|
||||
is_recompute = backend_kwargs.get("is_recompute", True)
|
||||
is_compact = backend_kwargs.get("is_compact", True)
|
||||
if is_recompute is False and is_compact is True:
|
||||
warnings.warn(
|
||||
"HNSW with is_recompute=False requires non-compact storage. Forcing is_compact=False.",
|
||||
UserWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
backend_kwargs["is_compact"] = False
|
||||
|
||||
backend_factory: Optional[LeannBackendFactoryInterface] = BACKEND_REGISTRY.get(backend_name)
|
||||
if backend_factory is None:
|
||||
raise ValueError(f"Backend '{backend_name}' not found or not registered.")
|
||||
@@ -523,6 +535,7 @@ class LeannSearcher:
|
||||
self.embedding_model = self.meta_data["embedding_model"]
|
||||
# Support both old and new format
|
||||
self.embedding_mode = self.meta_data.get("embedding_mode", "sentence-transformers")
|
||||
# Delegate portability handling to PassageManager
|
||||
self.passage_manager = PassageManager(
|
||||
self.meta_data.get("passage_sources", []), metadata_file_path=self.meta_path_str
|
||||
)
|
||||
@@ -652,6 +665,23 @@ class LeannSearcher:
|
||||
if hasattr(self.backend_impl, "embedding_server_manager"):
|
||||
self.backend_impl.embedding_server_manager.stop_server()
|
||||
|
||||
# Enable automatic cleanup patterns
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
try:
|
||||
self.cleanup()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def __del__(self):
|
||||
try:
|
||||
self.cleanup()
|
||||
except Exception:
|
||||
# Avoid noisy errors during interpreter shutdown
|
||||
pass
|
||||
|
||||
|
||||
class LeannChat:
|
||||
def __init__(
|
||||
@@ -730,3 +760,19 @@ class LeannChat:
|
||||
"""
|
||||
if hasattr(self.searcher, "cleanup"):
|
||||
self.searcher.cleanup()
|
||||
|
||||
# Enable automatic cleanup patterns
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
try:
|
||||
self.cleanup()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def __del__(self):
|
||||
try:
|
||||
self.cleanup()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -422,7 +422,6 @@ class LLMInterface(ABC):
|
||||
top_k=10,
|
||||
complexity=64,
|
||||
beam_width=8,
|
||||
USE_DEFERRED_FETCH=True,
|
||||
skip_search_reorder=True,
|
||||
recompute_beighbor_embeddings=True,
|
||||
dedup_node_dis=True,
|
||||
@@ -434,7 +433,6 @@ class LLMInterface(ABC):
|
||||
Supported kwargs:
|
||||
- complexity (int): Search complexity parameter (default: 32)
|
||||
- beam_width (int): Beam width for search (default: 4)
|
||||
- USE_DEFERRED_FETCH (bool): Enable deferred fetch mode (default: False)
|
||||
- skip_search_reorder (bool): Skip search reorder step (default: False)
|
||||
- recompute_beighbor_embeddings (bool): Enable ZMQ embedding server for neighbor recomputation (default: False)
|
||||
- dedup_node_dis (bool): Deduplicate nodes by distance (default: False)
|
||||
|
||||
@@ -72,7 +72,7 @@ class LeannCLI:
|
||||
def create_parser(self) -> argparse.ArgumentParser:
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="leann",
|
||||
description="LEANN - Local Enhanced AI Navigation",
|
||||
description="The smallest vector index in the world. RAG Everything with LEANN!",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
@@ -102,9 +102,18 @@ Examples:
|
||||
help="Documents directories and/or files (default: current directory)",
|
||||
)
|
||||
build_parser.add_argument(
|
||||
"--backend", type=str, default="hnsw", choices=["hnsw", "diskann"]
|
||||
"--backend",
|
||||
type=str,
|
||||
default="hnsw",
|
||||
choices=["hnsw", "diskann"],
|
||||
help="Backend to use (default: hnsw)",
|
||||
)
|
||||
build_parser.add_argument(
|
||||
"--embedding-model",
|
||||
type=str,
|
||||
default="facebook/contriever",
|
||||
help="Embedding model (default: facebook/contriever)",
|
||||
)
|
||||
build_parser.add_argument("--embedding-model", type=str, default="facebook/contriever")
|
||||
build_parser.add_argument(
|
||||
"--embedding-mode",
|
||||
type=str,
|
||||
@@ -112,12 +121,28 @@ Examples:
|
||||
choices=["sentence-transformers", "openai", "mlx", "ollama"],
|
||||
help="Embedding backend mode (default: sentence-transformers)",
|
||||
)
|
||||
build_parser.add_argument("--force", "-f", action="store_true", help="Force rebuild")
|
||||
build_parser.add_argument("--graph-degree", type=int, default=32)
|
||||
build_parser.add_argument("--complexity", type=int, default=64)
|
||||
build_parser.add_argument(
|
||||
"--force", "-f", action="store_true", help="Force rebuild existing index"
|
||||
)
|
||||
build_parser.add_argument(
|
||||
"--graph-degree", type=int, default=32, help="Graph degree (default: 32)"
|
||||
)
|
||||
build_parser.add_argument(
|
||||
"--complexity", type=int, default=64, help="Build complexity (default: 64)"
|
||||
)
|
||||
build_parser.add_argument("--num-threads", type=int, default=1)
|
||||
build_parser.add_argument("--compact", action="store_true", default=True)
|
||||
build_parser.add_argument("--recompute", action="store_true", default=True)
|
||||
build_parser.add_argument(
|
||||
"--compact",
|
||||
action=argparse.BooleanOptionalAction,
|
||||
default=True,
|
||||
help="Use compact storage (default: true). Must be `no-compact` for `no-recompute` build.",
|
||||
)
|
||||
build_parser.add_argument(
|
||||
"--recompute",
|
||||
action=argparse.BooleanOptionalAction,
|
||||
default=True,
|
||||
help="Enable recomputation (default: true)",
|
||||
)
|
||||
build_parser.add_argument(
|
||||
"--file-types",
|
||||
type=str,
|
||||
@@ -152,20 +177,26 @@ Examples:
|
||||
search_parser = subparsers.add_parser("search", help="Search documents")
|
||||
search_parser.add_argument("index_name", help="Index name")
|
||||
search_parser.add_argument("query", help="Search query")
|
||||
search_parser.add_argument("--top-k", type=int, default=5)
|
||||
search_parser.add_argument("--complexity", type=int, default=64)
|
||||
search_parser.add_argument(
|
||||
"--top-k", type=int, default=5, help="Number of results (default: 5)"
|
||||
)
|
||||
search_parser.add_argument(
|
||||
"--complexity", type=int, default=64, help="Search complexity (default: 64)"
|
||||
)
|
||||
search_parser.add_argument("--beam-width", type=int, default=1)
|
||||
search_parser.add_argument("--prune-ratio", type=float, default=0.0)
|
||||
search_parser.add_argument(
|
||||
"--recompute-embeddings",
|
||||
action="store_true",
|
||||
"--recompute",
|
||||
dest="recompute_embeddings",
|
||||
action=argparse.BooleanOptionalAction,
|
||||
default=True,
|
||||
help="Recompute embeddings (default: True)",
|
||||
help="Enable/disable embedding recomputation (default: enabled). Should not do a `no-recompute` search in a `recompute` build.",
|
||||
)
|
||||
search_parser.add_argument(
|
||||
"--pruning-strategy",
|
||||
choices=["global", "local", "proportional"],
|
||||
default="global",
|
||||
help="Pruning strategy (default: global)",
|
||||
)
|
||||
|
||||
# Ask command
|
||||
@@ -176,19 +207,27 @@ Examples:
|
||||
type=str,
|
||||
default="ollama",
|
||||
choices=["simulated", "ollama", "hf", "openai"],
|
||||
help="LLM provider (default: ollama)",
|
||||
)
|
||||
ask_parser.add_argument(
|
||||
"--model", type=str, default="qwen3:8b", help="Model name (default: qwen3:8b)"
|
||||
)
|
||||
ask_parser.add_argument("--model", type=str, default="qwen3:8b")
|
||||
ask_parser.add_argument("--host", type=str, default="http://localhost:11434")
|
||||
ask_parser.add_argument("--interactive", "-i", action="store_true")
|
||||
ask_parser.add_argument("--top-k", type=int, default=20)
|
||||
ask_parser.add_argument(
|
||||
"--interactive", "-i", action="store_true", help="Interactive chat mode"
|
||||
)
|
||||
ask_parser.add_argument(
|
||||
"--top-k", type=int, default=20, help="Retrieval count (default: 20)"
|
||||
)
|
||||
ask_parser.add_argument("--complexity", type=int, default=32)
|
||||
ask_parser.add_argument("--beam-width", type=int, default=1)
|
||||
ask_parser.add_argument("--prune-ratio", type=float, default=0.0)
|
||||
ask_parser.add_argument(
|
||||
"--recompute-embeddings",
|
||||
action="store_true",
|
||||
"--recompute",
|
||||
dest="recompute_embeddings",
|
||||
action=argparse.BooleanOptionalAction,
|
||||
default=True,
|
||||
help="Recompute embeddings (default: True)",
|
||||
help="Enable/disable embedding recomputation during ask (default: enabled)",
|
||||
)
|
||||
ask_parser.add_argument(
|
||||
"--pruning-strategy",
|
||||
|
||||
@@ -268,8 +268,12 @@ class EmbeddingServerManager:
|
||||
f"Terminating server process (PID: {self.server_process.pid}) for backend {self.backend_module_name}..."
|
||||
)
|
||||
|
||||
# Use simple termination - our improved server shutdown should handle this properly
|
||||
self.server_process.terminate()
|
||||
# Use simple termination first; if the server installed signal handlers,
|
||||
# it will exit cleanly. Otherwise escalate to kill after a short wait.
|
||||
try:
|
||||
self.server_process.terminate()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
self.server_process.wait(timeout=5) # Give more time for graceful shutdown
|
||||
@@ -278,7 +282,10 @@ class EmbeddingServerManager:
|
||||
logger.warning(
|
||||
f"Server process {self.server_process.pid} did not terminate within 5 seconds, force killing..."
|
||||
)
|
||||
self.server_process.kill()
|
||||
try:
|
||||
self.server_process.kill()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
self.server_process.wait(timeout=2)
|
||||
logger.info(f"Server process {self.server_process.pid} killed successfully.")
|
||||
|
||||
Reference in New Issue
Block a user