From a2d4c9231eed08250caf3225e3d51ccb98a692ed Mon Sep 17 00:00:00 2001
From: yichuan-w
Date: Mon, 18 Aug 2025 19:34:19 -0700
Subject: [PATCH] chore(logging): surface recompute and batching params; enable INFO logging in benchmark

---
NOTE(review): line structure restored from a whitespace-collapsed copy of this
patch. Hunk line counts match the @@ headers and the diffstat; leading
indentation of the Python lines was inferred from syntax -- confirm against
the target tree before applying (or apply with --ignore-whitespace).

 benchmarks/run_evaluation.py                          |  7 +++++++
 .../leann_backend_hnsw/hnsw_backend.py                | 12 ++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/benchmarks/run_evaluation.py b/benchmarks/run_evaluation.py
index ab4e169..ba340bb 100644
--- a/benchmarks/run_evaluation.py
+++ b/benchmarks/run_evaluation.py
@@ -6,6 +6,8 @@ results and the golden standard results, making the comparison robust to ID chan
 """
 
 import argparse
+import logging
+import os
 import json
 import sys
 import time
@@ -14,6 +16,11 @@ from pathlib import Path
 import numpy as np
 from leann.api import LeannBuilder, LeannChat, LeannSearcher
 
+# Configure logging level (default INFO; override with LEANN_LOG_LEVEL)
+_log_level_str = os.getenv("LEANN_LOG_LEVEL", "INFO").upper()
+_log_level = getattr(logging, _log_level_str, logging.INFO)
+logging.basicConfig(level=_log_level, format="%(asctime)s %(levelname)s %(name)s: %(message)s")
+
 
 def download_data_if_needed(data_root: Path, download_embeddings: bool = False):
     """Checks if the data directory exists, and if not, downloads it from HF Hub."""
diff --git a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py
index 4437bf6..bbe050d 100644
--- a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py
+++ b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py
@@ -233,6 +233,18 @@ class HNSWSearcher(BaseSearcher):
         # HNSW-specific batch processing parameter
         params.batch_size = batch_size
 
+        # Log recompute mode and batching for visibility
+        logger.info(
+            "HNSW search: recompute=%s, zmq_port=%s, batch_size=%d, efSearch=%d, beam=%d, prune_ratio=%.3f, strategy=%s",
+            bool(recompute_embeddings),
+            str(zmq_port),
+            int(batch_size),
+            int(complexity),
+            int(beam_width),
+            float(prune_ratio),
+            pruning_strategy,
+        )
+
         batch_size_query = query.shape[0]
         distances = np.empty((batch_size_query, top_k), dtype=np.float32)
         labels = np.empty((batch_size_query, top_k), dtype=np.int64)