chore(logging): surface recompute and batching params; enable INFO logging in benchmark

This commit is contained in:
yichuan-w
2025-08-18 19:34:19 -07:00
parent c5d8138349
commit a2d4c9231e
2 changed files with 19 additions and 0 deletions

View File

@@ -6,6 +6,8 @@ results and the golden standard results, making the comparison robust to ID chan
"""
import argparse
import logging
import os
import json
import sys
import time
@@ -14,6 +16,11 @@ from pathlib import Path
import numpy as np
from leann.api import LeannBuilder, LeannChat, LeannSearcher
# Configure logging level (default INFO; override with LEANN_LOG_LEVEL).
# LEANN_LOG_LEVEL may be a level name in any case (e.g. "debug") or a decimal
# numeric level (e.g. "10"). Anything unrecognised falls back to INFO.
_log_level_str = os.getenv("LEANN_LOG_LEVEL", "INFO").strip().upper()
if _log_level_str.isdecimal():
    _log_level = int(_log_level_str)
else:
    _log_level = getattr(logging, _log_level_str, logging.INFO)
    # getattr() can also match non-level attributes of the logging module
    # (e.g. logging.BASIC_FORMAT is a str); only accept real integer levels so
    # basicConfig() never receives an invalid value.
    if not isinstance(_log_level, int):
        _log_level = logging.INFO
logging.basicConfig(level=_log_level, format="%(asctime)s %(levelname)s %(name)s: %(message)s")
def download_data_if_needed(data_root: Path, download_embeddings: bool = False):
"""Checks if the data directory exists, and if not, downloads it from HF Hub."""

View File

@@ -233,6 +233,18 @@ class HNSWSearcher(BaseSearcher):
# HNSW-specific batch processing parameter
params.batch_size = batch_size
# Log recompute mode and batching for visibility
logger.info(
"HNSW search: recompute=%s, zmq_port=%s, batch_size=%d, efSearch=%d, beam=%d, prune_ratio=%.3f, strategy=%s",
bool(recompute_embeddings),
str(zmq_port),
int(batch_size),
int(complexity),
int(beam_width),
float(prune_ratio),
pruning_strategy,
)
batch_size_query = query.shape[0]
distances = np.empty((batch_size_query, top_k), dtype=np.float32)
labels = np.empty((batch_size_query, top_k), dtype=np.int64)