chore(logging): surface recompute and batching params; enable INFO logging in benchmark

This commit is contained in:
yichuan-w
2025-08-18 19:34:19 -07:00
parent c5d8138349
commit a2d4c9231e
2 changed files with 19 additions and 0 deletions

View File

@@ -6,6 +6,8 @@ results and the golden standard results, making the comparison robust to ID chan
""" """
import argparse import argparse
import logging
import os
import json import json
import sys import sys
import time import time
@@ -14,6 +16,11 @@ from pathlib import Path
import numpy as np import numpy as np
from leann.api import LeannBuilder, LeannChat, LeannSearcher from leann.api import LeannBuilder, LeannChat, LeannSearcher
# Configure the root logger for the benchmark run. The level defaults to INFO
# and can be overridden via the LEANN_LOG_LEVEL environment variable
# (e.g. DEBUG, WARNING); surrounding whitespace and letter case are ignored.
_log_level_str = os.getenv("LEANN_LOG_LEVEL", "INFO").strip().upper()
_log_level = getattr(logging, _log_level_str, logging.INFO)
# Not every upper-case attribute of ``logging`` is a level (BASIC_FORMAT is a
# format string), so fall back to INFO unless the lookup produced an int.
if not isinstance(_log_level, int):
    _log_level = logging.INFO
logging.basicConfig(level=_log_level, format="%(asctime)s %(levelname)s %(name)s: %(message)s")
def download_data_if_needed(data_root: Path, download_embeddings: bool = False): def download_data_if_needed(data_root: Path, download_embeddings: bool = False):
"""Checks if the data directory exists, and if not, downloads it from HF Hub.""" """Checks if the data directory exists, and if not, downloads it from HF Hub."""

View File

@@ -233,6 +233,18 @@ class HNSWSearcher(BaseSearcher):
# HNSW-specific batch processing parameter # HNSW-specific batch processing parameter
params.batch_size = batch_size params.batch_size = batch_size
# Log recompute mode and batching for visibility
logger.info(
"HNSW search: recompute=%s, zmq_port=%s, batch_size=%d, efSearch=%d, beam=%d, prune_ratio=%.3f, strategy=%s",
bool(recompute_embeddings),
str(zmq_port),
int(batch_size),
int(complexity),
int(beam_width),
float(prune_ratio),
pruning_strategy,
)
batch_size_query = query.shape[0] batch_size_query = query.shape[0]
distances = np.empty((batch_size_query, top_k), dtype=np.float32) distances = np.empty((batch_size_query, top_k), dtype=np.float32)
labels = np.empty((batch_size_query, top_k), dtype=np.int64) labels = np.empty((batch_size_query, top_k), dtype=np.int64)