fix: prevent hang in CI by flushing print statements and redirecting embedding server output

- Add flush=True to all print statements in convert_to_csr.py to prevent buffer deadlock
- Redirect embedding server stdout/stderr to DEVNULL in CI environment (CI=true)
- Fix timeout in embedding_server_manager.stop_server() final wait call
This commit is contained in:
Andy Lee
2025-08-07 21:53:58 -07:00
parent c799d61a5a
commit 8714472cd8
2 changed files with 27 additions and 6 deletions

View File

@@ -1,5 +1,6 @@
import argparse
import gc # Import garbage collector interface
import logging
import os
import struct
import sys
@@ -7,6 +8,12 @@ import time
import numpy as np
# Set up logging to avoid print buffer issues
logger = logging.getLogger(__name__)
LOG_LEVEL = os.getenv("LEANN_LOG_LEVEL", "WARNING").upper()
log_level = getattr(logging, LOG_LEVEL, logging.WARNING)
logger.setLevel(log_level)
# --- FourCCs (add more if needed) ---
INDEX_HNSW_FLAT_FOURCC = int.from_bytes(b"IHNf", "little")
# Add other HNSW fourccs if you expect different storage types inside HNSW
@@ -243,6 +250,12 @@ def convert_hnsw_graph_to_csr(input_filename, output_filename, prune_embeddings=
output_filename: Output CSR index file
prune_embeddings: Whether to prune embedding storage (write NULL storage marker)
"""
# Disable buffering for print statements to avoid deadlock in CI/pytest
import functools
global print
print = functools.partial(print, flush=True)
print(f"Starting conversion: {input_filename} -> {output_filename}")
start_time = time.time()
original_hnsw_data = {}