fix: prevent hang in CI by flushing print statements and redirecting embedding server output

- Force flush=True on all print calls in convert_to_csr.py (by rebinding print with functools.partial) to prevent buffer deadlock
- Redirect embedding server stdout/stderr to DEVNULL in CI environment (CI=true)
- Fix timeout in embedding_server_manager.stop_server() final wait call
This commit is contained in:
Andy Lee
2025-08-07 21:53:58 -07:00
parent c799d61a5a
commit 8714472cd8
2 changed files with 27 additions and 6 deletions

View File

@@ -1,5 +1,6 @@
import argparse import argparse
import gc # Import garbage collector interface import gc # Import garbage collector interface
import logging
import os import os
import struct import struct
import sys import sys
@@ -7,6 +8,12 @@ import time
import numpy as np import numpy as np
# Set up logging to avoid print buffer issues
logger = logging.getLogger(__name__)
LOG_LEVEL = os.getenv("LEANN_LOG_LEVEL", "WARNING").upper()
log_level = getattr(logging, LOG_LEVEL, logging.WARNING)
logger.setLevel(log_level)
# --- FourCCs (add more if needed) --- # --- FourCCs (add more if needed) ---
INDEX_HNSW_FLAT_FOURCC = int.from_bytes(b"IHNf", "little") INDEX_HNSW_FLAT_FOURCC = int.from_bytes(b"IHNf", "little")
# Add other HNSW fourccs if you expect different storage types inside HNSW # Add other HNSW fourccs if you expect different storage types inside HNSW
@@ -243,6 +250,12 @@ def convert_hnsw_graph_to_csr(input_filename, output_filename, prune_embeddings=
output_filename: Output CSR index file output_filename: Output CSR index file
prune_embeddings: Whether to prune embedding storage (write NULL storage marker) prune_embeddings: Whether to prune embedding storage (write NULL storage marker)
""" """
# Flush after every print (rebinds the module-level print with flush=True) to avoid deadlock in CI/pytest
import functools
global print
print = functools.partial(print, flush=True)
print(f"Starting conversion: {input_filename} -> {output_filename}") print(f"Starting conversion: {input_filename} -> {output_filename}")
start_time = time.time() start_time = time.time()
original_hnsw_data = {} original_hnsw_data = {}

View File

@@ -305,15 +305,23 @@ class EmbeddingServerManager:
project_root = Path(__file__).parent.parent.parent.parent.parent project_root = Path(__file__).parent.parent.parent.parent.parent
logger.info(f"Command: {' '.join(command)}") logger.info(f"Command: {' '.join(command)}")
# Let server output go directly to console # In CI environment, redirect output to avoid buffer deadlock
# The server will respect LEANN_LOG_LEVEL environment variable # Embedding servers use many print statements that can fill buffers
# IMPORTANT: Use a new session so we can manage the whole process group reliably, is_ci = os.environ.get("CI") == "true"
# and detach stdio to avoid lingering output keeping CI steps noisy/alive. if is_ci:
stdout_target = subprocess.DEVNULL
stderr_target = subprocess.DEVNULL
logger.info("CI environment detected, redirecting embedding server output to DEVNULL")
else:
stdout_target = None # Direct to console for visible logs
stderr_target = None # Direct to console for visible logs
# IMPORTANT: Use a new session so we can manage the whole process group reliably
self.server_process = subprocess.Popen( self.server_process = subprocess.Popen(
command, command,
cwd=project_root, cwd=project_root,
stdout=None, # Direct to console for visible logs stdout=stdout_target,
stderr=None, # Direct to console for visible logs stderr=stderr_target,
start_new_session=True, start_new_session=True,
) )
self.server_port = port self.server_port = port