fix: prevent hang in CI by flushing print statements and redirecting embedding server output
- Add flush=True to all print statements in convert_to_csr.py to prevent buffer deadlock
- Redirect embedding server stdout/stderr to DEVNULL in CI environment (CI=true)
- Fix timeout in embedding_server_manager.stop_server() final wait call
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import argparse
|
||||
import gc # Import garbage collector interface
|
||||
import logging
|
||||
import os
|
||||
import struct
|
||||
import sys
|
||||
@@ -7,6 +8,12 @@ import time
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Set up logging to avoid print buffer issues
|
||||
logger = logging.getLogger(__name__)
|
||||
LOG_LEVEL = os.getenv("LEANN_LOG_LEVEL", "WARNING").upper()
|
||||
log_level = getattr(logging, LOG_LEVEL, logging.WARNING)
|
||||
logger.setLevel(log_level)
|
||||
|
||||
# --- FourCCs (add more if needed) ---
|
||||
INDEX_HNSW_FLAT_FOURCC = int.from_bytes(b"IHNf", "little")
|
||||
# Add other HNSW fourccs if you expect different storage types inside HNSW
|
||||
@@ -243,6 +250,12 @@ def convert_hnsw_graph_to_csr(input_filename, output_filename, prune_embeddings=
|
||||
output_filename: Output CSR index file
|
||||
prune_embeddings: Whether to prune embedding storage (write NULL storage marker)
|
||||
"""
|
||||
# Disable buffering for print statements to avoid deadlock in CI/pytest
|
||||
import functools
|
||||
|
||||
global print
|
||||
print = functools.partial(print, flush=True)
|
||||
|
||||
print(f"Starting conversion: {input_filename} -> {output_filename}")
|
||||
start_time = time.time()
|
||||
original_hnsw_data = {}
|
||||
|
||||
@@ -305,15 +305,23 @@ class EmbeddingServerManager:
|
||||
project_root = Path(__file__).parent.parent.parent.parent.parent
|
||||
logger.info(f"Command: {' '.join(command)}")
|
||||
|
||||
# Let server output go directly to console
|
||||
# The server will respect LEANN_LOG_LEVEL environment variable
|
||||
# IMPORTANT: Use a new session so we can manage the whole process group reliably,
|
||||
# and detach stdio to avoid lingering output keeping CI steps noisy/alive.
|
||||
# In CI environment, redirect output to avoid buffer deadlock
|
||||
# Embedding servers use many print statements that can fill buffers
|
||||
is_ci = os.environ.get("CI") == "true"
|
||||
if is_ci:
|
||||
stdout_target = subprocess.DEVNULL
|
||||
stderr_target = subprocess.DEVNULL
|
||||
logger.info("CI environment detected, redirecting embedding server output to DEVNULL")
|
||||
else:
|
||||
stdout_target = None # Direct to console for visible logs
|
||||
stderr_target = None # Direct to console for visible logs
|
||||
|
||||
# IMPORTANT: Use a new session so we can manage the whole process group reliably
|
||||
self.server_process = subprocess.Popen(
|
||||
command,
|
||||
cwd=project_root,
|
||||
stdout=None, # Direct to console for visible logs
|
||||
stderr=None, # Direct to console for visible logs
|
||||
stdout=stdout_target,
|
||||
stderr=stderr_target,
|
||||
start_new_session=True,
|
||||
)
|
||||
self.server_port = port
|
||||
|
||||
Reference in New Issue
Block a user