fix: prevent hang in CI by flushing print statements and redirecting embedding server output
- Add flush=True to all print statements in convert_to_csr.py to prevent buffer deadlock - Redirect embedding server stdout/stderr to DEVNULL in CI environment (CI=true) - Fix timeout in embedding_server_manager.stop_server() final wait call
This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import gc # Import garbage collector interface
|
import gc # Import garbage collector interface
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
import struct
|
import struct
|
||||||
import sys
|
import sys
|
||||||
@@ -7,6 +8,12 @@ import time
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
# Set up logging to avoid print buffer issues
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
LOG_LEVEL = os.getenv("LEANN_LOG_LEVEL", "WARNING").upper()
|
||||||
|
log_level = getattr(logging, LOG_LEVEL, logging.WARNING)
|
||||||
|
logger.setLevel(log_level)
|
||||||
|
|
||||||
# --- FourCCs (add more if needed) ---
|
# --- FourCCs (add more if needed) ---
|
||||||
INDEX_HNSW_FLAT_FOURCC = int.from_bytes(b"IHNf", "little")
|
INDEX_HNSW_FLAT_FOURCC = int.from_bytes(b"IHNf", "little")
|
||||||
# Add other HNSW fourccs if you expect different storage types inside HNSW
|
# Add other HNSW fourccs if you expect different storage types inside HNSW
|
||||||
@@ -243,6 +250,12 @@ def convert_hnsw_graph_to_csr(input_filename, output_filename, prune_embeddings=
|
|||||||
output_filename: Output CSR index file
|
output_filename: Output CSR index file
|
||||||
prune_embeddings: Whether to prune embedding storage (write NULL storage marker)
|
prune_embeddings: Whether to prune embedding storage (write NULL storage marker)
|
||||||
"""
|
"""
|
||||||
|
# Disable buffering for print statements to avoid deadlock in CI/pytest
|
||||||
|
import functools
|
||||||
|
|
||||||
|
global print
|
||||||
|
print = functools.partial(print, flush=True)
|
||||||
|
|
||||||
print(f"Starting conversion: {input_filename} -> {output_filename}")
|
print(f"Starting conversion: {input_filename} -> {output_filename}")
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
original_hnsw_data = {}
|
original_hnsw_data = {}
|
||||||
|
|||||||
@@ -305,15 +305,23 @@ class EmbeddingServerManager:
|
|||||||
project_root = Path(__file__).parent.parent.parent.parent.parent
|
project_root = Path(__file__).parent.parent.parent.parent.parent
|
||||||
logger.info(f"Command: {' '.join(command)}")
|
logger.info(f"Command: {' '.join(command)}")
|
||||||
|
|
||||||
# Let server output go directly to console
|
# In CI environment, redirect output to avoid buffer deadlock
|
||||||
# The server will respect LEANN_LOG_LEVEL environment variable
|
# Embedding servers use many print statements that can fill buffers
|
||||||
# IMPORTANT: Use a new session so we can manage the whole process group reliably,
|
is_ci = os.environ.get("CI") == "true"
|
||||||
# and detach stdio to avoid lingering output keeping CI steps noisy/alive.
|
if is_ci:
|
||||||
|
stdout_target = subprocess.DEVNULL
|
||||||
|
stderr_target = subprocess.DEVNULL
|
||||||
|
logger.info("CI environment detected, redirecting embedding server output to DEVNULL")
|
||||||
|
else:
|
||||||
|
stdout_target = None # Direct to console for visible logs
|
||||||
|
stderr_target = None # Direct to console for visible logs
|
||||||
|
|
||||||
|
# IMPORTANT: Use a new session so we can manage the whole process group reliably
|
||||||
self.server_process = subprocess.Popen(
|
self.server_process = subprocess.Popen(
|
||||||
command,
|
command,
|
||||||
cwd=project_root,
|
cwd=project_root,
|
||||||
stdout=None, # Direct to console for visible logs
|
stdout=stdout_target,
|
||||||
stderr=None, # Direct to console for visible logs
|
stderr=stderr_target,
|
||||||
start_new_session=True,
|
start_new_session=True,
|
||||||
)
|
)
|
||||||
self.server_port = port
|
self.server_port = port
|
||||||
|
|||||||
Reference in New Issue
Block a user