From 24676970eb89c8fc49cf14460db556cc3f8693ae Mon Sep 17 00:00:00 2001 From: Andy Lee Date: Wed, 13 Aug 2025 11:44:32 -0700 Subject: [PATCH] fix: simplify embedding server process management MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove start_new_session=True to fix signal handling issues - Simplify termination logic to use standard SIGTERM/SIGKILL - Remove complex process group management that could cause hangs - Add timeout-based cleanup to prevent CI hangs while ensuring proper resource cleanup - Give graceful shutdown more time (5s) since we fixed the server shutdown logic - Remove unused signal import This addresses the remaining process management issues that could cause startup failures and hanging during termination. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .../src/leann/embedding_server_manager.py | 44 +++++++++---------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/packages/leann-core/src/leann/embedding_server_manager.py b/packages/leann-core/src/leann/embedding_server_manager.py index 9ded6e5..1c33cf6 100644 --- a/packages/leann-core/src/leann/embedding_server_manager.py +++ b/packages/leann-core/src/leann/embedding_server_manager.py @@ -1,7 +1,6 @@ import atexit import logging import os -import signal import socket import subprocess import sys @@ -316,13 +315,12 @@ class EmbeddingServerManager: stdout_target = None # Direct to console for visible logs stderr_target = None # Direct to console for visible logs - # IMPORTANT: Use a new session so we can manage the whole process group reliably + # Start embedding server subprocess self.server_process = subprocess.Popen( command, cwd=project_root, stdout=stdout_target, stderr=stderr_target, - start_new_session=True, ) self.server_port = port logger.info(f"Server process started with PID: {self.server_process.pid}") @@ -364,26 +362,18 @@ class EmbeddingServerManager: logger.info( f"Terminating server process (PID: {self.server_process.pid}) for backend {self.backend_module_name}..." ) - # Try terminating the whole process group first (POSIX) - try: - pgid = os.getpgid(self.server_process.pid) - os.killpg(pgid, signal.SIGTERM) - except Exception: - # Fallback to terminating just the process - self.server_process.terminate() + + # Use simple termination - our improved server shutdown should handle this properly + self.server_process.terminate() try: - self.server_process.wait(timeout=3) - logger.info(f"Server process {self.server_process.pid} terminated.") + self.server_process.wait(timeout=5) # Give more time for graceful shutdown + logger.info(f"Server process {self.server_process.pid} terminated gracefully.") except subprocess.TimeoutExpired: logger.warning( - f"Server process {self.server_process.pid} did not terminate gracefully within 3 seconds, killing it." + f"Server process {self.server_process.pid} did not terminate within 5 seconds, force killing..." ) - try: - pgid = os.getpgid(self.server_process.pid) - os.killpg(pgid, signal.SIGKILL) - except Exception: - self.server_process.kill() + self.server_process.kill() try: self.server_process.wait(timeout=2) logger.info(f"Server process {self.server_process.pid} killed successfully.") @@ -391,12 +381,20 @@ class EmbeddingServerManager: logger.error( f"Failed to kill server process {self.server_process.pid} - it may be hung" ) - # Don't hang indefinitely - # Clean up process resources without waiting - # The process should already be terminated/killed above - # Don't wait here as it can hang CI indefinitely - self.server_process = None + # Clean up process resources with timeout to avoid CI hang + try: + # Use shorter timeout in CI environments + is_ci = os.environ.get("CI") == "true" + timeout = 3 if is_ci else 10 + self.server_process.wait(timeout=timeout) + logger.info(f"Server process {self.server_process.pid} cleanup completed") + except subprocess.TimeoutExpired: + logger.warning(f"Process cleanup timeout after {timeout}s, proceeding anyway") + except Exception as e: + logger.warning(f"Error during process cleanup: {e}") + finally: + self.server_process = None def _launch_server_process_colab(self, command: list, port: int) -> None: """Launch the server process with Colab-specific settings."""