fix: simplify embedding server process management
- Remove start_new_session=True to fix signal handling issues - Simplify termination logic to use standard SIGTERM/SIGKILL - Remove complex process group management that could cause hangs - Add timeout-based cleanup to prevent CI hangs while ensuring proper resource cleanup - Give graceful shutdown more time (5s) since we fixed the server shutdown logic - Remove unused signal import This addresses the remaining process management issues that could cause startup failures and hanging during termination. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1,7 +1,6 @@
|
|||||||
import atexit
|
import atexit
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import signal
|
|
||||||
import socket
|
import socket
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
@@ -316,13 +315,12 @@ class EmbeddingServerManager:
|
|||||||
stdout_target = None # Direct to console for visible logs
|
stdout_target = None # Direct to console for visible logs
|
||||||
stderr_target = None # Direct to console for visible logs
|
stderr_target = None # Direct to console for visible logs
|
||||||
|
|
||||||
# IMPORTANT: Use a new session so we can manage the whole process group reliably
|
# Start embedding server subprocess
|
||||||
self.server_process = subprocess.Popen(
|
self.server_process = subprocess.Popen(
|
||||||
command,
|
command,
|
||||||
cwd=project_root,
|
cwd=project_root,
|
||||||
stdout=stdout_target,
|
stdout=stdout_target,
|
||||||
stderr=stderr_target,
|
stderr=stderr_target,
|
||||||
start_new_session=True,
|
|
||||||
)
|
)
|
||||||
self.server_port = port
|
self.server_port = port
|
||||||
logger.info(f"Server process started with PID: {self.server_process.pid}")
|
logger.info(f"Server process started with PID: {self.server_process.pid}")
|
||||||
@@ -364,26 +362,18 @@ class EmbeddingServerManager:
|
|||||||
logger.info(
|
logger.info(
|
||||||
f"Terminating server process (PID: {self.server_process.pid}) for backend {self.backend_module_name}..."
|
f"Terminating server process (PID: {self.server_process.pid}) for backend {self.backend_module_name}..."
|
||||||
)
|
)
|
||||||
# Try terminating the whole process group first (POSIX)
|
|
||||||
try:
|
# Use simple termination - our improved server shutdown should handle this properly
|
||||||
pgid = os.getpgid(self.server_process.pid)
|
self.server_process.terminate()
|
||||||
os.killpg(pgid, signal.SIGTERM)
|
|
||||||
except Exception:
|
|
||||||
# Fallback to terminating just the process
|
|
||||||
self.server_process.terminate()
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.server_process.wait(timeout=3)
|
self.server_process.wait(timeout=5) # Give more time for graceful shutdown
|
||||||
logger.info(f"Server process {self.server_process.pid} terminated.")
|
logger.info(f"Server process {self.server_process.pid} terminated gracefully.")
|
||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Server process {self.server_process.pid} did not terminate gracefully within 3 seconds, killing it."
|
f"Server process {self.server_process.pid} did not terminate within 5 seconds, force killing..."
|
||||||
)
|
)
|
||||||
try:
|
self.server_process.kill()
|
||||||
pgid = os.getpgid(self.server_process.pid)
|
|
||||||
os.killpg(pgid, signal.SIGKILL)
|
|
||||||
except Exception:
|
|
||||||
self.server_process.kill()
|
|
||||||
try:
|
try:
|
||||||
self.server_process.wait(timeout=2)
|
self.server_process.wait(timeout=2)
|
||||||
logger.info(f"Server process {self.server_process.pid} killed successfully.")
|
logger.info(f"Server process {self.server_process.pid} killed successfully.")
|
||||||
@@ -391,12 +381,20 @@ class EmbeddingServerManager:
|
|||||||
logger.error(
|
logger.error(
|
||||||
f"Failed to kill server process {self.server_process.pid} - it may be hung"
|
f"Failed to kill server process {self.server_process.pid} - it may be hung"
|
||||||
)
|
)
|
||||||
# Don't hang indefinitely
|
|
||||||
|
|
||||||
# Clean up process resources without waiting
|
# Clean up process resources with timeout to avoid CI hang
|
||||||
# The process should already be terminated/killed above
|
try:
|
||||||
# Don't wait here as it can hang CI indefinitely
|
# Use shorter timeout in CI environments
|
||||||
self.server_process = None
|
is_ci = os.environ.get("CI") == "true"
|
||||||
|
timeout = 3 if is_ci else 10
|
||||||
|
self.server_process.wait(timeout=timeout)
|
||||||
|
logger.info(f"Server process {self.server_process.pid} cleanup completed")
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
logger.warning(f"Process cleanup timeout after {timeout}s, proceeding anyway")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error during process cleanup: {e}")
|
||||||
|
finally:
|
||||||
|
self.server_process = None
|
||||||
|
|
||||||
def _launch_server_process_colab(self, command: list, port: int) -> None:
|
def _launch_server_process_colab(self, command: list, port: int) -> None:
|
||||||
"""Launch the server process with Colab-specific settings."""
|
"""Launch the server process with Colab-specific settings."""
|
||||||
|
|||||||
Reference in New Issue
Block a user