Based on the user's excellent analysis, implemented comprehensive fixes:

1. ZMQ Socket Cleanup:
   - Set LINGER=0 on all ZMQ sockets (client and server).
   - Use try-finally blocks to ensure socket.close() and context.term() are always called.
   - Prevents blocking on exit when ZMQ contexts have pending operations.

2. Global Test Cleanup:
   - Added tests/conftest.py with a session-scoped cleanup fixture.
   - Cleans up leftover ZMQ contexts and child processes after all tests.
   - Lists remaining threads for debugging.

3. CI Improvements:
   - Apply the timeout to ALL Python versions on Linux (not just 3.13).
   - Increased the timeout to 180s for better reliability.
   - Added process cleanup (pkill) on timeout.

4. Dependencies:
   - Added psutil>=5.9.0 to the test dependencies for process management.

Root cause: Python 3.9 and 3.13 are more sensitive to cleanup timing during interpreter shutdown. ZMQ's default LINGER=-1 was blocking exit, and atexit handlers were unreliable for cleanup.

This should resolve the "all tests pass but CI hangs" issue.
100 lines
2.9 KiB
Python
100 lines
2.9 KiB
Python
"""Global test configuration and cleanup fixtures."""
|
|
|
|
import os
|
|
import signal
|
|
import time
|
|
from collections.abc import Generator
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.fixture(scope="session", autouse=True)
def global_test_cleanup() -> Generator:
    """Run best-effort, process-wide cleanup once the test session ends.

    Everything after the ``yield`` executes at session teardown:

    1. Set ``linger = 0`` on the shared ZMQ context.
    2. Terminate leftover child processes (via ``psutil`` when it is
       installed, otherwise via a guarded process-group ``SIGTERM``).
    3. Print the non-main threads that are still alive, for debugging.

    Each step is best-effort: failures are swallowed or reported with
    ``print`` so that cleanup itself never fails the session.

    Yields:
        None. The fixture exists only for its teardown side effects.
    """
    yield

    # --- 1. ZMQ context ---------------------------------------------------
    try:
        import zmq

        # A zero linger is meant to keep context termination from blocking.
        # NOTE(review): in pyzmq, options set on a Context look like defaults
        # for sockets created afterwards; confirm this also affects sockets
        # that are already open (otherwise `ctx.destroy(linger=0)` may be the
        # call that is actually needed here).
        ctx = zmq.Context.instance()
        ctx.linger = 0
    except Exception:
        # zmq may be missing or unusable at this point; nothing to clean up.
        pass

    # --- 2. Leftover child processes --------------------------------------
    try:
        import psutil

        current_process = psutil.Process()
        children = current_process.children(recursive=True)

        if children:
            print(f"\n⚠️ Cleaning up {len(children)} leftover child processes...")

            # First try to terminate gracefully.
            for child in children:
                try:
                    child.terminate()
                except (psutil.NoSuchProcess, psutil.AccessDenied):
                    pass

            # Give the children up to two seconds to exit on their own.
            _gone, alive = psutil.wait_procs(children, timeout=2)

            # Force kill whatever ignored the polite request.
            for child in alive:
                try:
                    print(f" Force killing process {child.pid} ({child.name()})")
                    child.kill()
                except (psutil.NoSuchProcess, psutil.AccessDenied):
                    pass
    except ImportError:
        # psutil is not installed: fall back to signalling our process group.
        try:
            # Signalling the group is only safe when this process leads it.
            # Otherwise the group also holds our parent and siblings (for
            # example the CI shell or a `timeout` wrapper) and SIGTERM would
            # take the whole run down. Platforms without process groups
            # simply skip this fallback.
            if hasattr(os, "killpg") and os.getpgrp() == os.getpid():
                # The group includes this process too, so ignore SIGTERM
                # locally while it is being sent.
                previous_handler = signal.signal(signal.SIGTERM, signal.SIG_IGN)
                try:
                    os.killpg(os.getpgrp(), signal.SIGTERM)
                finally:
                    # `None` means the old handler was not installed from
                    # Python and cannot be re-installed via signal.signal().
                    if previous_handler is not None:
                        signal.signal(signal.SIGTERM, previous_handler)
        except Exception:
            # Best-effort only (e.g. not on the main thread, no permission).
            pass
    except Exception as e:
        print(f"Warning: Error during process cleanup: {e}")

    # --- 3. Remaining threads (debugging aid) -----------------------------
    try:
        import threading

        threads = [t for t in threading.enumerate() if t is not threading.main_thread()]
        if threads:
            print(f"\n⚠️ {len(threads)} non-main threads still running:")
            for t in threads:
                print(f" - {t.name} (daemon={t.daemon})")
    except Exception:
        pass
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def cleanup_after_each_test():
    """Release lingering resources once every individual test has finished.

    Teardown runs a full garbage-collection pass and then pauses for a
    tenth of a second.
    """
    import gc

    yield

    # A collection pass gives pending ``__del__`` finalizers a chance to run.
    gc.collect()

    # Short pause so that asynchronous cleanup can settle before the next test.
    settle_seconds = 0.1
    time.sleep(settle_seconds)
|
|
|
|
|
|
def pytest_configure(config):
    """Default the timeout method to ``"thread"`` when none was requested.

    Args:
        config: The pytest configuration object handed to this hook.

    If ``--timeout-method`` already resolves to a truthy value it is left
    untouched; otherwise ``config.option.timeout_method`` is set to
    ``"thread"``.
    """
    # NOTE(review): this presumably also takes precedence over a
    # `timeout_method` value from the ini file; confirm against pytest-timeout.
    requested_method = config.getoption("--timeout-method", None)
    if requested_method:
        return

    config.option.timeout_method = "thread"
|