chore(ci): remove unused pytest wrapper and debug runner

This commit is contained in:
Andy Lee
2025-08-13 16:59:30 -07:00
parent 751b5f8735
commit 317d9e9ed7
2 changed files with 0 additions and 359 deletions

View File

@@ -1,160 +0,0 @@
import faulthandler
import os
import signal
import subprocess
import sys
import threading
import time
import traceback
def setup_hang_detection() -> None:
    """Install signal handlers and a watchdog thread to help debug hangs in CI.

    - Enables faulthandler so Python stack traces are dumped on fatal signals.
    - Installs handlers for SIGUSR1/SIGUSR2 (when the platform defines them)
      that dump all thread stacks on demand.
    - Starts a daemon thread that wakes every 2 minutes, reports lingering
      helper processes, dumps thread stacks every 4 minutes, and force-exits
      the process with status 124 once more than 8 minutes have elapsed.
    """
    # Enable faulthandler for automatic stack dumps
    faulthandler.enable()

    def print_thread_stacks() -> None:
        """Print the current Python stack of every live thread."""
        for thread_id, thread_frame in sys._current_frames().items():
            print(f"\n📍 Thread {thread_id}:")
            traceback.print_stack(thread_frame)

    def dump_all_stacks(signum, frame):
        """Signal handler: dump all thread stacks when poked from outside."""
        print(f"\n🔥 [HANG DEBUG] SIGNAL {signum} - DUMPING ALL THREAD STACKS:")
        faulthandler.dump_traceback()
        # Also dump current frames manually for completeness
        print_thread_stacks()

    def periodic_stack_dump() -> None:
        """Periodically dump stacks to catch where the process is stuck."""
        start_time = time.time()
        while True:
            time.sleep(120)  # Check every 2 minutes
            elapsed = time.time() - start_time
            print(f"\n⏰ [HANG DEBUG] Periodic check at {elapsed:.1f}s elapsed:")

            # Report helper processes that might be the ones hanging.
            try:
                for pattern, label in (
                    ("embedding_server", "embedding server"),
                    ("zmq", "zmq"),
                ):
                    result = subprocess.run(
                        ["pgrep", "-f", pattern],
                        capture_output=True,
                        text=True,
                        timeout=5,
                    )
                    found = result.stdout.strip()
                    if found:
                        print(f"📍 [HANG DEBUG] Found {label} processes: {found}")
            except Exception as e:
                print(f"📍 [HANG DEBUG] Process check failed: {e}")

            # Dump thread stacks every 4 minutes (every second wake-up)
            if elapsed > 240 and int(elapsed) % 240 < 120:
                print(f"\n⚠️ [HANG DEBUG] Stack dump at {elapsed:.1f}s:")
                print_thread_stacks()

            # Emergency exit after 8 minutes (should be handled by wrapper timeout)
            if elapsed > 480:
                print(
                    f"\n💥 [HANG DEBUG] Emergency exit after {elapsed:.1f}s - pytest taking too long!"
                )
                faulthandler.dump_traceback()
                # Try to cleanup before exit; failures here must not block the exit.
                try:
                    subprocess.run(["pkill", "-9", "-f", "embedding_server"], timeout=2)
                    subprocess.run(["pkill", "-9", "-f", "zmq"], timeout=2)
                except Exception:
                    pass
                # os._exit() does not flush stdio buffers, so flush explicitly or
                # the diagnostics printed above can be lost when output is piped.
                for stream in (sys.stdout, sys.stderr):
                    try:
                        stream.flush()
                    except Exception:
                        pass
                os._exit(124)  # Force exit with timeout code

    # Register signal handlers for external debugging. SIGUSR1/SIGUSR2 are not
    # defined on every platform, so only install the ones that exist.
    for signal_name in ("SIGUSR1", "SIGUSR2"):
        signum = getattr(signal, signal_name, None)
        if signum is not None:
            signal.signal(signum, dump_all_stacks)

    # Start periodic dumping thread (daemon, so it never blocks interpreter exit)
    dump_thread = threading.Thread(target=periodic_stack_dump, daemon=True)
    dump_thread.start()
def main(argv: list[str]) -> int:
    """Run pytest in a child process under hang detection and a hard timeout.

    Args:
        argv: Command-line arguments forwarded verbatim to ``python -m pytest``.

    Returns:
        pytest's own return code when it finishes in time, 124 when the
        10 minute timeout expires, or 1 when launching or monitoring fails.
    """
    setup_hang_detection()

    def kill_leftovers(child) -> None:
        """Best-effort SIGKILL of the child's process group and embedding servers."""
        # The child runs in its own session, so its pid is also its process
        # group id. Killing that group targets only what the child spawned; a
        # `pkill -f pytest` pattern would also match any runner whose own
        # command line contains "pytest" and kill it before it can return.
        if child is not None and hasattr(os, "killpg"):
            try:
                os.killpg(child.pid, signal.SIGKILL)
            except OSError:
                # Group already gone (or not ours to signal): nothing to clean.
                pass
        try:
            subprocess.run(["pkill", "-9", "-f", "embedding_server"], timeout=5)
        except Exception:
            # Cleanup is best-effort; pkill may be missing or time out.
            pass

    # Re-exec pytest with debugging enabled
    print(f"🚀 [DEBUG] Starting pytest with args: {argv}")

    process = None
    try:
        # Use Popen for non-blocking execution
        process = subprocess.Popen(
            [sys.executable, "-m", "pytest", *argv],
            stdout=sys.stdout,
            stderr=sys.stderr,
            # Separate session/process group to avoid signal inheritance issues.
            # start_new_session is used instead of preexec_fn=os.setsid because
            # preexec_fn is not safe once threads exist, and hang detection has
            # just started one.
            start_new_session=True,
        )

        # Monitor the process with a reasonable timeout
        start_time = time.time()
        timeout = 600  # 10 minutes
        poll_interval = 5  # seconds

        while True:
            # Check if process has completed
            return_code = process.poll()
            if return_code is not None:
                print(f"✅ [DEBUG] Pytest completed with return code: {return_code}")
                return return_code

            # Check for timeout
            elapsed = time.time() - start_time
            if elapsed > timeout:
                print(f"💥 [DEBUG] Pytest timed out after {elapsed:.1f}s, terminating...")
                try:
                    # Try graceful termination first
                    process.terminate()
                    try:
                        process.wait(timeout=10)
                    except subprocess.TimeoutExpired:
                        # Force kill if still running
                        process.kill()
                        process.wait()
                except Exception:
                    pass
                # Cleanup any remaining processes
                kill_leftovers(process)
                return 124  # timeout exit code

            # Wait before next check
            time.sleep(poll_interval)

    except Exception as e:
        print(f"💥 [DEBUG] Error running pytest: {e}")
        # Cleanup on error (process is None when Popen itself failed)
        kill_leftovers(process)
        return 1
if __name__ == "__main__":
    # Forward everything after the script name to pytest and use the
    # runner's return code as the process exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)

View File

@@ -1,199 +0,0 @@
#!/usr/bin/env python3
"""
CI pytest wrapper with comprehensive hang detection and cleanup.
Designed to prevent CI hangs due to subprocess or cleanup issues.
"""
import os
import signal
import subprocess
import sys
import time
def cleanup_all_processes():
    """Aggressively clean up helper and pytest processes left from a test run.

    Sends SIGKILL (via ``pkill``) to processes whose command line matches the
    known helper patterns, then scans ``ps aux`` and kills remaining Python
    pytest processes, sparing this process and the CI wrapper/debug scripts.
    Every step is best-effort: failures are swallowed so cleanup never raises.
    """
    print("🧹 [CLEANUP] Performing aggressive cleanup...")

    # Kill by pattern - use separate calls to avoid shell injection
    # Avoid killing ourselves by being more specific
    patterns = (
        "embedding_server",
        "hnsw_embedding",
        "zmq",
    )
    for pattern in patterns:
        try:
            subprocess.run(["pkill", "-9", "-f", pattern], timeout=5, capture_output=True)
        except Exception:
            # pkill missing or timed out: keep going with the other patterns.
            pass

    # Clean up specific pytest processes but NOT the wrapper itself
    try:
        result = subprocess.run(["ps", "aux"], capture_output=True, text=True, timeout=5)
        current_pid = str(os.getpid())

        for line in result.stdout.splitlines():
            fields = line.split()
            if len(fields) < 2:
                continue  # blank or malformed row: no PID column to act on
            pid = fields[1]

            # Skip our own process. Compare the PID column exactly: a substring
            # test on the whole row would also match unrelated digits (memory
            # sizes, times, other PIDs) and wrongly spare stale processes.
            if pid == current_pid:
                continue

            # Only kill actual pytest processes, not wrapper processes
            is_pytest = "python" in line and "pytest" in line
            is_wrapper = "ci_pytest_wrapper.py" in line or "ci_debug_pytest.py" in line
            if is_pytest and not is_wrapper:
                try:
                    subprocess.run(["kill", "-9", pid], timeout=2)
                except Exception:
                    pass
    except Exception:
        # ps unavailable or its output unreadable: nothing more we can do.
        pass

    print("🧹 [CLEANUP] Cleanup completed")
def run_pytest_with_monitoring(pytest_args):
    """Run pytest with comprehensive monitoring and timeout handling.

    Cleans up stale processes, launches ``scripts/ci_debug_pytest.py`` with the
    given arguments in its own session, polls it until it finishes or a
    10 minute timeout expires, then verifies that no related processes remain.

    Args:
        pytest_args: Sequence of arguments forwarded to the debug runner.

    Returns:
        The child's return code, 124 if the timeout expired, or 1 if launching
        or monitoring raised an exception.

    Side effects:
        Installs SIGTERM and SIGINT handlers that run cleanup and exit with
        ``128 + signum``.
    """
    # Pre-test cleanup
    print("🧹 [WRAPPER] Pre-test cleanup...")
    cleanup_all_processes()
    time.sleep(2)

    # Show pre-test state
    print("📊 [WRAPPER] Pre-test process state:")
    try:
        result = subprocess.run(["ps", "aux"], capture_output=True, text=True, timeout=5)
        relevant_lines = [
            line
            for line in result.stdout.split("\n")
            if "python" in line or "embedding" in line or "zmq" in line
        ]
        if relevant_lines:
            for line in relevant_lines[:5]:  # Show first 5 matches
                print(f" {line}")
        else:
            print(" No relevant processes found")
    except Exception:
        print(" Process check failed")

    # Setup signal handlers for cleanup
    def signal_handler(signum, frame):
        print(f"\n💥 [WRAPPER] Received signal {signum}, cleaning up...")
        cleanup_all_processes()
        sys.exit(128 + signum)

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)

    # Run pytest with monitoring
    print(f"🚀 [WRAPPER] Starting pytest with args: {pytest_args}")

    try:
        # Use Popen for better control
        cmd = [sys.executable, "scripts/ci_debug_pytest.py", *pytest_args]
        process = subprocess.Popen(
            cmd,
            stdout=sys.stdout,
            stderr=sys.stderr,
            # Documented replacement for preexec_fn=os.setsid.
            start_new_session=True,
        )

        # Monitor with timeout
        start_time = time.time()
        timeout = 600  # 10 minutes
        monitor_interval = 10  # Check every 10 seconds
        report_interval = 30  # Progress line every 30 seconds
        # Track the next report time instead of testing int(elapsed) % 30 == 0:
        # that test only fires while polls land exactly on a multiple of 30 and
        # silently stops once sleep/poll overhead makes the timing drift.
        next_report = 0.0

        while True:
            # Check if process completed
            return_code = process.poll()
            if return_code is not None:
                print(f"✅ [WRAPPER] Pytest completed with return code: {return_code}")
                break

            # Check for timeout
            elapsed = time.time() - start_time
            if elapsed > timeout:
                print(f"💥 [WRAPPER] Pytest timed out after {elapsed:.1f}s")

                # Try graceful termination
                try:
                    print("🔄 [WRAPPER] Attempting graceful termination...")
                    process.terminate()
                    try:
                        process.wait(timeout=10)
                    except subprocess.TimeoutExpired:
                        print("💀 [WRAPPER] Graceful termination failed, force killing...")
                        process.kill()
                        process.wait()
                except Exception as e:
                    print(f"⚠️ [WRAPPER] Error during termination: {e}")

                return_code = 124  # timeout exit code
                break

            # Monitor progress
            if elapsed >= next_report:
                print(f"📊 [WRAPPER] Monitor check: {elapsed:.0f}s elapsed, pytest still running")
                next_report += report_interval

            time.sleep(monitor_interval)

        # Post-test cleanup verification
        print("🔍 [WRAPPER] Post-test cleanup verification...")
        time.sleep(2)

        try:
            result = subprocess.run(["ps", "aux"], capture_output=True, text=True, timeout=5)
            current_pid = str(os.getpid())
            remaining = []

            for line in result.stdout.split("\n"):
                fields = line.split()
                # Skip our own wrapper process. Match the PID column exactly so
                # unrelated digits elsewhere in the row cannot hide a leftover.
                if len(fields) > 1 and fields[1] == current_pid:
                    continue
                if "ci_pytest_wrapper.py" in line:
                    continue
                # Only check for actual problematic processes
                if "python" in line and ("pytest" in line or "embedding" in line):
                    # Skip debug script too
                    if "ci_debug_pytest.py" not in line:
                        remaining.append(line)

            if remaining:
                print(f"⚠️ [WRAPPER] Found {len(remaining)} remaining processes:")
                for line in remaining[:3]:  # Show first 3
                    print(f" {line}")
                print("💀 [WRAPPER] Performing final cleanup...")
                cleanup_all_processes()
            else:
                print("✅ [WRAPPER] No remaining processes found")
        except Exception:
            print("⚠️ [WRAPPER] Post-test verification failed, performing cleanup anyway")
            cleanup_all_processes()

        return return_code

    except Exception as e:
        print(f"💥 [WRAPPER] Error running pytest: {e}")
        cleanup_all_processes()
        return 1
def main():
    """Entry point: validate the command line and delegate to the monitor.

    Returns 1 after printing a usage line when no pytest arguments were
    supplied; otherwise returns whatever run_pytest_with_monitoring returns.
    """
    forwarded_args = sys.argv[1:]
    if not forwarded_args:
        print("Usage: ci_pytest_wrapper.py <pytest_args...>")
        return 1

    print(f"🎯 [WRAPPER] CI pytest wrapper starting with args: {forwarded_args}")
    exit_code = run_pytest_with_monitoring(forwarded_args)
    return exit_code
if __name__ == "__main__":
    # Use the wrapper's return code as the process exit status.
    exit_status = main()
    sys.exit(exit_status)