refactor: remove upterm/tmate debug code and clean CI workflow
- Remove all upterm/tmate SSH debugging infrastructure
- Restore clean CI workflow from main branch
- Remove diagnostic script that was only for SSH debugging
- Keep valuable DiskANN and HNSW backend improvements

This provides a clean base to add targeted pytest hang debugging without the complexity of SSH sessions.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
219
.github/workflows/build-reusable.yml
vendored
219
.github/workflows/build-reusable.yml
vendored
@@ -8,11 +8,6 @@ on:
|
||||
required: false
|
||||
type: string
|
||||
default: ''
|
||||
debug_enabled:
|
||||
description: 'Enable tmate debugging session for troubleshooting'
|
||||
required: false
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
@@ -33,7 +28,7 @@ jobs:
|
||||
|
||||
- name: Install ruff
|
||||
run: |
|
||||
uv tool install ruff==0.12.7
|
||||
uv tool install ruff
|
||||
|
||||
- name: Run ruff check
|
||||
run: |
|
||||
@@ -190,15 +185,10 @@ jobs:
|
||||
fi
|
||||
cd ../..
|
||||
|
||||
# Repair DiskANN wheel - use show first to debug
|
||||
# Repair DiskANN wheel
|
||||
cd packages/leann-backend-diskann
|
||||
if [ -d dist ]; then
|
||||
echo "Checking DiskANN wheel contents before repair:"
|
||||
unzip -l dist/*.whl | grep -E "\.so|\.pyd|_diskannpy" || echo "No .so files found"
|
||||
auditwheel show dist/*.whl || echo "auditwheel show failed"
|
||||
auditwheel repair dist/*.whl -w dist_repaired
|
||||
echo "Checking DiskANN wheel contents after repair:"
|
||||
unzip -l dist_repaired/*.whl | grep -E "\.so|\.pyd|_diskannpy" || echo "No .so files found after repair"
|
||||
rm -rf dist
|
||||
mv dist_repaired dist
|
||||
fi
|
||||
@@ -246,12 +236,6 @@ jobs:
|
||||
# Install test dependencies using extras
|
||||
uv pip install -e ".[test]"
|
||||
|
||||
# Debug: Check if _diskannpy module is installed correctly
|
||||
echo "Checking installed DiskANN module structure:"
|
||||
python -c "import leann_backend_diskann; print('leann_backend_diskann location:', leann_backend_diskann.__file__)" || echo "Failed to import leann_backend_diskann"
|
||||
python -c "from leann_backend_diskann import _diskannpy; print('_diskannpy imported successfully')" || echo "Failed to import _diskannpy"
|
||||
ls -la $(python -c "import leann_backend_diskann; import os; print(os.path.dirname(leann_backend_diskann.__file__))" 2>/dev/null) 2>/dev/null || echo "Failed to list module directory"
|
||||
|
||||
- name: Run tests with pytest
|
||||
env:
|
||||
CI: true # Mark as CI environment to skip memory-intensive tests
|
||||
@@ -265,203 +249,8 @@ jobs:
|
||||
# Activate virtual environment
|
||||
source .venv/bin/activate || source .venv/Scripts/activate
|
||||
|
||||
# Debug: Show debug_enabled value
|
||||
echo "🔍 DEBUG_ENABLED value: '${{ inputs.debug_enabled }}'"
|
||||
echo "🔍 EVENT NAME: '${{ github.event_name }}'"
|
||||
echo "🔍 COMMIT MESSAGE: '${{ github.event.head_commit.message }}'"
|
||||
echo "🔍 Contains [debug]: '${{ contains(github.event.head_commit.message, '[debug]') }}'"
|
||||
echo "🔍 GITHUB REF: '${{ github.ref }}'"
|
||||
echo "🔍 GITHUB HEAD_REF: '${{ github.head_ref }}'"
|
||||
|
||||
# Start tmate session INSIDE the test step if debug enabled
|
||||
# FORCE DEBUG MODE - Always enable on this debug branch
|
||||
DEBUG_MODE=true
|
||||
echo "✅ DEBUG MODE FORCED ON - Investigation branch"
|
||||
|
||||
if [[ "$DEBUG_MODE" == "true" ]]; then
|
||||
echo "🔧 DEBUG MODE: Starting tmate session before tests..."
|
||||
# Install tmate if not available
|
||||
if ! command -v tmate &> /dev/null; then
|
||||
if [[ "$RUNNER_OS" == "Linux" ]]; then
|
||||
sudo apt-get update && sudo apt-get install -y tmate
|
||||
elif [[ "$RUNNER_OS" == "macOS" ]]; then
|
||||
brew install tmate
|
||||
fi
|
||||
fi
|
||||
|
||||
# Start tmate session in background
|
||||
echo "Starting tmate session..."
|
||||
tmate -S debug-session new-session -d
|
||||
|
||||
# Wait for tmate to initialize and get connection info
|
||||
echo "Waiting for tmate to initialize..."
|
||||
sleep 5
|
||||
|
||||
# Try multiple times to get connection info
|
||||
for i in {1..10}; do
|
||||
SSH_INFO=$(tmate -S debug-session display -p '#{tmate_ssh}' 2>/dev/null || echo "")
|
||||
WEB_INFO=$(tmate -S debug-session display -p '#{tmate_web}' 2>/dev/null || echo "")
|
||||
|
||||
if [[ -n "$SSH_INFO" && "$SSH_INFO" != "connecting..." ]]; then
|
||||
echo "🔗 SSH: $SSH_INFO"
|
||||
echo "🔗 Web: $WEB_INFO"
|
||||
break
|
||||
fi
|
||||
|
||||
echo "Attempt $i: Still connecting... (SSH: '$SSH_INFO')"
|
||||
sleep 2
|
||||
done
|
||||
echo "⏱️ Session will timeout after 30 minutes"
|
||||
echo "💡 You can now SSH in and run: pytest tests/ -vv --capture=no"
|
||||
echo "💡 Or run diagnostics: bash scripts/diagnose_hang.sh"
|
||||
echo ""
|
||||
echo "Waiting 60 seconds for you to connect..."
|
||||
sleep 60
|
||||
fi
|
||||
|
||||
# Define comprehensive diagnostic function
|
||||
diag() {
|
||||
echo "===== COMPREHENSIVE DIAGNOSTICS BEGIN ====="
|
||||
date
|
||||
echo ""
|
||||
echo "### Current Shell Info ###"
|
||||
echo "Shell PID: $$"
|
||||
echo "Shell PPID: $PPID"
|
||||
echo "Current directory: $(pwd)"
|
||||
echo ""
|
||||
|
||||
echo "### Process Tree (full) ###"
|
||||
pstree -ap 2>/dev/null || ps auxf || true
|
||||
echo ""
|
||||
|
||||
echo "### All Python/Pytest Processes ###"
|
||||
ps -ef | grep -E 'python|pytest' | grep -v grep || true
|
||||
echo ""
|
||||
|
||||
echo "### Embedding Server Processes ###"
|
||||
ps -ef | grep -E 'embedding|zmq|diskann' | grep -v grep || true
|
||||
echo ""
|
||||
|
||||
echo "### Network Listeners ###"
|
||||
ss -ltnp 2>/dev/null || netstat -ltn 2>/dev/null || true
|
||||
echo ""
|
||||
|
||||
echo "### Open File Descriptors (lsof) ###"
|
||||
lsof -p $$ 2>/dev/null | head -20 || true
|
||||
echo ""
|
||||
|
||||
echo "### Zombie Processes ###"
|
||||
ps aux | grep '<defunct>' || echo "No zombie processes"
|
||||
echo ""
|
||||
|
||||
echo "### Current Jobs ###"
|
||||
jobs -l || true
|
||||
echo ""
|
||||
|
||||
echo "### /proc/PID/fd for current shell ###"
|
||||
ls -la /proc/$$/fd 2>/dev/null || true
|
||||
echo ""
|
||||
|
||||
echo "===== COMPREHENSIVE DIAGNOSTICS END ====="
|
||||
}
|
||||
|
||||
# Enable verbose logging for debugging
|
||||
export PYTHONUNBUFFERED=1
|
||||
export PYTEST_CURRENT_TEST=1
|
||||
|
||||
# Run all tests with extensive logging
|
||||
if [[ "$RUNNER_OS" == "Linux" ]]; then
|
||||
echo "🚀 Starting Linux test execution with timeout..."
|
||||
echo "Current time: $(date)"
|
||||
echo "Shell PID: $$"
|
||||
echo "Python: $(python --version)"
|
||||
echo "Pytest: $(pytest --version)"
|
||||
|
||||
# Show environment variables for debugging
|
||||
echo "📦 Environment variables:"
|
||||
env | grep -E "PYTHON|PYTEST|CI|RUNNER" | sort
|
||||
|
||||
# Set trap for diagnostics
|
||||
trap diag INT TERM EXIT
|
||||
|
||||
echo "📋 Pre-test diagnostics:"
|
||||
ps -ef | grep -E 'python|pytest' | grep -v grep || echo "No python/pytest processes before test"
|
||||
|
||||
# Check for any listening ports before test
|
||||
echo "🔌 Pre-test network state:"
|
||||
ss -ltn 2>/dev/null | grep -E "555[0-9]|556[0-9]" || echo "No embedding server ports open"
|
||||
|
||||
echo "🏃 Running pytest with 180s timeout..."
|
||||
timeout --preserve-status --signal=INT --kill-after=10 180 bash -c '
|
||||
echo "⏱️ Pytest starting at: $(date)"
|
||||
echo "Running command: pytest tests/ -vv --maxfail=3 --tb=short --capture=no"
|
||||
|
||||
# Run pytest with maximum verbosity and no output capture
|
||||
pytest tests/ -vv --maxfail=3 --tb=short --capture=no --log-cli-level=DEBUG 2>&1 | tee pytest.log
|
||||
PYTEST_EXIT=${PIPESTATUS[0]}
|
||||
|
||||
echo "✅ Pytest finished at: $(date) with exit code: $PYTEST_EXIT"
|
||||
echo "Last 20 lines of pytest output:"
|
||||
tail -20 pytest.log || true
|
||||
|
||||
# Immediately check for leftover processes
|
||||
echo "🔍 Post-pytest process check:"
|
||||
ps -ef | grep -E "python|pytest|embedding" | grep -v grep || echo "No leftover processes"
|
||||
|
||||
# Clean up any children before exit
|
||||
echo "🧹 Cleaning up child processes..."
|
||||
pkill -TERM -P $$ 2>/dev/null || true
|
||||
sleep 0.5
|
||||
pkill -KILL -P $$ 2>/dev/null || true
|
||||
|
||||
echo "📊 Final check before exit:"
|
||||
ps -ef | grep -E "python|pytest|embedding" | grep -v grep || echo "All clean"
|
||||
|
||||
exit $PYTEST_EXIT
|
||||
'
|
||||
|
||||
EXIT_CODE=$?
|
||||
echo "🔚 Timeout command exited with code: $EXIT_CODE"
|
||||
|
||||
if [ $EXIT_CODE -eq 124 ]; then
|
||||
echo "⚠️ TIMEOUT TRIGGERED - Tests took more than 180 seconds!"
|
||||
echo "📸 Capturing full diagnostics..."
|
||||
diag
|
||||
|
||||
# Run diagnostic script if available
|
||||
if [ -f scripts/diagnose_hang.sh ]; then
|
||||
echo "🔍 Running diagnostic script..."
|
||||
bash scripts/diagnose_hang.sh || true
|
||||
fi
|
||||
|
||||
# More aggressive cleanup
|
||||
echo "💀 Killing all Python processes owned by runner..."
|
||||
pkill -9 -u runner python || true
|
||||
pkill -9 -u runner pytest || true
|
||||
elif [ $EXIT_CODE -ne 0 ]; then
|
||||
echo "❌ Tests failed with exit code: $EXIT_CODE"
|
||||
else
|
||||
echo "✅ All tests passed!"
|
||||
fi
|
||||
|
||||
# Always show final state
|
||||
echo "📍 Final state check:"
|
||||
ps -ef | grep -E 'python|pytest|embedding' | grep -v grep || echo "No Python processes remaining"
|
||||
|
||||
exit $EXIT_CODE
|
||||
else
|
||||
# For macOS/Windows, run without GNU timeout
|
||||
echo "🚀 Running tests on $RUNNER_OS..."
|
||||
pytest tests/ -vv --maxfail=3 --tb=short --capture=no --log-cli-level=INFO
|
||||
fi
|
||||
|
||||
# Provide tmate session on test failure for debugging
|
||||
- name: Setup tmate session on failure
|
||||
if: ${{ failure() && (inputs.debug_enabled || contains(github.event.head_commit.message, '[debug]')) }}
|
||||
uses: mxschmitt/action-tmate@v3
|
||||
with:
|
||||
timeout-minutes: 30
|
||||
limit-access-to-actor: true
|
||||
# Run all tests
|
||||
pytest tests/
|
||||
|
||||
- name: Run sanity checks (optional)
|
||||
run: |
|
||||
|
||||
Reference in New Issue
Block a user