diff --git a/.github/workflows/build-and-publish.yml b/.github/workflows/build-and-publish.yml
index bf076d3..058b0a5 100644
--- a/.github/workflows/build-and-publish.yml
+++ b/.github/workflows/build-and-publish.yml
@@ -5,7 +5,16 @@ on:
     branches: [ main ]
   pull_request:
     branches: [ main ]
+  workflow_dispatch:
+    inputs:
+      debug_enabled:
+        type: boolean
+        description: 'Run with tmate debugging enabled (SSH access to runner)'
+        required: false
+        default: false
 
 jobs:
   build:
     uses: ./.github/workflows/build-reusable.yml
+    with:
+      debug_enabled: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled || false }}
diff --git a/.github/workflows/build-reusable.yml b/.github/workflows/build-reusable.yml
index 30a44e9..0ab701c 100644
--- a/.github/workflows/build-reusable.yml
+++ b/.github/workflows/build-reusable.yml
@@ -8,6 +8,11 @@ on:
         required: false
         type: string
         default: ''
+      debug_enabled:
+        description: 'Enable tmate debugging session for troubleshooting'
+        required: false
+        type: boolean
+        default: false
 
 jobs:
   lint:
@@ -223,6 +228,14 @@ jobs:
           python -c "from leann_backend_diskann import _diskannpy; print('_diskannpy imported successfully')" || echo "Failed to import _diskannpy"
           ls -la $(python -c "import leann_backend_diskann; import os; print(os.path.dirname(leann_backend_diskann.__file__))" 2>/dev/null) 2>/dev/null || echo "Failed to list module directory"
 
+      # Start a detached tmate SSH session before the tests when debug_enabled is set
+      - name: Setup tmate session for debugging
+        if: ${{ inputs.debug_enabled }}
+        uses: mxschmitt/action-tmate@v3
+        timeout-minutes: 30
+        with:
+          detached: true
+          limit-access-to-actor: true
       - name: Run tests with pytest
         env:
           CI: true # Mark as CI environment to skip memory-intensive tests
@@ -236,8 +249,149 @@ jobs:
           # Activate virtual environment
           source .venv/bin/activate || source .venv/Scripts/activate
 
-          # Run all tests
-          pytest tests/
+          # diag: print process, network, and file-descriptor state to help debug hangs
+          diag() {
+            echo "===== COMPREHENSIVE DIAGNOSTICS BEGIN ====="
+            date
+            echo ""
+            echo "### Current Shell Info ###"
+            echo "Shell PID: $$"
+            echo "Shell PPID: $PPID"
+            echo "Current directory: $(pwd)"
+            echo ""
+
+            echo "### Process Tree (full) ###"
+            pstree -ap 2>/dev/null || ps auxf || true
+            echo ""
+
+            echo "### All Python/Pytest Processes ###"
+            ps -ef | grep -E 'python|pytest' | grep -v grep || true
+            echo ""
+
+            echo "### Embedding Server Processes ###"
+            ps -ef | grep -E 'embedding|zmq|diskann' | grep -v grep || true
+            echo ""
+
+            echo "### Network Listeners ###"
+            ss -ltnp 2>/dev/null || netstat -ltn 2>/dev/null || true
+            echo ""
+
+            echo "### Open File Descriptors (lsof) ###"
+            lsof -p $$ 2>/dev/null | head -20 || true
+            echo ""
+
+            echo "### Zombie Processes ###"
+            ps aux | grep -F '<defunct>' | grep -v grep || echo "No zombie processes"
+            echo ""
+
+            echo "### Current Jobs ###"
+            jobs -l || true
+            echo ""
+
+            echo "### /proc/PID/fd for current shell ###"
+            ls -la /proc/$$/fd 2>/dev/null || true
+            echo ""
+
+            echo "===== COMPREHENSIVE DIAGNOSTICS END ====="
+          }
+
+          # Unbuffered output for live logs. NOTE(review): pytest sets PYTEST_CURRENT_TEST itself, so that export is likely a no-op
+          export PYTHONUNBUFFERED=1
+          export PYTEST_CURRENT_TEST=1
+
+          # Run all tests with extensive logging
+          if [[ "$RUNNER_OS" == "Linux" ]]; then
+            echo "🚀 Starting Linux test execution with timeout..."
+            echo "Current time: $(date)"
+            echo "Shell PID: $$"
+            echo "Python: $(python --version)"
+            echo "Pytest: $(pytest --version)"
+
+            # Show environment variables for debugging
+            echo "📦 Environment variables:"
+            env | grep -E "PYTHON|PYTEST|CI|RUNNER" | sort
+
+            # Dump diagnostics if the step is interrupted, and always when it exits
+            trap diag INT TERM EXIT
+
+            echo "📋 Pre-test diagnostics:"
+            ps -ef | grep -E 'python|pytest' | grep -v grep || echo "No python/pytest processes before test"
+
+            # Check for any listening ports before test
+            echo "🔌 Pre-test network state:"
+            ss -ltn 2>/dev/null | grep -E "555[0-9]|556[0-9]" || echo "No embedding server ports open"
+            # No --preserve-status here: timeout must report 124 (137 after KILL) so the expiry can be detected
+            echo "🏃 Running pytest with 180s timeout..."
+            timeout --signal=INT --kill-after=10 180 bash -c '
+              echo "⏱️ Pytest starting at: $(date)"
+              echo "Running command: pytest tests/ -vv --maxfail=3 --tb=short --capture=no --log-cli-level=DEBUG"
+
+              # Run pytest with maximum verbosity and no output capture
+              pytest tests/ -vv --maxfail=3 --tb=short --capture=no --log-cli-level=DEBUG 2>&1 | tee pytest.log
+              PYTEST_EXIT=${PIPESTATUS[0]}
+
+              echo "✅ Pytest finished at: $(date) with exit code: $PYTEST_EXIT"
+              echo "Last 20 lines of pytest output:"
+              tail -20 pytest.log || true
+
+              # Immediately check for leftover processes
+              echo "🔍 Post-pytest process check:"
+              ps -ef | grep -E "python|pytest|embedding" | grep -v grep || echo "No leftover processes"
+
+              # Terminate direct children of this inner shell, then force-kill any survivors
+              echo "🧹 Cleaning up child processes..."
+              pkill -TERM -P $$ 2>/dev/null || true
+              sleep 0.5
+              pkill -KILL -P $$ 2>/dev/null || true
+
+              echo "📊 Final check before exit:"
+              ps -ef | grep -E "python|pytest|embedding" | grep -v grep || echo "All clean"
+
+              exit $PYTEST_EXIT
+            ' || EXIT_CODE=$?
+            # Capturing via "||" keeps an errexit shell (bash -e) from aborting before the status is handled
+            EXIT_CODE=${EXIT_CODE:-0}
+            echo "🔚 Timeout command exited with code: $EXIT_CODE"
+
+            if [ "$EXIT_CODE" -eq 124 ] || [ "$EXIT_CODE" -eq 137 ]; then
+              echo "⚠️ TIMEOUT TRIGGERED - Tests took more than 180 seconds!"
+              echo "📸 Capturing full diagnostics..."
+              diag
+
+              # Run diagnostic script if available
+              if [ -f scripts/diagnose_hang.sh ]; then
+                echo "🔍 Running diagnostic script..."
+                bash scripts/diagnose_hang.sh || true
+              fi
+
+              # More aggressive cleanup
+              echo "💀 Killing all Python processes owned by runner..."
+              pkill -9 -u runner python || true
+              pkill -9 -u runner pytest || true
+            elif [ "$EXIT_CODE" -ne 0 ]; then
+              echo "❌ Tests failed with exit code: $EXIT_CODE"
+            else
+              echo "✅ All tests passed!"
+            fi
+
+            # Always show final state
+            echo "📍 Final state check:"
+            ps -ef | grep -E 'python|pytest|embedding' | grep -v grep || echo "No Python processes remaining"
+
+            exit "$EXIT_CODE"
+          else
+            # For macOS/Windows, run without GNU timeout
+            echo "🚀 Running tests on $RUNNER_OS..."
+            pytest tests/ -vv --maxfail=3 --tb=short --capture=no --log-cli-level=INFO
+          fi
+
+      # On failure, open a tmate session (debug_enabled or '[debug]' in the commit message); step timeout caps it at 30 minutes
+      - name: Setup tmate session on failure
+        if: ${{ failure() && (inputs.debug_enabled || contains(github.event.head_commit.message, '[debug]')) }}
+        uses: mxschmitt/action-tmate@v3
+        timeout-minutes: 30
+        with:
+          limit-access-to-actor: true
 
       - name: Run sanity checks (optional)
         run: |