refactor: remove upterm/tmate debug code and clean CI workflow

- Remove all upterm/tmate SSH debugging infrastructure
- Restore clean CI workflow from main branch
- Remove diagnostic script that was only for SSH debugging
- Keep valuable DiskANN and HNSW backend improvements

This provides a clean base to add targeted pytest hang debugging without the complexity of SSH sessions.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-12 00:31:27 -07:00
parent fdf47852f0
commit 341141cf8b
2 changed files with 4 additions and 318 deletions
--- a/.github/workflows/build-reusable.yml
+++ b/.github/workflows/build-reusable.yml
@@ -8,11 +8,6 @@ on:
        required: false
        type: string
        default: ''
-      debug_enabled:
-        description: 'Enable tmate debugging session for troubleshooting'
-        required: false
-        type: boolean
-        default: false

 jobs:
  lint:
@@ -33,7 +28,7 @@ jobs:

      - name: Install ruff
        run: |
-          uv tool install ruff==0.12.7
+          uv tool install ruff

      - name: Run ruff check
        run: |
@@ -190,15 +185,10 @@ jobs:
          fi
          cd ../..

-          # Repair DiskANN wheel - use show first to debug
+          # Repair DiskANN wheel
          cd packages/leann-backend-diskann
          if [ -d dist ]; then
-            echo "Checking DiskANN wheel contents before repair:"
-            unzip -l dist/*.whl | grep -E "\.so|\.pyd|_diskannpy" || echo "No .so files found"
-            auditwheel show dist/*.whl || echo "auditwheel show failed"
            auditwheel repair dist/*.whl -w dist_repaired
-            echo "Checking DiskANN wheel contents after repair:"
-            unzip -l dist_repaired/*.whl | grep -E "\.so|\.pyd|_diskannpy" || echo "No .so files found after repair"
            rm -rf dist
            mv dist_repaired dist
          fi
@@ -246,12 +236,6 @@ jobs:
          # Install test dependencies using extras
          uv pip install -e ".[test]"

-          # Debug: Check if _diskannpy module is installed correctly
-          echo "Checking installed DiskANN module structure:"
-          python -c "import leann_backend_diskann; print('leann_backend_diskann location:', leann_backend_diskann.__file__)" || echo "Failed to import leann_backend_diskann"
-          python -c "from leann_backend_diskann import _diskannpy; print('_diskannpy imported successfully')" || echo "Failed to import _diskannpy"
-          ls -la $(python -c "import leann_backend_diskann; import os; print(os.path.dirname(leann_backend_diskann.__file__))" 2>/dev/null) 2>/dev/null || echo "Failed to list module directory"
-
      - name: Run tests with pytest
        env:
          CI: true  # Mark as CI environment to skip memory-intensive tests
@@ -265,203 +249,8 @@ jobs:
          # Activate virtual environment
          source .venv/bin/activate || source .venv/Scripts/activate

-          # Debug: Show debug_enabled value
-          echo "🔍 DEBUG_ENABLED value: '${{ inputs.debug_enabled }}'"
-          echo "🔍 EVENT NAME: '${{ github.event_name }}'"
-          echo "🔍 COMMIT MESSAGE: '${{ github.event.head_commit.message }}'"
-          echo "🔍 Contains [debug]: '${{ contains(github.event.head_commit.message, '[debug]') }}'"
-          echo "🔍 GITHUB REF: '${{ github.ref }}'"
-          echo "🔍 GITHUB HEAD_REF: '${{ github.head_ref }}'"
-          
-          # Start tmate session INSIDE the test step if debug enabled
-          # FORCE DEBUG MODE - Always enable on this debug branch
-          DEBUG_MODE=true
-          echo "✅ DEBUG MODE FORCED ON - Investigation branch"
-          
-          if [[ "$DEBUG_MODE" == "true" ]]; then
-            echo "🔧 DEBUG MODE: Starting tmate session before tests..."
-            # Install tmate if not available
-            if ! command -v tmate &> /dev/null; then
-              if [[ "$RUNNER_OS" == "Linux" ]]; then
-                sudo apt-get update && sudo apt-get install -y tmate
-              elif [[ "$RUNNER_OS" == "macOS" ]]; then
-                brew install tmate
-              fi
-            fi
-            
-            # Start tmate session in background
-            echo "Starting tmate session..."
-            tmate -S debug-session new-session -d 
-            
-            # Wait for tmate to initialize and get connection info
-            echo "Waiting for tmate to initialize..."
-            sleep 5
-            
-            # Try multiple times to get connection info
-            for i in {1..10}; do
-              SSH_INFO=$(tmate -S debug-session display -p '#{tmate_ssh}' 2>/dev/null || echo "")
-              WEB_INFO=$(tmate -S debug-session display -p '#{tmate_web}' 2>/dev/null || echo "")
-              
-              if [[ -n "$SSH_INFO" && "$SSH_INFO" != "connecting..." ]]; then
-                echo "🔗 SSH: $SSH_INFO"
-                echo "🔗 Web: $WEB_INFO"
-                break
-              fi
-              
-              echo "Attempt $i: Still connecting... (SSH: '$SSH_INFO')"
-              sleep 2
-            done
-            echo "⏱️  Session will timeout after 30 minutes"
-            echo "💡 You can now SSH in and run: pytest tests/ -vv --capture=no"
-            echo "💡 Or run diagnostics: bash scripts/diagnose_hang.sh"
-            echo ""
-            echo "Waiting 60 seconds for you to connect..."
-            sleep 60
-          fi
-
-          # Define comprehensive diagnostic function
-          diag() {
-            echo "===== COMPREHENSIVE DIAGNOSTICS BEGIN ====="
-            date
-            echo ""
-            echo "### Current Shell Info ###"
-            echo "Shell PID: $$"
-            echo "Shell PPID: $PPID"
-            echo "Current directory: $(pwd)"
-            echo ""
-
-            echo "### Process Tree (full) ###"
-            pstree -ap 2>/dev/null || ps auxf || true
-            echo ""
-
-            echo "### All Python/Pytest Processes ###"
-            ps -ef | grep -E 'python|pytest' | grep -v grep || true
-            echo ""
-
-            echo "### Embedding Server Processes ###"
-            ps -ef | grep -E 'embedding|zmq|diskann' | grep -v grep || true
-            echo ""
-
-            echo "### Network Listeners ###"
-            ss -ltnp 2>/dev/null || netstat -ltn 2>/dev/null || true
-            echo ""
-
-            echo "### Open File Descriptors (lsof) ###"
-            lsof -p $$ 2>/dev/null | head -20 || true
-            echo ""
-
-            echo "### Zombie Processes ###"
-            ps aux | grep '<defunct>' || echo "No zombie processes"
-            echo ""
-
-            echo "### Current Jobs ###"
-            jobs -l || true
-            echo ""
-
-            echo "### /proc/PID/fd for current shell ###"
-            ls -la /proc/$$/fd 2>/dev/null || true
-            echo ""
-
-            echo "===== COMPREHENSIVE DIAGNOSTICS END ====="
-          }
-
-                    # Enable verbose logging for debugging
-          export PYTHONUNBUFFERED=1
-          export PYTEST_CURRENT_TEST=1
-
-          # Run all tests with extensive logging
-          if [[ "$RUNNER_OS" == "Linux" ]]; then
-            echo "🚀 Starting Linux test execution with timeout..."
-            echo "Current time: $(date)"
-            echo "Shell PID: $$"
-            echo "Python: $(python --version)"
-            echo "Pytest: $(pytest --version)"
-
-            # Show environment variables for debugging
-            echo "📦 Environment variables:"
-            env | grep -E "PYTHON|PYTEST|CI|RUNNER" | sort
-
-            # Set trap for diagnostics
-            trap diag INT TERM EXIT
-
-            echo "📋 Pre-test diagnostics:"
-            ps -ef | grep -E 'python|pytest' | grep -v grep || echo "No python/pytest processes before test"
-
-            # Check for any listening ports before test
-            echo "🔌 Pre-test network state:"
-            ss -ltn 2>/dev/null | grep -E "555[0-9]|556[0-9]" || echo "No embedding server ports open"
-
-            echo "🏃 Running pytest with 180s timeout..."
-            timeout --preserve-status --signal=INT --kill-after=10 180 bash -c '
-              echo "⏱️ Pytest starting at: $(date)"
-              echo "Running command: pytest tests/ -vv --maxfail=3 --tb=short --capture=no"
-
-              # Run pytest with maximum verbosity and no output capture
-              pytest tests/ -vv --maxfail=3 --tb=short --capture=no --log-cli-level=DEBUG 2>&1 | tee pytest.log
-              PYTEST_EXIT=${PIPESTATUS[0]}
-
-              echo "✅ Pytest finished at: $(date) with exit code: $PYTEST_EXIT"
-              echo "Last 20 lines of pytest output:"
-              tail -20 pytest.log || true
-
-              # Immediately check for leftover processes
-              echo "🔍 Post-pytest process check:"
-              ps -ef | grep -E "python|pytest|embedding" | grep -v grep || echo "No leftover processes"
-
-              # Clean up any children before exit
-              echo "🧹 Cleaning up child processes..."
-              pkill -TERM -P $$ 2>/dev/null || true
-              sleep 0.5
-              pkill -KILL -P $$ 2>/dev/null || true
-
-              echo "📊 Final check before exit:"
-              ps -ef | grep -E "python|pytest|embedding" | grep -v grep || echo "All clean"
-
-              exit $PYTEST_EXIT
-            '
-
-            EXIT_CODE=$?
-            echo "🔚 Timeout command exited with code: $EXIT_CODE"
-
-                        if [ $EXIT_CODE -eq 124 ]; then
-              echo "⚠️ TIMEOUT TRIGGERED - Tests took more than 180 seconds!"
-              echo "📸 Capturing full diagnostics..."
-              diag
-
-              # Run diagnostic script if available
-              if [ -f scripts/diagnose_hang.sh ]; then
-                echo "🔍 Running diagnostic script..."
-                bash scripts/diagnose_hang.sh || true
-              fi
-
-              # More aggressive cleanup
-              echo "💀 Killing all Python processes owned by runner..."
-              pkill -9 -u runner python || true
-              pkill -9 -u runner pytest || true
-            elif [ $EXIT_CODE -ne 0 ]; then
-              echo "❌ Tests failed with exit code: $EXIT_CODE"
-            else
-              echo "✅ All tests passed!"
-            fi
-
-                        # Always show final state
-            echo "📍 Final state check:"
-            ps -ef | grep -E 'python|pytest|embedding' | grep -v grep || echo "No Python processes remaining"
-
-            exit $EXIT_CODE
-          else
-            # For macOS/Windows, run without GNU timeout
-            echo "🚀 Running tests on $RUNNER_OS..."
-            pytest tests/ -vv --maxfail=3 --tb=short --capture=no --log-cli-level=INFO
-          fi
-
-      # Provide tmate session on test failure for debugging
-      - name: Setup tmate session on failure
-        if: ${{ failure() && (inputs.debug_enabled || contains(github.event.head_commit.message, '[debug]')) }}
-        uses: mxschmitt/action-tmate@v3
-        with:
-          timeout-minutes: 30
-          limit-access-to-actor: true
+          # Run all tests
+          pytest tests/

      - name: Run sanity checks (optional)
        run: |