# LEANN/.github/workflows/build-reusable.yml

# Reusable workflow: a lint job followed by a build job that builds, repairs,
# installs and tests the packages for each OS / Python combination in its matrix.
name: Reusable Build

# The only trigger is `workflow_call`, i.e. this file is invoked from another workflow.
# NOTE(review): the build job reads `secrets.OPENAI_API_KEY`, but no `secrets:`
# are declared here - presumably callers use `secrets: inherit`; confirm.
on:
  workflow_call:
    inputs:
      # Forwarded as `ref` to actions/checkout in both jobs; defaults to the empty string.
      ref:
        description: 'Git ref to build'
        required: false
        type: string
        default: ''
      # Gates the detached tmate step and is one of the conditions of the
      # tmate-on-failure step in the build job.
      debug_enabled:
        description: 'Enable tmate debugging session for troubleshooting'
        required: false
        type: boolean
        default: false

jobs:
  # Static checks with a pinned ruff version. The build job declares
  # `needs: lint`, so it only starts after this job.
  lint:
    name: Lint and Format Check
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref }}

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      # ruff is installed as a uv tool at an exact version.
      - name: Install ruff
        run: uv tool install ruff==0.12.7

      # Lint rules.
      - name: Run ruff check
        run: ruff check .

      # Formatting is verified only (`--check`), nothing is rewritten.
      - name: Run ruff format check
        run: ruff format --check .

  build:
    name: Build ${{ matrix.os }} Python ${{ matrix.python }}
    needs: lint
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # Cross product of the two axes below: 2 runners x 5 interpreters,
        # i.e. ten jobs, each exposing `matrix.os` and `matrix.python`.
        os:
          - ubuntu-22.04
          - macos-latest
        # Versions are quoted so that e.g. 3.10 stays a string.
        python:
          - '3.9'
          - '3.10'
          - '3.11'
          - '3.12'
          - '3.13'

    steps:
      # Check out the requested ref together with all nested submodules.
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref }}
          submodules: recursive

      # Interpreter version is taken from the current matrix entry.
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python }}

      # uv is used by later steps for `uv pip`, `uv build` and `uv venv`.
      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - name: Install system dependencies (Ubuntu)
        if: runner.os == 'Linux'
        run: |
          # Distro packages needed by the native builds
          sudo apt-get update
          sudo apt-get install -y \
            libomp-dev libboost-all-dev protobuf-compiler libzmq3-dev \
            pkg-config libopenblas-dev patchelf libabsl-dev libaio-dev libprotobuf-dev

          # Intel MKL for DiskANN: download the installer script, then run it
          mkl_installer_url=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940.sh
          mkl_installer=${mkl_installer_url##*/}
          wget -q "$mkl_installer_url"
          sudo sh "$mkl_installer" -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s

          # setvars.sh is sourced first so that whatever it puts into
          # LD_LIBRARY_PATH is included in the value recorded for later steps.
          source /opt/intel/oneapi/setvars.sh
          {
            echo "MKLROOT=/opt/intel/oneapi/mkl/latest"
            echo "LD_LIBRARY_PATH=/opt/intel/oneapi/mkl/latest/lib/intel64:$LD_LIBRARY_PATH"
          } >> "$GITHUB_ENV"

      - name: Install system dependencies (macOS)
        if: runner.os == 'macOS'
        # LLVM is deliberately not installed: the system clang is used for
        # better compatibility.
        run: brew install libomp boost protobuf zeromq

      - name: Install build dependencies
        run: |
          # Build-time Python packages, installed into the runner's interpreter
          uv pip install --system scikit-build-core numpy swig Cython pybind11

          # Wheel repair tool: auditwheel on Linux, delocate everywhere else
          case "$RUNNER_OS" in
            Linux) wheel_repair_tool=auditwheel ;;
            *)     wheel_repair_tool=delocate ;;
          esac
          uv pip install --system "$wheel_repair_tool"

      - name: Build packages
        run: |
          # Runs a plain `uv build` inside directory $1, then returns to the repo root.
          build_pure_package() {
            cd "$1"
            uv build
            cd ../..
          }

          # Builds a wheel inside directory $1, then returns to the repo root.
          # On the macos-latest runner the system clang is selected (instead of
          # Homebrew LLVM, for better compatibility) and $2 is exported as
          # MACOSX_DEPLOYMENT_TARGET before the build.
          build_backend_wheel() {
            cd "$1"
            if [ "${{ matrix.os }}" == "macos-latest" ]; then
              export CC=clang
              export CXX=clang++
              export MACOSX_DEPLOYMENT_TARGET="$2"
            fi
            uv build --wheel --python python
            cd ../..
          }

          # Core (platform independent) is built on all platforms for consistency
          build_pure_package packages/leann-core

          # HNSW backend
          build_backend_wheel packages/leann-backend-hnsw 11.0

          # DiskANN backend; sgesdd_ is only available on macOS 13.3+
          build_backend_wheel packages/leann-backend-diskann 13.3

          # Meta package (platform independent), also built on all platforms
          build_pure_package packages/leann

      - name: Repair wheels (Linux)
        if: runner.os == 'Linux'
        run: |
          # Runs `auditwheel repair` on the wheel(s) in $1/dist and replaces
          # dist/ with the repaired output. Does nothing when $1 has no dist/.
          # A non-empty $2 additionally prints the DiskANN debug listings
          # before and after the repair.
          repair_linux_wheel() {
            local diskann_debug="${2:-}"
            cd "$1"
            if [ -d dist ]; then
              if [ -n "$diskann_debug" ]; then
                echo "Checking DiskANN wheel contents before repair:"
                unzip -l dist/*.whl | grep -E "\.so|\.pyd|_diskannpy" || echo "No .so files found"
                auditwheel show dist/*.whl || echo "auditwheel show failed"
              fi

              auditwheel repair dist/*.whl -w dist_repaired

              if [ -n "$diskann_debug" ]; then
                echo "Checking DiskANN wheel contents after repair:"
                unzip -l dist_repaired/*.whl | grep -E "\.so|\.pyd|_diskannpy" || echo "No .so files found after repair"
              fi

              rm -rf dist
              mv dist_repaired dist
            fi
            cd ../..
          }

          # HNSW wheel
          repair_linux_wheel packages/leann-backend-hnsw

          # DiskANN wheel, with the extra before/after listings for debugging
          repair_linux_wheel packages/leann-backend-diskann debug

      - name: Repair wheels (macOS)
        if: runner.os == 'macOS'
        run: |
          # Same procedure for both native backends (HNSW first, then DiskANN):
          # delocate the wheel(s) into dist_repaired/ and let that directory
          # replace dist/. A backend without a dist/ directory is skipped.
          for backend in leann-backend-hnsw leann-backend-diskann; do
            cd "packages/${backend}"
            if [ -d dist ]; then
              delocate-wheel -w dist_repaired -v dist/*.whl
              rm -rf dist
              mv dist_repaired dist
            fi
            cd ../..
          done

      - name: List built packages
        run: |
          echo "📦 Built packages:"
          # Wheels and sdists found under every package's dist/ directory, sorted
          find packages/*/dist \( -name "*.whl" -o -name "*.tar.gz" \) | sort

      - name: Install built packages for testing
        run: |
          # Virtual environment on the interpreter version of this matrix entry
          uv venv --python python${{ matrix.python }}
          source .venv/bin/activate || source .venv/Scripts/activate

          # Offer every package's local dist/ directory to the installer via
          # --find-links, then (re)install the four packages.
          link_args=()
          for pkg in leann-core leann leann-backend-hnsw leann-backend-diskann; do
            link_args+=(--find-links "packages/${pkg}/dist")
          done
          uv pip install leann-core leann leann-backend-hnsw leann-backend-diskann \
            "${link_args[@]}" --force-reinstall

          # Test dependencies come from the `test` extra of the root project
          uv pip install -e ".[test]"

          # Debug output only: none of these probes can fail the step
          echo "Checking installed DiskANN module structure:"
          python -c "import leann_backend_diskann; print('leann_backend_diskann location:', leann_backend_diskann.__file__)" || echo "Failed to import leann_backend_diskann"
          python -c "from leann_backend_diskann import _diskannpy; print('_diskannpy imported successfully')" || echo "Failed to import _diskannpy"
          ls -la $(python -c "import leann_backend_diskann; import os; print(os.path.dirname(leann_backend_diskann.__file__))" 2>/dev/null) 2>/dev/null || echo "Failed to list module directory"

          # Extra debugging for Python 3.13
          if [[ "${{ matrix.python }}" == "3.13" ]]; then
            # The 10s `timeout` guard is only applied on Linux; on other
            # runners the same commands run without it, and the failure
            # messages differ accordingly.
            if [[ "$RUNNER_OS" == "Linux" ]]; then
              guard=(timeout --signal=INT 10)
              collect_failure="Collection timed out or failed"
              single_failure="Simple test timed out or failed"
            else
              guard=()
              collect_failure="Collection failed"
              single_failure="Simple test failed"
            fi

            echo "=== Python 3.13 Debug Info ==="
            echo "Python version details:"
            python --version
            python -c "import sys; print(f'sys.version_info: {sys.version_info}')"

            echo "Pytest version:"
            python -m pytest --version

            echo "Testing basic pytest collection:"
            "${guard[@]}" python -m pytest --collect-only tests/test_ci_minimal.py -v || echo "$collect_failure"

            echo "Testing single simple test:"
            "${guard[@]}" python -m pytest tests/test_ci_minimal.py::test_package_imports --full-trace -v || echo "$single_failure"
          fi

      # Enable tmate debugging session if requested.
      # `detached: true` starts the session without blocking the steps that follow.
      - name: Setup tmate session for debugging
        if: ${{ inputs.debug_enabled }}
        uses: mxschmitt/action-tmate@v3
        # `timeout-minutes` is a step-level workflow key, not an input of the
        # action, so it belongs here rather than under `with:` (where it was
        # ignored as an unexpected input).
        # NOTE(review): in detached mode the step itself returns right away,
        # so confirm whether a separate cap on the session lifetime is wanted.
        timeout-minutes: 30
        with:
          detached: true
          limit-access-to-actor: true

      - name: Run tests with pytest
        # Timeout hierarchy:
        # 1. Individual test timeout: 20s (see pyproject.toml markers)
        # 2. Pytest session timeout: 300s (see pyproject.toml [tool.pytest.ini_options])
        # 3. Outer shell timeout: 360s (300s + 60s buffer for cleanup)
        # 4. GitHub Actions job timeout: 6 hours (default)
        #
        # Exit-status handling on Linux:
        # - No `shell:` is set, so the script runs under `bash -e`. The status of
        #   the `timeout` command is therefore captured with `|| EXIT_CODE=$?`;
        #   a bare failing command would end the script before any of the
        #   post-run reporting and cleanup could execute.
        # - `timeout` is called WITHOUT `--preserve-status`, so an expired timer
        #   is reported as 124 (137 if the --kill-after SIGKILL was needed)
        #   instead of being masked by the child's own exit status.
        env:
          CI: 'true'  # Mark as CI environment to skip memory-intensive tests
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          HF_HUB_DISABLE_SYMLINKS: '1'
          TOKENIZERS_PARALLELISM: 'false'
          PYTORCH_ENABLE_MPS_FALLBACK: '0'  # Disable MPS on macOS CI to avoid memory issues
          OMP_NUM_THREADS: '1'  # Disable OpenMP parallelism to avoid libomp crashes
          MKL_NUM_THREADS: '1'  # Single thread for MKL operations
        run: |
          # Activate virtual environment
          source .venv/bin/activate || source .venv/Scripts/activate

          # Comprehensive diagnostic dump: shell info, process tree, python and
          # embedding-server processes, listeners, file descriptors, zombies
          # and shell jobs. Every probe is best-effort so that a missing tool
          # never aborts the step.
          diag() {
            echo "===== COMPREHENSIVE DIAGNOSTICS BEGIN ====="
            date
            echo ""
            echo "### Current Shell Info ###"
            echo "Shell PID: $$"
            echo "Shell PPID: $PPID"
            echo "Current directory: $(pwd)"
            echo ""

            echo "### Process Tree (full) ###"
            pstree -ap 2>/dev/null || ps auxf || true
            echo ""

            echo "### All Python/Pytest Processes ###"
            ps -ef | grep -E 'python|pytest' | grep -v grep || true
            echo ""

            echo "### Embedding Server Processes ###"
            ps -ef | grep -E 'embedding|zmq|diskann' | grep -v grep || true
            echo ""

            echo "### Network Listeners ###"
            ss -ltnp 2>/dev/null || netstat -ltn 2>/dev/null || true
            echo ""

            echo "### Open File Descriptors (lsof) ###"
            lsof -p $$ 2>/dev/null | head -20 || true
            echo ""

            echo "### Zombie Processes ###"
            # Filter out the grep process itself; otherwise it always matches
            # and the fallback message can never be printed.
            ps aux | grep '<defunct>' | grep -v grep || echo "No zombie processes"
            echo ""

            echo "### Current Jobs ###"
            jobs -l || true
            echo ""

            echo "### /proc/PID/fd for current shell ###"
            ls -la /proc/$$/fd 2>/dev/null || true
            echo ""

            echo "===== COMPREHENSIVE DIAGNOSTICS END ====="
          }

          # Enable verbose logging for debugging
          export PYTHONUNBUFFERED=1
          export PYTEST_CURRENT_TEST=1

          # Run all tests with extensive logging
          if [[ "$RUNNER_OS" == "Linux" ]]; then
            echo "🚀 Starting Linux test execution with timeout..."
            echo "Current time: $(date)"
            echo "Shell PID: $$"
            echo "Python: $(python --version)"
            echo "Pytest: $(pytest --version)"

            # Show environment variables for debugging
            echo "📦 Environment variables:"
            env | grep -E "PYTHON|PYTEST|CI|RUNNER" | sort

            # Run the diagnostics whenever this shell is interrupted or exits
            trap diag INT TERM EXIT

            echo "📋 Pre-test diagnostics:"
            ps -ef | grep -E 'python|pytest' | grep -v grep || echo "No python/pytest processes before test"

            # Check for any listening ports before test
            echo "🔌 Pre-test network state:"
            ss -ltn 2>/dev/null | grep -E "555[0-9]|556[0-9]" || echo "No embedding server ports open"

            # Set timeouts - outer must be larger than pytest's internal timeout
            # IMPORTANT: Keep PYTEST_TIMEOUT_SEC in sync with pyproject.toml [tool.pytest.ini_options] timeout
            PYTEST_TIMEOUT_SEC=${PYTEST_TIMEOUT_SEC:-300}  # Default 300s, matches pyproject.toml
            BUFFER_SEC=${TIMEOUT_BUFFER_SEC:-60}  # Buffer for cleanup after pytest timeout
            OUTER_TIMEOUT_SEC=${OUTER_TIMEOUT_SEC:-$((PYTEST_TIMEOUT_SEC + BUFFER_SEC))}

            echo "⏰ Timeout configuration:"
            echo "   - Pytest internal timeout: ${PYTEST_TIMEOUT_SEC}s (from pyproject.toml)"
            echo "   - Cleanup buffer: ${BUFFER_SEC}s"
            echo "   - Outer shell timeout: ${OUTER_TIMEOUT_SEC}s (${PYTEST_TIMEOUT_SEC}s + ${BUFFER_SEC}s buffer)"
            echo "   - This ensures pytest can complete its own timeout handling and cleanup"

            echo "🏃 Running pytest with ${OUTER_TIMEOUT_SEC}s outer timeout..."

            # Export for inner shell
            export PYTEST_TIMEOUT_SEC OUTER_TIMEOUT_SEC BUFFER_SEC

            # `|| EXIT_CODE=$?` keeps `bash -e` from terminating the script on a
            # non-zero status, so the reporting and cleanup below always run.
            EXIT_CODE=0
            timeout --signal=INT --kill-after=10 "${OUTER_TIMEOUT_SEC}" bash -c '
              echo "⏱️ Pytest starting at: $(date)"
              echo "Running command: pytest tests/ -vv --maxfail=3 --tb=short --capture=no"

              # Run pytest with maximum verbosity and no output capture
              pytest tests/ -vv --maxfail=3 --tb=short --capture=no --log-cli-level=DEBUG 2>&1 | tee pytest.log
              PYTEST_EXIT=${PIPESTATUS[0]}

              echo "✅ Pytest finished at: $(date) with exit code: $PYTEST_EXIT"
              echo "Last 20 lines of pytest output:"
              tail -20 pytest.log || true

              # Immediately check for leftover processes
              echo "🔍 Post-pytest process check:"
              ps -ef | grep -E "python|pytest|embedding" | grep -v grep || echo "No leftover processes"

              # Clean up any children before exit
              echo "🧹 Cleaning up child processes..."
              pkill -TERM -P $$ 2>/dev/null || true
              sleep 0.5
              pkill -KILL -P $$ 2>/dev/null || true

              echo "📊 Final check before exit:"
              ps -ef | grep -E "python|pytest|embedding" | grep -v grep || echo "All clean"

              exit $PYTEST_EXIT
            ' || EXIT_CODE=$?

            echo "🔚 Timeout command exited with code: $EXIT_CODE"

            # 124: the outer timer expired. 137 (128 + SIGKILL): the run had to
            # be killed, e.g. by the --kill-after escalation.
            if [ "$EXIT_CODE" -eq 124 ] || [ "$EXIT_CODE" -eq 137 ]; then
              if [ "$EXIT_CODE" -eq 124 ]; then
                echo "⚠️ TIMEOUT TRIGGERED - Tests took more than ${OUTER_TIMEOUT_SEC} seconds!"
              else
                echo "⚠️ Test run was terminated by SIGKILL (exit code 137)"
              fi
              echo "📸 Capturing full diagnostics..."
              diag

              # Run diagnostic script if available
              if [ -f scripts/diagnose_hang.sh ]; then
                echo "🔍 Running diagnostic script..."
                bash scripts/diagnose_hang.sh || true
              fi

              # More aggressive cleanup
              echo "💀 Killing all Python processes owned by runner..."
              pkill -9 -u runner python || true
              pkill -9 -u runner pytest || true
            elif [ "$EXIT_CODE" -ne 0 ]; then
              echo "❌ Tests failed with exit code: $EXIT_CODE"
            else
              echo "✅ All tests passed!"
            fi

            # Always show final state
            echo "📍 Final state check:"
            ps -ef | grep -E 'python|pytest|embedding' | grep -v grep || echo "No Python processes remaining"

            exit $EXIT_CODE
          else
            # For macOS/Windows, run without GNU timeout
            echo "🚀 Running tests on $RUNNER_OS..."
            pytest tests/ -vv --maxfail=3 --tb=short --capture=no --log-cli-level=INFO
          fi

      # Provide tmate session on test failure for debugging.
      # Runs only after a failed step, and only when `debug_enabled` is set or
      # the head commit message contains `[debug]`.
      - name: Setup tmate session on failure
        if: ${{ failure() && (inputs.debug_enabled || contains(github.event.head_commit.message, '[debug]')) }}
        uses: mxschmitt/action-tmate@v3
        # `timeout-minutes` is a step-level workflow key, not an input of the
        # action. Under `with:` it was ignored, leaving this blocking session
        # without the intended 30-minute limit.
        timeout-minutes: 30
        with:
          limit-access-to-actor: true

      - name: Run sanity checks (optional)
        run: |
          # Activate virtual environment
          source .venv/bin/activate || source .venv/Scripts/activate

          # Optional distance-function check: skipped when the script does not
          # exist, and a failing run is only reported, never fatal.
          # NOTE(review): the pytest steps use `tests/`, this path uses `test/` - confirm.
          distance_check=test/sanity_checks/test_distance_functions.py
          if [ -f "$distance_check" ]; then
            echo "Running distance function sanity checks..."
            python "$distance_check" || echo "⚠️ Distance function test failed, continuing..."
          fi

      # Publish everything under each package's dist/ directory. The artifact
      # name includes the matrix OS and Python version, so it is distinct for
      # every matrix entry.
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: packages-${{ matrix.os }}-py${{ matrix.python }}
          path: packages/*/dist/