Files
LEANN/.github/workflows/build-reusable.yml
Andy Lee 742c9baabc fix: increase outer timeout to 360s to respect pytest's 300s timeout
The outer shell timeout must be larger than pytest's internal timeout (300s)
to allow pytest to handle its own timeout gracefully and perform cleanup.

Changes:
- Increased outer timeout from 180s to 360s (300s + 60s buffer)
- Made timeouts configurable via environment variables
- Added clear documentation about timeout hierarchy
- Display timeout configuration at runtime

Timeout hierarchy:
1. Individual test: 20s (markers)
2. Pytest session: 300s (pyproject.toml)
3. Outer shell: 360s (for cleanup)
4. GitHub Actions: 6 hours (default)

This prevents the outer timeout from killing pytest before it can finish
its own timeout handling, which was likely causing the hanging issues.
2025-08-08 22:48:40 -07:00

461 lines
17 KiB
YAML

# Reusable workflow: a lint job followed by a matrix build job that builds,
# repairs, installs, and tests the package wheels.
name: Reusable Build

on:
  workflow_call:
    inputs:
      # Git ref handed to actions/checkout in both jobs.
      ref:
        description: 'Git ref to build'
        required: false
        type: string
        default: ''
      # Opt-in switch for the tmate debugging steps in the build job.
      debug_enabled:
        description: 'Enable tmate debugging session for troubleshooting'
        required: false
        type: boolean
        default: false

jobs:
# Static checks; the build job declares `needs: lint`, so it only starts
# once this job has succeeded.
lint:
  name: Lint and Format Check
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref }}

    - name: Setup Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    - name: Install uv
      uses: astral-sh/setup-uv@v4

    # ruff is pinned to an exact version.
    - name: Install ruff
      run: uv tool install ruff==0.12.7

    # Lint rules.
    - name: Run ruff check
      run: ruff check .

    # Formatting is verified only (--check); no files are rewritten.
    - name: Run ruff format check
      run: ruff format --check .
# Builds, repairs, installs, and tests the wheels for every OS / Python pair.
build:
  needs: lint
  name: Build ${{ matrix.os }} Python ${{ matrix.python }}
  strategy:
    matrix:
      # Cross product: 2 operating systems x 5 Python versions = 10 jobs.
      os:
        - ubuntu-22.04
        - macos-latest
      # Versions are quoted so that 3.10 is not read as the float 3.1.
      python:
        - '3.9'
        - '3.10'
        - '3.11'
        - '3.12'
        - '3.13'
  runs-on: ${{ matrix.os }}
  steps:
# Check out the requested ref together with its git submodules.
- uses: actions/checkout@v4
  with:
    ref: ${{ inputs.ref }}
    submodules: recursive

# The interpreter version comes from the current matrix entry.
- name: Setup Python
  uses: actions/setup-python@v5
  with:
    python-version: ${{ matrix.python }}

- name: Install uv
  uses: astral-sh/setup-uv@v4
# Linux only: apt packages plus the Intel oneMKL development component.
- name: Install system dependencies (Ubuntu)
  if: runner.os == 'Linux'
  run: |
    sudo apt-get update
    sudo apt-get install -y libomp-dev libboost-all-dev protobuf-compiler libzmq3-dev \
      pkg-config libopenblas-dev patchelf libabsl-dev libaio-dev libprotobuf-dev

    # Install Intel MKL for DiskANN
    wget -q https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940.sh
    sudo sh intel-onemkl-2025.0.0.940.sh -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s
    source /opt/intel/oneapi/setvars.sh

    # Persist the MKL location for the following steps via GITHUB_ENV.
    # "$GITHUB_ENV" is quoted so an unusual path cannot split the redirect target.
    echo "MKLROOT=/opt/intel/oneapi/mkl/latest" >> "$GITHUB_ENV"
    # Append the existing LD_LIBRARY_PATH only when it is non-empty: a trailing
    # ':' would add an empty entry, which the dynamic loader treats as the
    # current working directory.
    echo "LD_LIBRARY_PATH=/opt/intel/oneapi/mkl/latest/lib/intel64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
# macOS only: native libraries from Homebrew.
# Don't install LLVM, use system clang for better compatibility
- name: Install system dependencies (macOS)
  if: runner.os == 'macOS'
  run: brew install libomp boost protobuf zeromq
# Python-side build tooling, installed into the runner's system interpreter.
- name: Install build dependencies
  run: |
    uv pip install --system scikit-build-core numpy swig Cython pybind11
    # The wheel-repair tool depends on the platform:
    # auditwheel on Linux, delocate on every other runner OS.
    case "$RUNNER_OS" in
      Linux) uv pip install --system auditwheel ;;
      *)     uv pip install --system delocate ;;
    esac
# Builds the four packages in order: core, HNSW backend, DiskANN backend, meta.
- name: Build packages
  run: |
    # Build core (platform independent) on all platforms for consistency
    cd packages/leann-core
    uv build
    cd ../..

    # Build HNSW backend
    cd packages/leann-backend-hnsw
    if [ "${{ matrix.os }}" == "macos-latest" ]; then
      # Use system clang instead of homebrew LLVM for better compatibility
      export CC=clang
      export CXX=clang++
      export MACOSX_DEPLOYMENT_TARGET=11.0
    fi
    # The build command itself is the same on every platform.
    uv build --wheel --python python
    cd ../..

    # Build DiskANN backend
    cd packages/leann-backend-diskann
    if [ "${{ matrix.os }}" == "macos-latest" ]; then
      # Use system clang instead of homebrew LLVM for better compatibility
      export CC=clang
      export CXX=clang++
      # sgesdd_ is only available on macOS 13.3+
      export MACOSX_DEPLOYMENT_TARGET=13.3
    fi
    uv build --wheel --python python
    cd ../..

    # Build meta package (platform independent) on all platforms
    cd packages/leann
    uv build
    cd ../..
# Linux only: run auditwheel over both backend wheels and replace dist/ with
# the repaired output.
- name: Repair wheels (Linux)
  if: runner.os == 'Linux'
  run: |
    # Swap the repaired wheels in place of the originals (run inside a package dir).
    promote_repaired() {
      rm -rf dist
      mv dist_repaired dist
    }

    # Repair HNSW wheel
    cd packages/leann-backend-hnsw
    if [ -d dist ]; then
      auditwheel repair dist/*.whl -w dist_repaired
      promote_repaired
    fi
    cd ../..

    # Repair DiskANN wheel - use show first to debug
    cd packages/leann-backend-diskann
    if [ -d dist ]; then
      # The listing and `auditwheel show` are informational only; their
      # failures are reported and do not stop the step.
      echo "Checking DiskANN wheel contents before repair:"
      unzip -l dist/*.whl | grep -E "\.so|\.pyd|_diskannpy" || echo "No .so files found"
      auditwheel show dist/*.whl || echo "auditwheel show failed"

      auditwheel repair dist/*.whl -w dist_repaired

      echo "Checking DiskANN wheel contents after repair:"
      unzip -l dist_repaired/*.whl | grep -E "\.so|\.pyd|_diskannpy" || echo "No .so files found after repair"
      promote_repaired
    fi
    cd ../..
# macOS only: run delocate over both backend wheels and replace dist/ with
# the repaired output.
- name: Repair wheels (macOS)
  if: runner.os == 'macOS'
  run: |
    # Same sequence for each backend: HNSW first, then DiskANN.
    for backend in leann-backend-hnsw leann-backend-diskann; do
      cd "packages/$backend"
      if [ -d dist ]; then
        delocate-wheel -w dist_repaired -v dist/*.whl
        rm -rf dist
        mv dist_repaired dist
      fi
      cd ../..
    done
# Log every wheel and sdist found in the per-package dist/ directories.
- name: List built packages
  run: |
    echo "📦 Built packages:"
    # Match either extension, then sort the paths for a stable listing.
    find packages/*/dist \( -name "*.whl" -o -name "*.tar.gz" \) | sort
# Creates .venv, installs the freshly built packages plus the test extras,
# and prints import diagnostics (with extra probes on Python 3.13).
- name: Install built packages for testing
  run: |
    # Create a virtual environment with the correct Python version
    uv venv --python python${{ matrix.python }}
    source .venv/bin/activate || source .venv/Scripts/activate

    # Install the built wheels directly to ensure we use locally built packages
    # Use only locally built wheels on all platforms for full consistency
    wheel_dirs=(
      packages/leann-core/dist
      packages/leann/dist
      packages/leann-backend-hnsw/dist
      packages/leann-backend-diskann/dist
    )
    find_links=()
    for dist_dir in "${wheel_dirs[@]}"; do
      find_links+=(--find-links "$dist_dir")
    done
    uv pip install leann-core leann leann-backend-hnsw leann-backend-diskann \
      "${find_links[@]}" --force-reinstall

    # Install test dependencies using extras
    uv pip install -e ".[test]"

    # Debug: Check if _diskannpy module is installed correctly
    # (each probe reports its own failure and does not stop the step)
    echo "Checking installed DiskANN module structure:"
    python -c "import leann_backend_diskann; print('leann_backend_diskann location:', leann_backend_diskann.__file__)" || echo "Failed to import leann_backend_diskann"
    python -c "from leann_backend_diskann import _diskannpy; print('_diskannpy imported successfully')" || echo "Failed to import _diskannpy"
    ls -la $(python -c "import leann_backend_diskann; import os; print(os.path.dirname(leann_backend_diskann.__file__))" 2>/dev/null) 2>/dev/null || echo "Failed to list module directory"

    # Extra debugging for Python 3.13
    if [[ "${{ matrix.python }}" == "3.13" ]]; then
      # On Linux each pytest probe runs under a 10s `timeout`; other
      # platforms run it unguarded and use a shorter failure message.
      if [[ "$RUNNER_OS" == "Linux" ]]; then
        probe_guard=(timeout --signal=INT 10)
        collect_failure="Collection timed out or failed"
        single_failure="Simple test timed out or failed"
      else
        # No timeout on macOS/Windows
        probe_guard=()
        collect_failure="Collection failed"
        single_failure="Simple test failed"
      fi

      echo "=== Python 3.13 Debug Info ==="
      echo "Python version details:"
      python --version
      python -c "import sys; print(f'sys.version_info: {sys.version_info}')"
      echo "Pytest version:"
      python -m pytest --version

      echo "Testing basic pytest collection:"
      "${probe_guard[@]}" python -m pytest --collect-only tests/test_ci_minimal.py -v || echo "$collect_failure"

      echo "Testing single simple test:"
      "${probe_guard[@]}" python -m pytest tests/test_ci_minimal.py::test_package_imports --full-trace -v || echo "$single_failure"
    fi
# Enable tmate debugging session if requested
- name: Setup tmate session for debugging
  if: ${{ inputs.debug_enabled }}
  uses: mxschmitt/action-tmate@v3
  # `timeout-minutes` is a step-level workflow key, not an input of
  # action-tmate, so it belongs here rather than under `with:`.
  timeout-minutes: 30
  with:
    # Detached: the job continues while the session stays reachable.
    detached: true
    # Only the user who triggered the run may connect.
    limit-access-to-actor: true
- name: Run tests with pytest
  # Timeout hierarchy:
  #   1. Individual test timeout: 20s (see pyproject.toml markers)
  #   2. Pytest session timeout: 300s (see pyproject.toml [tool.pytest.ini_options])
  #   3. Outer shell timeout: 360s (300s + 60s buffer for cleanup)
  #   4. GitHub Actions job timeout: 6 hours (default)
  #
  # The script runs under the runner's default `bash -e`, so every command
  # whose failure must be inspected is guarded with `||`.
  env:
    # Values are quoted: environment variables are always strings.
    CI: 'true'  # Mark as CI environment to skip memory-intensive tests
    OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
    HF_HUB_DISABLE_SYMLINKS: '1'
    TOKENIZERS_PARALLELISM: 'false'
    PYTORCH_ENABLE_MPS_FALLBACK: '0'  # Disable MPS on macOS CI to avoid memory issues
    OMP_NUM_THREADS: '1'  # Disable OpenMP parallelism to avoid libomp crashes
    MKL_NUM_THREADS: '1'  # Single thread for MKL operations
  run: |
    # Activate virtual environment
    source .venv/bin/activate || source .venv/Scripts/activate

    # Define comprehensive diagnostic function.
    # Every probe is best-effort (`|| true`) so diagnostics never fail the step.
    diag() {
      echo "===== COMPREHENSIVE DIAGNOSTICS BEGIN ====="
      date
      echo ""
      echo "### Current Shell Info ###"
      echo "Shell PID: $$"
      echo "Shell PPID: $PPID"
      echo "Current directory: $(pwd)"
      echo ""
      echo "### Process Tree (full) ###"
      pstree -ap 2>/dev/null || ps auxf || true
      echo ""
      echo "### All Python/Pytest Processes ###"
      ps -ef | grep -E 'python|pytest' | grep -v grep || true
      echo ""
      echo "### Embedding Server Processes ###"
      ps -ef | grep -E 'embedding|zmq|diskann' | grep -v grep || true
      echo ""
      echo "### Network Listeners ###"
      ss -ltnp 2>/dev/null || netstat -ltn 2>/dev/null || true
      echo ""
      echo "### Open File Descriptors (lsof) ###"
      lsof -p $$ 2>/dev/null | head -20 || true
      echo ""
      echo "### Zombie Processes ###"
      ps aux | grep '<defunct>' || echo "No zombie processes"
      echo ""
      echo "### Current Jobs ###"
      jobs -l || true
      echo ""
      echo "### /proc/PID/fd for current shell ###"
      ls -la /proc/$$/fd 2>/dev/null || true
      echo ""
      echo "===== COMPREHENSIVE DIAGNOSTICS END ====="
    }

    # Enable verbose logging for debugging.
    # PYTEST_CURRENT_TEST is deliberately not exported: pytest manages that
    # variable itself while tests run.
    export PYTHONUNBUFFERED=1

    # Run all tests with extensive logging
    if [[ "$RUNNER_OS" == "Linux" ]]; then
      echo "🚀 Starting Linux test execution with timeout..."
      echo "Current time: $(date)"
      echo "Shell PID: $$"
      echo "Python: $(python --version)"
      echo "Pytest: $(pytest --version)"

      # Show environment variables for debugging
      echo "📦 Environment variables:"
      env | grep -E "PYTHON|PYTEST|CI|RUNNER" | sort

      # Set trap for diagnostics (also fires on every exit of this script)
      trap diag INT TERM EXIT

      echo "📋 Pre-test diagnostics:"
      ps -ef | grep -E 'python|pytest' | grep -v grep || echo "No python/pytest processes before test"

      # Check for any listening ports before test
      echo "🔌 Pre-test network state:"
      ss -ltn 2>/dev/null | grep -E "555[0-9]|556[0-9]" || echo "No embedding server ports open"

      # Set timeouts - outer must be larger than pytest's internal timeout
      # IMPORTANT: Keep PYTEST_TIMEOUT_SEC in sync with pyproject.toml [tool.pytest.ini_options] timeout
      PYTEST_TIMEOUT_SEC=${PYTEST_TIMEOUT_SEC:-300}  # Default 300s, matches pyproject.toml
      BUFFER_SEC=${TIMEOUT_BUFFER_SEC:-60}           # Buffer for cleanup after pytest timeout
      OUTER_TIMEOUT_SEC=${OUTER_TIMEOUT_SEC:-$((PYTEST_TIMEOUT_SEC + BUFFER_SEC))}

      echo "⏰ Timeout configuration:"
      echo " - Pytest internal timeout: ${PYTEST_TIMEOUT_SEC}s (from pyproject.toml)"
      echo " - Cleanup buffer: ${BUFFER_SEC}s"
      echo " - Outer shell timeout: ${OUTER_TIMEOUT_SEC}s (${PYTEST_TIMEOUT_SEC}s + ${BUFFER_SEC}s buffer)"
      echo " - This ensures pytest can complete its own timeout handling and cleanup"
      echo "🏃 Running pytest with ${OUTER_TIMEOUT_SEC}s outer timeout..."

      # Export for inner shell
      export PYTEST_TIMEOUT_SEC OUTER_TIMEOUT_SEC BUFFER_SEC

      # Two details make the exit-code handling below reachable and correct:
      #  * no --preserve-status, so a timeout is reported as 124 (or 137 when
      #    --kill-after has to escalate to SIGKILL) instead of being hidden
      #    behind the command's own status;
      #  * `|| EXIT_CODE=$?` captures a non-zero status without letting
      #    `bash -e` abort the script at this line.
      EXIT_CODE=0
      timeout --signal=INT --kill-after=10 "${OUTER_TIMEOUT_SEC}" bash -c '
        echo "⏱️ Pytest starting at: $(date)"
        echo "Running command: pytest tests/ -vv --maxfail=3 --tb=short --capture=no --log-cli-level=DEBUG"
        # Run pytest with maximum verbosity and no output capture
        pytest tests/ -vv --maxfail=3 --tb=short --capture=no --log-cli-level=DEBUG 2>&1 | tee pytest.log
        # Status of pytest itself, not of tee
        PYTEST_EXIT=${PIPESTATUS[0]}
        echo "✅ Pytest finished at: $(date) with exit code: $PYTEST_EXIT"
        echo "Last 20 lines of pytest output:"
        tail -20 pytest.log || true
        # Immediately check for leftover processes
        echo "🔍 Post-pytest process check:"
        ps -ef | grep -E "python|pytest|embedding" | grep -v grep || echo "No leftover processes"
        # Clean up any children before exit ($$ is this inner shell)
        echo "🧹 Cleaning up child processes..."
        pkill -TERM -P $$ 2>/dev/null || true
        sleep 0.5
        pkill -KILL -P $$ 2>/dev/null || true
        echo "📊 Final check before exit:"
        ps -ef | grep -E "python|pytest|embedding" | grep -v grep || echo "All clean"
        exit $PYTEST_EXIT
      ' || EXIT_CODE=$?

      echo "🔚 Timeout command exited with code: $EXIT_CODE"
      if [ "$EXIT_CODE" -eq 124 ] || [ "$EXIT_CODE" -eq 137 ]; then
        if [ "$EXIT_CODE" -eq 124 ]; then
          echo "⚠️ TIMEOUT TRIGGERED - Tests took more than ${OUTER_TIMEOUT_SEC} seconds!"
        else
          # 137 = 128 + SIGKILL: normally the --kill-after escalation of the
          # outer timeout, but an external kill looks the same.
          echo "⚠️ Test run was killed with SIGKILL (exit 137) - most likely the --kill-after escalation of the ${OUTER_TIMEOUT_SEC}s timeout"
        fi
        echo "📸 Capturing full diagnostics..."
        diag
        # Run diagnostic script if available
        if [ -f scripts/diagnose_hang.sh ]; then
          echo "🔍 Running diagnostic script..."
          bash scripts/diagnose_hang.sh || true
        fi
        # More aggressive cleanup
        echo "💀 Killing all Python processes owned by runner..."
        pkill -9 -u runner python || true
        pkill -9 -u runner pytest || true
      elif [ "$EXIT_CODE" -ne 0 ]; then
        echo "❌ Tests failed with exit code: $EXIT_CODE"
      else
        echo "✅ All tests passed!"
      fi

      # Always show final state
      echo "📍 Final state check:"
      ps -ef | grep -E 'python|pytest|embedding' | grep -v grep || echo "No Python processes remaining"
      exit "$EXIT_CODE"
    else
      # For macOS/Windows, run without GNU timeout
      echo "🚀 Running tests on $RUNNER_OS..."
      pytest tests/ -vv --maxfail=3 --tb=short --capture=no --log-cli-level=INFO
    fi
# Provide tmate session on test failure for debugging
- name: Setup tmate session on failure
  # Runs only after a failed step, and only when debugging was requested
  # through the input or a '[debug]' marker in the head commit message.
  if: ${{ failure() && (inputs.debug_enabled || contains(github.event.head_commit.message, '[debug]')) }}
  uses: mxschmitt/action-tmate@v3
  # `timeout-minutes` is a step-level workflow key, not an input of
  # action-tmate. This step is not detached, so without a step-level limit
  # an idle session could hold the runner until the job timeout.
  timeout-minutes: 30
  with:
    # Only the user who triggered the run may connect.
    limit-access-to-actor: true
# Optional extra check: skipped when the script is absent, and a failing
# script only prints a warning.
- name: Run sanity checks (optional)
  run: |
    # Activate virtual environment
    source .venv/bin/activate || source .venv/Scripts/activate

    # Run distance function tests if available
    sanity_script=test/sanity_checks/test_distance_functions.py
    if [ -f "$sanity_script" ]; then
      echo "Running distance function sanity checks..."
      python "$sanity_script" || echo "⚠️ Distance function test failed, continuing..."
    fi
# Publish every package's dist/ directory; the artifact name carries the
# matrix OS and Python version.
- name: Upload artifacts
  uses: actions/upload-artifact@v4
  with:
    name: packages-${{ matrix.os }}-py${{ matrix.python }}
    path: packages/*/dist/