# 1. CI Logging Enhancements:
#    - Added comprehensive diagnostics with process tree, network listeners, and file descriptors
#    - Added timestamps at every stage (before/during/after pytest)
#    - Added trap EXIT to always show diagnostics
#    - Added immediate process checks after pytest finishes
#    - Added sub-shell execution with immediate cleanup
# 2. Fixed Subprocess PIPE Blocking:
#    - Changed Colab mode from PIPE to DEVNULL to prevent blocking
#    - A PIPE that is never read can cause the parent process to wait indefinitely
# 3. Pytest Session Hooks:
#    - Added pytest_sessionstart to log initial state
#    - Added pytest_sessionfinish for aggressive cleanup before exit
#    - Shows all child processes and their status
#
# This should reveal exactly where the hang is happening.
392 lines
14 KiB
YAML
392 lines
14 KiB
YAML
# Reusable workflow: it has no triggers of its own and only runs when another
# workflow invokes it through `workflow_call`.
name: Reusable Build

on:
  workflow_call:
    inputs:
      # Optional ref; the jobs in this file pass it to actions/checkout.
      ref:
        description: "Git ref to build"
        required: false
        type: string
        default: ""
|
|
|
|
jobs:
|
|
# Static checks only: ruff lint plus ruff formatting verification.
lint:
  name: Lint and Format Check
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref }}

    - name: Setup Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.11"

    - name: Install uv
      uses: astral-sh/setup-uv@v4

    # ruff is pinned to an exact version so lint results are reproducible.
    - name: Install ruff
      run: uv tool install ruff==0.12.7

    - name: Run ruff check
      run: ruff check .

    # --check reports formatting differences without rewriting files.
    - name: Run ruff format check
      run: ruff format --check .
|
|
|
|
# Builds, repairs, installs and tests every package on each OS/Python
# combination, then uploads the resulting distributions as artifacts.
build:
  needs: lint
  name: Build ${{ matrix.os }} Python ${{ matrix.python }}
  strategy:
    matrix:
      # Cross product of 2 operating systems x 5 Python versions (10 jobs).
      # Versions are quoted so that '3.10' is not parsed as the float 3.1.
      os: [ubuntu-22.04, macos-latest]
      python: ['3.9', '3.10', '3.11', '3.12', '3.13']
  runs-on: ${{ matrix.os }}

  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref }}
        submodules: recursive

    - name: Setup Python
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python }}

    - name: Install uv
      uses: astral-sh/setup-uv@v4

    - name: Install system dependencies (Ubuntu)
      if: runner.os == 'Linux'
      run: |
        sudo apt-get update
        sudo apt-get install -y libomp-dev libboost-all-dev protobuf-compiler libzmq3-dev \
          pkg-config libopenblas-dev patchelf libabsl-dev libaio-dev libprotobuf-dev

        # Install Intel MKL for DiskANN
        wget -q https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940.sh
        sudo sh intel-onemkl-2025.0.0.940.sh -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s
        source /opt/intel/oneapi/setvars.sh
        # Export to later steps; the env file path is quoted in case it contains spaces.
        echo "MKLROOT=/opt/intel/oneapi/mkl/latest" >> "$GITHUB_ENV"
        echo "LD_LIBRARY_PATH=/opt/intel/oneapi/mkl/latest/lib/intel64:$LD_LIBRARY_PATH" >> "$GITHUB_ENV"

    - name: Install system dependencies (macOS)
      if: runner.os == 'macOS'
      run: |
        # Don't install LLVM, use system clang for better compatibility
        brew install libomp boost protobuf zeromq

    - name: Install build dependencies
      run: |
        uv pip install --system scikit-build-core numpy swig Cython pybind11
        # Wheel repair tool differs per platform: auditwheel (Linux) vs delocate (macOS).
        if [[ "$RUNNER_OS" == "Linux" ]]; then
          uv pip install --system auditwheel
        else
          uv pip install --system delocate
        fi

    - name: Build packages
      run: |
        # Build core (platform independent) on all platforms for consistency
        cd packages/leann-core
        uv build
        cd ../..

        # Build HNSW backend
        cd packages/leann-backend-hnsw
        if [ "${{ matrix.os }}" == "macos-latest" ]; then
          # Use system clang instead of homebrew LLVM for better compatibility
          export CC=clang
          export CXX=clang++
          export MACOSX_DEPLOYMENT_TARGET=11.0
        fi
        uv build --wheel --python python
        cd ../..

        # Build DiskANN backend
        cd packages/leann-backend-diskann
        if [ "${{ matrix.os }}" == "macos-latest" ]; then
          # Use system clang instead of homebrew LLVM for better compatibility
          export CC=clang
          export CXX=clang++
          # sgesdd_ is only available on macOS 13.3+
          export MACOSX_DEPLOYMENT_TARGET=13.3
        fi
        uv build --wheel --python python
        cd ../..

        # Build meta package (platform independent) on all platforms
        cd packages/leann
        uv build
        cd ../..

    - name: Repair wheels (Linux)
      if: runner.os == 'Linux'
      run: |
        # Repair HNSW wheel
        cd packages/leann-backend-hnsw
        if [ -d dist ]; then
          auditwheel repair dist/*.whl -w dist_repaired
          # Replace the raw wheel with the repaired one so later steps only see the latter
          rm -rf dist
          mv dist_repaired dist
        fi
        cd ../..

        # Repair DiskANN wheel - use show first to debug
        cd packages/leann-backend-diskann
        if [ -d dist ]; then
          echo "Checking DiskANN wheel contents before repair:"
          unzip -l dist/*.whl | grep -E "\.so|\.pyd|_diskannpy" || echo "No .so files found"
          auditwheel show dist/*.whl || echo "auditwheel show failed"
          auditwheel repair dist/*.whl -w dist_repaired
          echo "Checking DiskANN wheel contents after repair:"
          unzip -l dist_repaired/*.whl | grep -E "\.so|\.pyd|_diskannpy" || echo "No .so files found after repair"
          rm -rf dist
          mv dist_repaired dist
        fi
        cd ../..

    - name: Repair wheels (macOS)
      if: runner.os == 'macOS'
      run: |
        # Repair HNSW wheel
        cd packages/leann-backend-hnsw
        if [ -d dist ]; then
          delocate-wheel -w dist_repaired -v dist/*.whl
          rm -rf dist
          mv dist_repaired dist
        fi
        cd ../..

        # Repair DiskANN wheel
        cd packages/leann-backend-diskann
        if [ -d dist ]; then
          delocate-wheel -w dist_repaired -v dist/*.whl
          rm -rf dist
          mv dist_repaired dist
        fi
        cd ../..

    - name: List built packages
      run: |
        echo "📦 Built packages:"
        find packages/*/dist -name "*.whl" -o -name "*.tar.gz" | sort

    - name: Install built packages for testing
      run: |
        # Create a virtual environment with the correct Python version
        uv venv --python python${{ matrix.python }}
        source .venv/bin/activate || source .venv/Scripts/activate

        # Install the built wheels directly to ensure we use locally built packages
        # Use only locally built wheels on all platforms for full consistency
        FIND_LINKS="--find-links packages/leann-core/dist --find-links packages/leann/dist"
        FIND_LINKS="$FIND_LINKS --find-links packages/leann-backend-hnsw/dist --find-links packages/leann-backend-diskann/dist"

        # $FIND_LINKS is intentionally unquoted so it splits into separate arguments
        uv pip install leann-core leann leann-backend-hnsw leann-backend-diskann \
          $FIND_LINKS --force-reinstall

        # Install test dependencies using extras
        uv pip install -e ".[test]"

        # Debug: Check if _diskannpy module is installed correctly
        echo "Checking installed DiskANN module structure:"
        python -c "import leann_backend_diskann; print('leann_backend_diskann location:', leann_backend_diskann.__file__)" || echo "Failed to import leann_backend_diskann"
        python -c "from leann_backend_diskann import _diskannpy; print('_diskannpy imported successfully')" || echo "Failed to import _diskannpy"
        ls -la $(python -c "import leann_backend_diskann; import os; print(os.path.dirname(leann_backend_diskann.__file__))" 2>/dev/null) 2>/dev/null || echo "Failed to list module directory"

        # Extra debugging for Python 3.13
        if [[ "${{ matrix.python }}" == "3.13" ]]; then
          echo "=== Python 3.13 Debug Info ==="
          echo "Python version details:"
          python --version
          python -c "import sys; print(f'sys.version_info: {sys.version_info}')"

          echo "Pytest version:"
          python -m pytest --version

          echo "Testing basic pytest collection:"
          if [[ "$RUNNER_OS" == "Linux" ]]; then
            timeout --signal=INT 10 python -m pytest --collect-only tests/test_ci_minimal.py -v || echo "Collection timed out or failed"
          else
            # No timeout on macOS/Windows
            python -m pytest --collect-only tests/test_ci_minimal.py -v || echo "Collection failed"
          fi

          echo "Testing single simple test:"
          if [[ "$RUNNER_OS" == "Linux" ]]; then
            timeout --signal=INT 10 python -m pytest tests/test_ci_minimal.py::test_package_imports --full-trace -v || echo "Simple test timed out or failed"
          else
            # No timeout on macOS/Windows
            python -m pytest tests/test_ci_minimal.py::test_package_imports --full-trace -v || echo "Simple test failed"
          fi
        fi

    - name: Run tests with pytest
      env:
        # Values are quoted: environment variables are always strings.
        CI: 'true'  # Mark as CI environment to skip memory-intensive tests
        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        HF_HUB_DISABLE_SYMLINKS: '1'
        TOKENIZERS_PARALLELISM: 'false'
        # NOTE(review): this variable appears to control CPU fallback for ops
        # unsupported on MPS rather than disabling MPS itself - confirm intent.
        PYTORCH_ENABLE_MPS_FALLBACK: '0'  # Disable MPS on macOS CI to avoid memory issues
        OMP_NUM_THREADS: '1'  # Disable OpenMP parallelism to avoid libomp crashes
        MKL_NUM_THREADS: '1'  # Single thread for MKL operations
      run: |
        # Activate virtual environment
        source .venv/bin/activate || source .venv/Scripts/activate

        # Seconds pytest may run on Linux before GNU timeout interrupts it
        PYTEST_TIMEOUT=180

        # Define comprehensive diagnostic function
        diag() {
          echo "===== COMPREHENSIVE DIAGNOSTICS BEGIN ====="
          date
          echo ""
          echo "### Current Shell Info ###"
          echo "Shell PID: $$"
          echo "Shell PPID: $PPID"
          echo "Current directory: $(pwd)"
          echo ""

          echo "### Process Tree (full) ###"
          pstree -ap 2>/dev/null || ps auxf || true
          echo ""

          echo "### All Python/Pytest Processes ###"
          ps -ef | grep -E 'python|pytest' | grep -v grep || true
          echo ""

          echo "### Embedding Server Processes ###"
          ps -ef | grep -E 'embedding|zmq|diskann' | grep -v grep || true
          echo ""

          echo "### Network Listeners ###"
          ss -ltnp 2>/dev/null || netstat -ltn 2>/dev/null || true
          echo ""

          echo "### Open File Descriptors (lsof) ###"
          lsof -p $$ 2>/dev/null | head -20 || true
          echo ""

          echo "### Zombie Processes ###"
          # Exclude the grep process itself; otherwise it always matches its
          # own command line and the fallback message is never printed.
          ps aux | grep '<defunct>' | grep -v grep || echo "No zombie processes"
          echo ""

          echo "### Current Jobs ###"
          jobs -l || true
          echo ""

          echo "### /proc/PID/fd for current shell ###"
          ls -la /proc/$$/fd 2>/dev/null || true
          echo ""

          echo "===== COMPREHENSIVE DIAGNOSTICS END ====="
        }

        # Run all tests with extensive logging
        if [[ "$RUNNER_OS" == "Linux" ]]; then
          echo "🚀 Starting Linux test execution with timeout..."
          echo "Current time: $(date)"
          echo "Shell PID: $$"

          # Set trap for diagnostics
          trap diag INT TERM EXIT

          echo "📋 Pre-test diagnostics:"
          ps -ef | grep -E 'python|pytest' | grep -v grep || echo "No python/pytest processes before test"

          echo "🏃 Running pytest with ${PYTEST_TIMEOUT}s timeout..."
          # The status is captured with `||` because the step's shell runs with
          # errexit (-e): a bare failing command would abort the script here and
          # skip all of the reporting below.
          # --preserve-status is deliberately NOT used: with it, timeout returns
          # the child's status instead of 124, making the timeout check below
          # unreachable.
          EXIT_CODE=0
          timeout --signal=INT --kill-after=10 "$PYTEST_TIMEOUT" bash -c '
            echo "⏱️ Pytest starting at: $(date)"
            pytest tests/ -vv --maxfail=3
            PYTEST_EXIT=$?
            echo "✅ Pytest finished at: $(date) with exit code: $PYTEST_EXIT"

            # Immediately check for leftover processes
            echo "🔍 Post-pytest process check:"
            ps -ef | grep -E "python|pytest|embedding" | grep -v grep || echo "No leftover processes"

            # Clean up any children before exit
            # ($$ is single-quoted here, so it is the PID of this inner bash)
            echo "🧹 Cleaning up child processes..."
            pkill -TERM -P $$ 2>/dev/null || true
            sleep 0.5
            pkill -KILL -P $$ 2>/dev/null || true

            echo "📊 Final check before exit:"
            ps -ef | grep -E "python|pytest|embedding" | grep -v grep || echo "All clean"

            exit $PYTEST_EXIT
          ' || EXIT_CODE=$?

          echo "🔚 Timeout command exited with code: $EXIT_CODE"

          # 124: the command timed out; 137 (128+9): timeout had to follow up
          # with SIGKILL after the --kill-after grace period.
          if [ "$EXIT_CODE" -eq 124 ] || [ "$EXIT_CODE" -eq 137 ]; then
            echo "⚠️ TIMEOUT TRIGGERED - Tests took more than ${PYTEST_TIMEOUT} seconds!"
            echo "📸 Capturing full diagnostics..."
            diag

            # More aggressive cleanup
            echo "💀 Killing all Python processes owned by runner..."
            pkill -9 -u runner python || true
            pkill -9 -u runner pytest || true
          elif [ "$EXIT_CODE" -ne 0 ]; then
            echo "❌ Tests failed with exit code: $EXIT_CODE"
          else
            echo "✅ All tests passed!"
          fi

          # Always show final state
          echo "📍 Final state check:"
          ps -ef | grep -E 'python|pytest|embedding' | grep -v grep || echo "No Python processes remaining"

          exit "$EXIT_CODE"
        else
          # For macOS/Windows, run without GNU timeout
          echo "🚀 Running tests on $RUNNER_OS..."
          pytest tests/ -vv --maxfail=3
        fi

    - name: Run sanity checks (optional)
      run: |
        # Activate virtual environment
        source .venv/bin/activate || source .venv/Scripts/activate

        # Run distance function tests if available
        # NOTE(review): this path uses `test/` while pytest above runs `tests/`;
        # if the directory name is a typo this check is silently skipped - confirm.
        if [ -f test/sanity_checks/test_distance_functions.py ]; then
          echo "Running distance function sanity checks..."
          python test/sanity_checks/test_distance_functions.py || echo "⚠️ Distance function test failed, continuing..."
        fi

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        # One artifact per matrix cell; the name embeds OS and Python version.
        name: packages-${{ matrix.os }}-py${{ matrix.python }}
        path: packages/*/dist/
|