Based on a user's excellent analysis, implemented comprehensive fixes:

1. ZMQ Socket Cleanup:
   - Set LINGER=0 on all ZMQ sockets (client and server)
   - Use try-finally blocks to ensure socket.close() and context.term() are called
   - Prevents blocking on exit when ZMQ contexts have pending operations
2. Global Test Cleanup:
   - Added tests/conftest.py with a session-scoped cleanup fixture
   - Cleans up leftover ZMQ contexts and child processes after all tests
   - Lists remaining threads for debugging
3. CI Improvements:
   - Apply the timeout to ALL Python versions on Linux (not just 3.13)
   - Increased the timeout to 180s for better reliability
   - Added process cleanup (pkill) on timeout
4. Dependencies:
   - Added psutil>=5.9.0 to the test dependencies for process management

Root cause: Python 3.9/3.13 are more sensitive to cleanup timing during interpreter shutdown. ZMQ's default LINGER=-1 was blocking exit, and atexit handlers were unreliable for cleanup.

This should resolve the 'all tests pass but CI hangs' issue.
303 lines
11 KiB
YAML
303 lines
11 KiB
YAML
# Reusable workflow: other workflows invoke it through `workflow_call`
# and may pass the git ref that should be built.
name: Reusable Build

on:
  workflow_call:
    inputs:
      ref:
        description: "Git ref to build"
        type: string
        required: false
        default: ""

jobs:
|
|
# Static checks with ruff; the build job declares `needs: lint`.
lint:
  name: Lint and Format Check
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref }}

    - name: Setup Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.11"

    - name: Install uv
      uses: astral-sh/setup-uv@v4

    # ruff is pinned to an exact version
    - name: Install ruff
      run: uv tool install ruff==0.12.7

    - name: Run ruff check
      run: ruff check .

    - name: Run ruff format check
      run: ruff format --check .
|
|
|
|
# Build, repair, test and upload wheels for every OS / Python combination.
build:
  needs: lint
  name: Build ${{ matrix.os }} Python ${{ matrix.python }}
  strategy:
    matrix:
      # Cross product: 2 runners x 5 interpreters = 10 jobs.
      # Versions are quoted so "3.10" is not parsed as the float 3.1.
      os:
        - ubuntu-22.04
        - macos-latest
      python:
        - "3.9"
        - "3.10"
        - "3.11"
        - "3.12"
        - "3.13"
  runs-on: ${{ matrix.os }}

  steps:
|
|
# Fetch the requested ref together with its git submodules
- uses: actions/checkout@v4
  with:
    submodules: recursive
    ref: ${{ inputs.ref }}

# Interpreter version is taken from the build matrix
- name: Setup Python
  uses: actions/setup-python@v5
  with:
    python-version: ${{ matrix.python }}

- name: Install uv
  uses: astral-sh/setup-uv@v4
|
|
|
|
# Linux only: apt build/runtime libraries plus Intel oneMKL (needed by DiskANN).
- name: Install system dependencies (Ubuntu)
  if: runner.os == 'Linux'
  run: |
    sudo apt-get update
    sudo apt-get install -y libomp-dev libboost-all-dev protobuf-compiler libzmq3-dev \
      pkg-config libopenblas-dev patchelf libabsl-dev libaio-dev libprotobuf-dev

    # Install Intel MKL for DiskANN
    wget -q https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940.sh
    sudo sh intel-onemkl-2025.0.0.940.sh -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s
    source /opt/intel/oneapi/setvars.sh

    # Export the MKL location to the following steps of this job.
    # "$GITHUB_ENV" is quoted so the path is never word-split, and the
    # ${VAR:+...} form only appends the previous LD_LIBRARY_PATH when it is
    # non-empty, which avoids a trailing ":" (an empty search-path element).
    echo "MKLROOT=/opt/intel/oneapi/mkl/latest" >> "$GITHUB_ENV"
    echo "LD_LIBRARY_PATH=/opt/intel/oneapi/mkl/latest/lib/intel64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
|
|
|
|
# macOS only. LLVM is deliberately not installed: the system clang is used
# for better compatibility.
- name: Install system dependencies (macOS)
  if: runner.os == 'macOS'
  run: brew install libomp boost protobuf zeromq
|
|
|
|
- name: Install build dependencies
  run: |
    # Python-side build tooling shared by every platform
    uv pip install --system scikit-build-core numpy swig Cython pybind11

    # Wheel-repair tool: auditwheel on Linux, delocate on every other runner
    case "$RUNNER_OS" in
      Linux)
        uv pip install --system auditwheel
        ;;
      *)
        uv pip install --system delocate
        ;;
    esac
|
|
|
|
- name: Build packages
  run: |
    # Core package (platform independent), built on all platforms for consistency
    cd packages/leann-core
    uv build
    cd ../..

    # HNSW backend
    cd packages/leann-backend-hnsw
    if [[ "${{ matrix.os }}" == "macos-latest" ]]; then
      # System clang rather than Homebrew LLVM, for better compatibility
      export CC=clang CXX=clang++
      export MACOSX_DEPLOYMENT_TARGET=11.0
    fi
    uv build --wheel --python python
    cd ../..

    # DiskANN backend
    cd packages/leann-backend-diskann
    if [[ "${{ matrix.os }}" == "macos-latest" ]]; then
      # System clang rather than Homebrew LLVM, for better compatibility
      export CC=clang CXX=clang++
      # sgesdd_ is only available on macOS 13.3+
      export MACOSX_DEPLOYMENT_TARGET=13.3
    fi
    uv build --wheel --python python
    cd ../..

    # Meta package (platform independent), built on all platforms
    cd packages/leann
    uv build
    cd ../..
|
|
|
|
- name: Repair wheels (Linux)
  if: runner.os == 'Linux'
  run: |
    # Replace the raw build output in ./dist with the repaired wheels
    promote_repaired() {
      rm -rf dist
      mv dist_repaired dist
    }

    # HNSW wheel
    cd packages/leann-backend-hnsw
    if [ -d dist ]; then
      auditwheel repair dist/*.whl -w dist_repaired
      promote_repaired
    fi
    cd ../..

    # DiskANN wheel: list the compiled-extension entries and auditwheel's
    # report before the repair, and the entries again afterwards, for debugging
    cd packages/leann-backend-diskann
    if [ -d dist ]; then
      echo "Checking DiskANN wheel contents before repair:"
      unzip -l dist/*.whl | grep -E "\.so|\.pyd|_diskannpy" || echo "No .so files found"
      auditwheel show dist/*.whl || echo "auditwheel show failed"
      auditwheel repair dist/*.whl -w dist_repaired
      echo "Checking DiskANN wheel contents after repair:"
      unzip -l dist_repaired/*.whl | grep -E "\.so|\.pyd|_diskannpy" || echo "No .so files found after repair"
      promote_repaired
    fi
    cd ../..
|
|
|
|
- name: Repair wheels (macOS)
  if: runner.os == 'macOS'
  run: |
    # Both native backends get the same treatment: HNSW first, then DiskANN.
    # A package without a dist directory is skipped.
    for backend in leann-backend-hnsw leann-backend-diskann; do
      cd "packages/$backend"
      if [ -d dist ]; then
        delocate-wheel -w dist_repaired -v dist/*.whl
        rm -rf dist
        mv dist_repaired dist
      fi
      cd ../..
    done
|
|
|
|
# Log every wheel and sdist produced so far, sorted by path
- name: List built packages
  run: |
    echo "📦 Built packages:"
    find packages/*/dist \( -name "*.whl" -o -name "*.tar.gz" \) -print | sort
|
|
|
|
- name: Install built packages for testing
  run: |
    # Virtual environment created with the matrix Python version
    uv venv --python python${{ matrix.python }}
    source .venv/bin/activate || source .venv/Scripts/activate

    # Offer every local dist directory to the installer so that the wheels
    # built by this job are used, on all platforms alike
    WHEEL_DIRS=""
    for pkg in leann-core leann leann-backend-hnsw leann-backend-diskann; do
      WHEEL_DIRS="$WHEEL_DIRS --find-links packages/$pkg/dist"
    done

    uv pip install leann-core leann leann-backend-hnsw leann-backend-diskann \
      $WHEEL_DIRS --force-reinstall

    # Test dependencies come from the "test" extra
    uv pip install -e ".[test]"

    # Debug: verify that the _diskannpy module was installed correctly.
    # Each probe is best-effort and never fails the step.
    echo "Checking installed DiskANN module structure:"
    python -c "import leann_backend_diskann; print('leann_backend_diskann location:', leann_backend_diskann.__file__)" || echo "Failed to import leann_backend_diskann"
    python -c "from leann_backend_diskann import _diskannpy; print('_diskannpy imported successfully')" || echo "Failed to import _diskannpy"
    ls -la $(python -c "import leann_backend_diskann; import os; print(os.path.dirname(leann_backend_diskann.__file__))" 2>/dev/null) 2>/dev/null || echo "Failed to list module directory"

    # Extra debugging for Python 3.13
    if [ "${{ matrix.python }}" = "3.13" ]; then
      echo "=== Python 3.13 Debug Info ==="
      echo "Python version details:"
      python --version
      python -c "import sys; print(f'sys.version_info: {sys.version_info}')"

      echo "Pytest version:"
      python -m pytest --version

      # The 10-second timeout is applied on Linux only; macOS/Windows run without one
      echo "Testing basic pytest collection:"
      case "$RUNNER_OS" in
        Linux)
          timeout --signal=INT 10 python -m pytest --collect-only tests/test_ci_minimal.py -v || echo "Collection timed out or failed"
          ;;
        *)
          python -m pytest --collect-only tests/test_ci_minimal.py -v || echo "Collection failed"
          ;;
      esac

      echo "Testing single simple test:"
      case "$RUNNER_OS" in
        Linux)
          timeout --signal=INT 10 python -m pytest tests/test_ci_minimal.py::test_package_imports --full-trace -v || echo "Simple test timed out or failed"
          ;;
        *)
          python -m pytest tests/test_ci_minimal.py::test_package_imports --full-trace -v || echo "Simple test failed"
          ;;
      esac
    fi
|
|
|
|
# Runs the pytest suite inside the virtualenv created by the install step.
# On Linux the run is bounded by GNU timeout; other runners run unbounded.
- name: Run tests with pytest
  env:
    # Values are quoted: environment variables are always strings.
    CI: "true"  # Mark as CI environment to skip memory-intensive tests
    OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
    HF_HUB_DISABLE_SYMLINKS: "1"
    TOKENIZERS_PARALLELISM: "false"
    PYTORCH_ENABLE_MPS_FALLBACK: "0"  # Disable MPS on macOS CI to avoid memory issues
    OMP_NUM_THREADS: "1"  # Disable OpenMP parallelism to avoid libomp crashes
    MKL_NUM_THREADS: "1"  # Single thread for MKL operations
  run: |
    # Activate virtual environment
    source .venv/bin/activate || source .venv/Scripts/activate

    # Run all tests with timeout on Linux to prevent hanging
    if [[ "$RUNNER_OS" == "Linux" ]]; then
      echo "Running tests with timeout (Linux)..."
      # SIGINT is sent after 180s. A process stuck in native cleanup may never
      # act on SIGINT, and plain `timeout` would then wait for it forever, so
      # --kill-after escalates to SIGKILL 10s later to guarantee the step ends.
      timeout --signal=INT --kill-after=10 180 pytest tests/ -v || {
        EXIT_CODE=$?
        # 124: timed out and exited after SIGINT; 137: had to be SIGKILLed
        if [ "$EXIT_CODE" -eq 124 ] || [ "$EXIT_CODE" -eq 137 ]; then
          echo "⚠️ Tests timed out after 180 seconds - likely process cleanup issue"
          echo "Check for lingering ZMQ connections or child processes"
          # Try to clean up any leftover processes (best effort)
          pkill -TERM -P $$ || true
          sleep 1
          pkill -KILL -P $$ || true
        fi
        exit "$EXIT_CODE"
      }
    else
      # For macOS/Windows, run without GNU timeout
      echo "Running tests ($RUNNER_OS)..."
      pytest tests/ -v
    fi
|
|
|
|
- name: Run sanity checks (optional)
  run: |
    # Activate virtual environment
    source .venv/bin/activate || source .venv/Scripts/activate

    # Distance function tests run only when the script exists; a failing
    # script is reported but does not fail the step.
    # NOTE(review): this path starts with `test/` while the pytest step uses
    # `tests/` - confirm the directory name is intentional.
    SANITY_SCRIPT=test/sanity_checks/test_distance_functions.py
    if [ -f "$SANITY_SCRIPT" ]; then
      echo "Running distance function sanity checks..."
      python "$SANITY_SCRIPT" || echo "⚠️ Distance function test failed, continuing..."
    fi
|
|
fi
|
|
|
|
# One artifact per matrix cell, containing every package's dist directory
- name: Upload artifacts
  uses: actions/upload-artifact@v4
  with:
    path: packages/*/dist/
    name: packages-${{ matrix.os }}-py${{ matrix.python }}
|