# (Extraction residue from the file viewer: "439 lines, 17 KiB, YAML" - not part of the workflow.)
# Reusable workflow: it has no triggers of its own and runs only when another
# workflow invokes it through `workflow_call`.
name: Reusable Build

on:
  workflow_call:
    inputs:
      # Optional git ref; it is forwarded as `ref` to actions/checkout in the
      # jobs of this workflow. Defaults to the empty string.
      ref:
        description: 'Git ref to build'
        required: false
        type: string
        default: ''

jobs:
  # Static checks with ruff. The build matrix below declares `needs: lint`,
  # so it only starts once this job has succeeded.
  lint:
    name: Lint and Format Check
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref }}

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - name: Install ruff
        run: |
          uv tool install ruff

      - name: Run ruff check
        run: |
          ruff check .

      - name: Run ruff format check
        run: |
          ruff format --check .

  # Builds, repairs, installs and tests the packages once per (os, python)
  # pair listed in the matrix, then uploads the dist directories.
  build:
    needs: lint
    name: Build ${{ matrix.os }} Python ${{ matrix.python }}
    strategy:
      matrix:
        # Python versions are quoted so that '3.10' is not read as the
        # float 3.1.
        include:
          - os: ubuntu-22.04
            python: '3.9'
          - os: ubuntu-22.04
            python: '3.10'
          - os: ubuntu-22.04
            python: '3.11'
          - os: ubuntu-22.04
            python: '3.12'
          - os: ubuntu-22.04
            python: '3.13'
          - os: macos-14
            python: '3.9'
          - os: macos-14
            python: '3.10'
          - os: macos-14
            python: '3.11'
          - os: macos-14
            python: '3.12'
          - os: macos-14
            python: '3.13'
          - os: macos-13
            python: '3.9'
          - os: macos-13
            python: '3.10'
          - os: macos-13
            python: '3.11'
          - os: macos-13
            python: '3.12'
          # Note: macos-13 + Python 3.13 excluded due to PyTorch compatibility
          # (PyTorch 2.5+ supports Python 3.13 but not Intel Mac x86_64)
    runs-on: ${{ matrix.os }}

    steps:
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref }}
          submodules: recursive

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python }}

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - name: Install system dependencies (Ubuntu)
        if: runner.os == 'Linux'
        run: |
          sudo apt-get update
          sudo apt-get install -y libomp-dev libboost-all-dev protobuf-compiler libzmq3-dev \
            pkg-config libopenblas-dev patchelf libabsl-dev libaio-dev libprotobuf-dev

          # Install Intel MKL for DiskANN
          wget -q https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940.sh
          sudo sh intel-onemkl-2025.0.0.940.sh -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s
          source /opt/intel/oneapi/setvars.sh
          # Written to GITHUB_ENV so the values are visible to later steps.
          echo "MKLROOT=/opt/intel/oneapi/mkl/latest" >> "$GITHUB_ENV"
          echo "LD_LIBRARY_PATH=/opt/intel/oneapi/mkl/latest/lib/intel64:$LD_LIBRARY_PATH" >> "$GITHUB_ENV"

      - name: Install system dependencies (macOS)
        if: runner.os == 'macOS'
        run: |
          # Don't install LLVM, use system clang for better compatibility
          brew install libomp boost protobuf zeromq

      - name: Install build dependencies
        run: |
          uv pip install --system scikit-build-core numpy swig Cython pybind11
          # Wheel repair tool differs per platform: auditwheel on Linux,
          # delocate everywhere else (macOS in this matrix).
          if [[ "$RUNNER_OS" == "Linux" ]]; then
            uv pip install --system auditwheel
          else
            uv pip install --system delocate
          fi

      - name: Set macOS environment variables
        if: runner.os == 'macOS'
        run: |
          # Use brew --prefix to automatically detect Homebrew installation path
          HOMEBREW_PREFIX=$(brew --prefix)
          echo "HOMEBREW_PREFIX=${HOMEBREW_PREFIX}" >> "$GITHUB_ENV"
          echo "OpenMP_ROOT=${HOMEBREW_PREFIX}/opt/libomp" >> "$GITHUB_ENV"

          # Set CMAKE_PREFIX_PATH to let CMake find all packages automatically
          echo "CMAKE_PREFIX_PATH=${HOMEBREW_PREFIX}" >> "$GITHUB_ENV"

          # Set compiler flags for OpenMP (required for both backends)
          echo "LDFLAGS=-L${HOMEBREW_PREFIX}/opt/libomp/lib" >> "$GITHUB_ENV"
          echo "CPPFLAGS=-I${HOMEBREW_PREFIX}/opt/libomp/include" >> "$GITHUB_ENV"

      - name: Build packages
        run: |
          # Build core (platform independent)
          cd packages/leann-core
          uv build
          cd ../..

          # Build HNSW backend
          cd packages/leann-backend-hnsw
          if [[ "${{ matrix.os }}" == macos-* ]]; then
            # Use system clang for better compatibility
            export CC=clang
            export CXX=clang++
            export MACOSX_DEPLOYMENT_TARGET=11.0
          fi
          uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
          cd ../..

          # Build DiskANN backend
          cd packages/leann-backend-diskann
          if [[ "${{ matrix.os }}" == macos-* ]]; then
            # Use system clang for better compatibility
            export CC=clang
            export CXX=clang++
            # DiskANN requires macOS 13.3+ for sgesdd_ LAPACK function
            export MACOSX_DEPLOYMENT_TARGET=13.3
          fi
          uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
          cd ../..

          # Build meta package (platform independent)
          cd packages/leann
          uv build
          cd ../..

      - name: Repair wheels (Linux)
        if: runner.os == 'Linux'
        run: |
          # Repair the HNSW and DiskANN wheels: write the repaired wheel to
          # dist_repaired, then swap that directory in as dist.
          for pkg in leann-backend-hnsw leann-backend-diskann; do
            cd "packages/${pkg}"
            if [ -d dist ]; then
              auditwheel repair dist/*.whl -w dist_repaired
              rm -rf dist
              mv dist_repaired dist
            fi
            cd ../..
          done

      - name: Repair wheels (macOS)
        if: runner.os == 'macOS'
        run: |
          # Same swap-in-place procedure as on Linux, using delocate.
          for pkg in leann-backend-hnsw leann-backend-diskann; do
            cd "packages/${pkg}"
            if [ -d dist ]; then
              delocate-wheel -w dist_repaired -v dist/*.whl
              rm -rf dist
              mv dist_repaired dist
            fi
            cd ../..
          done

      - name: List built packages
        run: |
          echo "📦 Built packages:"
          find packages/*/dist -name "*.whl" -o -name "*.tar.gz" | sort

      - name: Install built packages for testing
        run: |
          # Create a virtual environment with the correct Python version
          uv venv --python ${{ matrix.python }}
          source .venv/bin/activate || source .venv/Scripts/activate

          # Install packages using --find-links to prioritize local builds
          uv pip install \
            --find-links packages/leann-core/dist \
            --find-links packages/leann-backend-hnsw/dist \
            --find-links packages/leann-backend-diskann/dist \
            packages/leann-core/dist/*.whl \
            || uv pip install --find-links packages/leann-core/dist packages/leann-core/dist/*.tar.gz
          uv pip install --find-links packages/leann-core/dist packages/leann-backend-hnsw/dist/*.whl
          uv pip install --find-links packages/leann-core/dist packages/leann-backend-diskann/dist/*.whl
          uv pip install packages/leann/dist/*.whl || uv pip install packages/leann/dist/*.tar.gz

          # Install test dependencies using extras
          uv pip install -e ".[test]"

      - name: Run tests with pytest
        env:
          # Values are quoted: environment variables are always strings.
          CI: 'true'  # Mark as CI environment to skip memory-intensive tests
          # NOTE(review): in a reusable workflow this secret is only populated
          # if the caller passes it (e.g. `secrets: inherit`) - confirm.
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          HF_HUB_DISABLE_SYMLINKS: '1'
          TOKENIZERS_PARALLELISM: 'false'
          PYTORCH_ENABLE_MPS_FALLBACK: '0'  # Disable MPS on macOS CI to avoid memory issues
          OMP_NUM_THREADS: '1'  # Disable OpenMP parallelism to avoid libomp crashes
          MKL_NUM_THREADS: '1'  # Single thread for MKL operations
        run: |
          # Activate virtual environment
          source .venv/bin/activate || source .venv/Scripts/activate

          # Add targeted debugging for pytest hangs (especially Ubuntu 22.04)
          if [[ "${{ matrix.os }}" == "ubuntu-22.04" ]]; then
            echo "🔍 [HANG DEBUG] Ubuntu 22.04 detected - enabling enhanced process monitoring"

            # The debug runner script lives in the repo: scripts/ci_debug_pytest.py

            # Hard limit (seconds) that `timeout` enforces on the pytest run.
            # Kept in one variable so the log messages cannot drift from it.
            PYTEST_TIMEOUT_SECS=900

            # Pre-test state
            echo "📊 [HANG DEBUG] Pre-test process state:"
            ps aux | grep -E "(python|embedding|zmq)" | grep -v grep || echo "No relevant processes"

            echo "🔌 [HANG DEBUG] Pre-test network state:"
            ss -tulpn | grep -E "(555[0-9]|556[0-9])" || echo "No embedding server ports"

            # Function to monitor processes during test: every 30s it logs the
            # matching processes and any listeners on ports 5550-5569.
            monitor_processes() {
              while true; do
                sleep 30
                echo "⏰ [HANG DEBUG] $(date): Process check during test execution"
                ps aux | grep -E "(python|pytest|embedding)" | grep -v grep | head -10
                ss -tulpn | grep -E "(555[0-9]|556[0-9])" || echo "No ports"
              done
            }

            # Start background monitoring
            monitor_processes &
            MONITOR_PID=$!
            echo "🔍 [HANG DEBUG] Started background monitor (PID: $MONITOR_PID)"

            # Run pytest with enhanced real-time monitoring (no dependency on pytest logs)
            echo "🚀 [HANG DEBUG] Starting pytest with ${PYTEST_TIMEOUT_SECS}s timeout and external monitoring..."

            # Independent monitor that tracks the actual pytest process.
            #   $1 - PID of the `timeout` wrapper started below.
            # Polls every 10s. When the pytest process shows <2% CPU for three
            # consecutive polls after more than 120s without observed activity,
            # it sends SIGUSR1 to pytest and attempts a gdb backtrace. After
            # 900s it force-kills the wrapper and any pytest process.
            external_monitor() {
              local timeout_pid=$1
              local start_time=$(date +%s)
              local last_output_time=$start_time
              local stable_count=0

              while true; do
                sleep 10
                current_time=$(date +%s)
                elapsed=$((current_time - start_time))
                output_silence=$((current_time - last_output_time))

                # Find the actual pytest process (deepest Python process in the tree)
                actual_pytest_pid=$(pgrep -f "python.*-m.*pytest" | tail -1)

                if [ -z "$actual_pytest_pid" ]; then
                  echo "📊 [EXTERNAL] $(date): No pytest process found, checking if timeout is still running"
                  if ! kill -0 $timeout_pid 2>/dev/null; then
                    echo "📊 [EXTERNAL] $(date): Timeout process ended after ${elapsed}s"
                    break
                  fi
                  continue
                fi

                # Get detailed process info for actual pytest
                ps_info=$(ps -p $actual_pytest_pid -o pid,ppid,time,pcpu,pmem,state,comm 2>/dev/null || echo "PROCESS_GONE")
                if [ "$ps_info" != "PROCESS_GONE" ]; then
                  current_cpu=$(echo "$ps_info" | tail -1 | awk '{print $4}' | cut -d. -f1)
                  # Guard the integer comparisons below against an empty field.
                  current_cpu=${current_cpu:-0}
                  state=$(echo "$ps_info" | tail -1 | awk '{print $6}')

                  echo "📊 [EXTERNAL] $(date): Real pytest PID $actual_pytest_pid - CPU: ${current_cpu}%, State: $state, Silent: ${output_silence}s"

                  # Check for real hang: low CPU + no output for extended time + process still running
                  if [ "$current_cpu" -lt 2 ] && [ $output_silence -gt 120 ] && [ "$state" != "Z" ]; then
                    stable_count=$((stable_count + 1))
                    if [ $stable_count -ge 3 ]; then  # 30 seconds of confirmed hang
                      echo "🔥 [EXTERNAL] $(date): REAL HANG DETECTED - dumping stack traces"
                      echo "🔍 [EXTERNAL] $(date): Sending SIGUSR1 to pytest PID $actual_pytest_pid"
                      kill -USR1 $actual_pytest_pid 2>/dev/null || echo "Failed to send signal to pytest"

                      # Also try to get system-level stack trace
                      echo "🔍 [EXTERNAL] $(date): Getting system stack trace with gdb"
                      timeout 10 gdb --batch --ex "thread apply all bt" --ex "quit" --pid=$actual_pytest_pid 2>/dev/null || echo "gdb failed"

                      # Reset counter to avoid spam
                      stable_count=0
                      last_output_time=$current_time
                    fi
                  else
                    stable_count=0
                    # Update last output time if we see activity
                    if [ "$current_cpu" -gt 5 ]; then
                      last_output_time=$current_time
                    fi
                  fi

                  # Check for zombie/stopped state
                  if [ "$state" = "Z" ] || [ "$state" = "T" ]; then
                    echo "💀 [EXTERNAL] $(date): Pytest process in abnormal state: $state"
                  fi
                else
                  echo "📊 [EXTERNAL] $(date): Pytest process $actual_pytest_pid disappeared"
                fi

                # Emergency timeout - much longer now
                if [ $elapsed -gt 900 ]; then  # 15 minutes
                  echo "💥 [EXTERNAL] $(date): Emergency timeout reached, force killing"
                  kill -KILL $timeout_pid 2>/dev/null || true
                  pkill -KILL -f "pytest" 2>/dev/null || true
                  break
                fi
              done
            }

            # Run pytest in background so we can monitor it externally
            python -u -c "import sys, time; print(f'🔍 [REALTIME] {time.strftime(\"%H:%M:%S\")} Starting pytest...', flush=True)"
            # No --preserve-status here: on expiry `timeout` then exits with 124
            # (or 137 when --kill-after had to send KILL), which the timeout
            # check further down relies on.
            timeout --signal=TERM --kill-after=30 "$PYTEST_TIMEOUT_SECS" bash -c '
              echo "▶️ [HANG DEBUG] Pytest starting at: $(date)"
              # Force unbuffered output and immediate flush
              stdbuf -o0 -e0 python scripts/ci_debug_pytest.py tests/ -v --tb=short --maxfail=5 -x -s 2>&1 | while IFS= read -r line; do
                printf "%s [PYTEST] %s\n" "$(date +"%H:%M:%S")" "$line"
                # Force flush after each line
                sync
              done
              PYTEST_RESULT=${PIPESTATUS[0]}
              echo "✅ [HANG DEBUG] Pytest completed at: $(date) with exit code: $PYTEST_RESULT"
              exit $PYTEST_RESULT
            ' &
            PYTEST_PID=$!
            echo "🔍 [HANG DEBUG] Pytest started with PID: $PYTEST_PID"

            # Start external monitoring
            external_monitor $PYTEST_PID &
            EXTERNAL_MONITOR_PID=$!

            # Wait for pytest to complete. The `||` form keeps a non-zero
            # status from aborting the script under `bash -e`, so the cleanup
            # and diagnostics below still run when tests fail.
            PYTEST_EXIT=0
            wait "$PYTEST_PID" || PYTEST_EXIT=$?
            echo "🏁 [HANG DEBUG] Pytest process ended with exit code: $PYTEST_EXIT"

            # Stop external monitor
            kill $EXTERNAL_MONITOR_PID 2>/dev/null || true

            # Final cleanup check
            echo "🧹 [HANG DEBUG] Final cleanup check..."
            REMAINING_PROCS=$(ps aux | grep -E "python.*pytest" | grep -v grep | wc -l)
            if [ "$REMAINING_PROCS" -gt 0 ]; then
              echo "⚠️ [HANG DEBUG] Found $REMAINING_PROCS remaining pytest processes after completion"
              ps aux | grep -E "python.*pytest" | grep -v grep || true
              echo "💀 [HANG DEBUG] Force killing remaining processes..."
              ps aux | grep -E "python.*pytest" | grep -v grep | awk "{print \$2}" | xargs -r kill -KILL || true
            else
              echo "✅ [HANG DEBUG] No remaining pytest processes found"
            fi
            # PYTEST_EXIT is deliberately NOT reassigned here: `$?` at this
            # point is the status of the cleanup block, not of pytest.

            # Stop background monitoring
            kill $MONITOR_PID 2>/dev/null || true

            echo "🔚 [HANG DEBUG] Pytest exit code: $PYTEST_EXIT"
            # 124 = `timeout` expired; 137 = the run was ended with SIGKILL
            # (by --kill-after or by the external monitor's emergency kill).
            if [ "$PYTEST_EXIT" -eq 124 ] || [ "$PYTEST_EXIT" -eq 137 ]; then
              echo "⚠️ [HANG DEBUG] TIMEOUT! Pytest hung for >${PYTEST_TIMEOUT_SECS}s"
              echo "🔍 [HANG DEBUG] Final process state:"
              ps aux | grep -E "(python|pytest|embedding)" | grep -v grep || true
              echo "🔍 [HANG DEBUG] Final network state:"
              ss -tulpn | grep -E "(555[0-9]|556[0-9])" || echo "No ports"
              echo "💀 [HANG DEBUG] Killing remaining processes..."
              # pkill patterns are extended regexes, so alternation is a bare `|`.
              pkill -TERM -f "pytest|embedding_server|zmq" || true
              sleep 3
              pkill -KILL -f "pytest|embedding_server|zmq" || true
            fi

            exit $PYTEST_EXIT
          else
            # For non-Ubuntu or non-22.04, run normally
            echo "🚀 [HANG DEBUG] Running tests on ${{ matrix.os }} (normal mode)"
            pytest tests/ -v --tb=short
          fi

      - name: Run sanity checks (optional)
        run: |
          # Activate virtual environment
          source .venv/bin/activate || source .venv/Scripts/activate

          # Run distance function tests if available; a failure only logs a
          # warning and does not fail the step.
          # NOTE(review): this path is under `test/` while pytest above runs
          # `tests/` - confirm the directory name is intended.
          if [ -f test/sanity_checks/test_distance_functions.py ]; then
            echo "Running distance function sanity checks..."
            python test/sanity_checks/test_distance_functions.py || echo "⚠️ Distance function test failed, continuing..."
          fi

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          # One artifact per matrix cell; the name embeds os and python.
          name: packages-${{ matrix.os }}-py${{ matrix.python }}
          path: packages/*/dist/