#!/bin/bash
################################################################################
# vLLM Installation Script for NVIDIA DGX Spark (Blackwell GB10)
# Version: 1.1.0
# Author: DGX Spark Community
# License: MIT
#
# This script automates the complete installation of vLLM on DGX Spark systems
# with Blackwell GB10 GPUs, including all necessary fixes and optimizations.
#
# Usage: ./install.sh [OPTIONS]
# Can also be run via: curl -fsSL <url>/install.sh | bash
#
# Options:
#   --install-dir DIR     Installation directory (default: $PWD/vllm-install)
#   --vllm-version HASH   vLLM git commit (default: 66a168a19 - tested with Blackwell)
#   --python-version VER  Python version (default: 3.12)
#   --skip-tests          Skip post-installation tests
#   --help                Show this help message
################################################################################

set -e           # Exit on error
set -o pipefail  # Catch errors in pipes

# ANSI color codes used by the log_* helpers below.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Default configuration (overridable via command-line flags; see parse_args).
INSTALL_DIR="$PWD/vllm-install"
VLLM_VERSION="66a168a197ba214a5b70a74fa2e713c9eeb3251a" # vLLM commit with Blackwell fixes
TRITON_VERSION="4caa0328bf8df64896dd5f6fb9df41b0eb2e750a" # Triton commit that works with Blackwell
PYTHON_VERSION="3.12"
SKIP_TESTS=false

# GitHub raw URL for downloading repo assets when run outside the repo
REPO_RAW_URL="https://raw.githubusercontent.com/eelbaz/dgx-spark-vllm-setup/main"

# Script directory (only meaningful when run from a local clone; resolves to
# "" when the script is piped in via `curl | bash`).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" 2>/dev/null && pwd || echo "")"

################################################################################
# Helper Functions
################################################################################
# Emit an informational message with a blue [INFO] tag.
log_info() {
    printf '%b\n' "${BLUE}[INFO]${NC} $1"
}
# Emit a success message with a green [SUCCESS] tag.
log_success() {
    printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"
}
# Emit a warning message with a yellow [WARNING] tag.
# FIX: diagnostics now go to stderr so warnings are not swallowed when a
# caller captures stdout (e.g. VAR=$(some_step)).
log_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1" >&2
}
# Emit an error message with a red [ERROR] tag.
# FIX: errors now go to stderr (shell convention) so they survive stdout
# redirection and appear in the right stream for `2>err.log` style capture.
log_error() {
    echo -e "${RED}[ERROR]${NC} $1" >&2
}
# Print a section banner: blank line, ruled line, title, ruled line, blank line.
print_header() {
    local rule="========================================"
    printf '\n%b\n%b\n%b\n\n' "${BLUE}${rule}${NC}" "${BLUE}$1${NC}" "${BLUE}${rule}${NC}"
}
# Return 0 when the named command (binary, builtin, or function) is available
# on PATH, non-zero otherwise.
check_command() {
    command -v "$1" > /dev/null 2>&1
}
# Ask the user a yes/no question and succeed only on an explicit "y"/"Y".
# When stdin is not a terminal (e.g. `curl | bash`), skip the prompt and
# auto-confirm so unattended installs never hang.
confirm_or_default_yes() {
    local question="$1"
    if [ ! -t 0 ]; then
        log_info "Non-interactive mode: auto-confirming"
        return 0
    fi
    read -p "$question (y/N) " -n 1 -r
    echo
    [[ $REPLY =~ ^[Yy]$ ]]
}
################################################################################
# Pre-flight Checks
################################################################################

# Validate the host before installing: architecture, NVIDIA driver, GPU model,
# CUDA toolkit, Python dev headers, and free disk space.  Exits non-zero on
# hard failures; soft mismatches (arch / GPU model) only warn or prompt.
preflight_checks() {
    print_header "Pre-flight System Checks"

    log_info "Checking system requirements..."

    # DGX Spark is aarch64; other architectures get a warning only.
    ARCH=$(uname -m)
    if [[ "$ARCH" != "aarch64" ]] && [[ "$ARCH" != "arm64" ]]; then
        log_warning "This script is designed for ARM64 architecture (DGX Spark)"
        log_warning "Detected architecture: $ARCH"
    fi

    # NVIDIA driver tooling is a hard requirement.
    if ! check_command nvidia-smi; then
        log_error "nvidia-smi not found. NVIDIA drivers required."
        exit 1
    fi

    # Check GPU type (first GPU only).
    GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader | head -1)
    log_info "Detected GPU: $GPU_NAME"

    # Non-GB10 GPUs may still work; prompt (auto-yes when non-interactive).
    if [[ ! "$GPU_NAME" =~ "GB10" ]]; then
        log_warning "This script is optimized for NVIDIA GB10 (Blackwell)"
        log_warning "Your GPU: $GPU_NAME"
        if ! confirm_or_default_yes "Continue anyway?"; then
            exit 1
        fi
    fi

    # Check CUDA toolkit; fall back to the conventional install prefix.
    if ! check_command nvcc; then
        # Check common CUDA install locations
        if [ -x "/usr/local/cuda/bin/nvcc" ]; then
            export PATH="/usr/local/cuda/bin:$PATH"
            log_info "Found CUDA at /usr/local/cuda, added to PATH"
        else
            log_error "CUDA toolkit not found. Please install CUDA 13.0+"
            exit 1
        fi
    fi

    # NOTE(review): assumes `nvcc --version` prints "... release X.Y," with the
    # version as field 6 — true for recent toolkits; verify on older CUDA.
    CUDA_VERSION=$(nvcc --version | grep "release" | awk '{print $6}' | cut -d',' -f1)
    log_info "CUDA version: $CUDA_VERSION"

    # Check for Python development headers (required for the Triton source build).
    PYTHON_INCLUDE="/usr/include/python${PYTHON_VERSION}/patchlevel.h"
    if [ ! -f "$PYTHON_INCLUDE" ]; then
        log_warning "Python ${PYTHON_VERSION} development headers not found"
        log_info "Installing python${PYTHON_VERSION}-dev (requires sudo)..."
        if sudo apt-get install -y "python${PYTHON_VERSION}-dev"; then
            log_success "python${PYTHON_VERSION}-dev installed"
        else
            log_error "Failed to install python${PYTHON_VERSION}-dev"
            log_error "Please install manually: sudo apt install python${PYTHON_VERSION}-dev"
            exit 1
        fi
    else
        log_info "Python ${PYTHON_VERSION} development headers found"
    fi

    # Check disk space (build needs ~50GB).  Uses GNU `df -BG`; measures the
    # filesystem holding $HOME — NOTE(review): not necessarily $INSTALL_DIR's.
    AVAILABLE_SPACE=$(df -BG "$HOME" | tail -1 | awk '{print $4}' | sed 's/G//')
    if [[ "$AVAILABLE_SPACE" -lt 50 ]]; then
        log_error "Insufficient disk space. Need at least 50GB, have ${AVAILABLE_SPACE}GB"
        exit 1
    fi

    log_success "Pre-flight checks passed!"
}
################################################################################
# Install uv Package Manager
################################################################################

# Ensure the `uv` package manager is available, bootstrapping it from the
# official installer when missing, and abort if it still cannot be found.
install_uv() {
    print_header "Step 1/8: Installing uv Package Manager"

    if ! check_command uv; then
        log_info "Installing uv..."
        curl -LsSf https://astral.sh/uv/install.sh | sh
        export PATH="$HOME/.local/bin:$PATH"
        log_success "uv installed successfully"
    else
        UV_VERSION=$(uv --version | awk '{print $2}')
        log_info "uv already installed: v$UV_VERSION"
    fi

    # Verify the binary is actually reachable before continuing.
    if ! check_command uv; then
        log_error "uv installation failed"
        exit 1
    fi
}
################################################################################
# Create Python Virtual Environment
################################################################################

# Create (or reuse, after prompting) the project virtual environment at
# $INSTALL_DIR/.vllm via uv, then upgrade setuptools inside it.
create_venv() {
    print_header "Step 2/8: Creating Python Virtual Environment"

    VENV_DIR="$INSTALL_DIR/.vllm"

    if [ -d "$VENV_DIR" ]; then
        log_warning "Virtual environment already exists at $VENV_DIR"
        if confirm_or_default_yes "Remove and recreate?"; then
            rm -rf "$VENV_DIR"
        else
            log_info "Using existing virtual environment"
            return
        fi
    fi

    log_info "Creating Python $PYTHON_VERSION virtual environment..."
    mkdir -p "$INSTALL_DIR"
    cd "$INSTALL_DIR"
    uv venv .vllm --python "$PYTHON_VERSION"

    # Upgrade setuptools to 77+ so PEP 639 license fields are supported
    # (fixes flashinfer-python build failure)
    log_info "Upgrading setuptools in venv for PEP 639 license support..."
    uv pip install --python "$VENV_DIR/bin/python" --upgrade setuptools

    log_success "Virtual environment created at $VENV_DIR"
}
################################################################################
# Install PyTorch
################################################################################

# Install the CUDA 13.0 PyTorch wheels into the venv and verify the install
# by importing torch and printing CUDA availability.
install_pytorch() {
    print_header "Step 3/8: Installing PyTorch with CUDA 13.0"

    source "$INSTALL_DIR/.vllm/bin/activate"

    log_info "Installing latest PyTorch for cu130..."
    uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu130

    # Verify PyTorch installation (import + CUDA visibility check).
    log_info "Verifying PyTorch installation..."
    python -c "import torch; print('PyTorch version:', torch.__version__); print('CUDA available:', torch.cuda.is_available())"

    log_success "PyTorch installed successfully"
}
################################################################################
# Clone and Build Triton
################################################################################

# Clone (or update) the Triton repo, check out the pinned commit, and build it
# from source into the venv.  Records TRITON_INSTALLED_VERSION so build_vllm
# can later detect whether the pinned build was clobbered.
install_triton() {
    print_header "Step 4/8: Installing Triton from Main Branch"

    TRITON_DIR="$INSTALL_DIR/triton"

    if [ -d "$TRITON_DIR" ]; then
        log_info "Triton directory exists, updating..."
        cd "$TRITON_DIR"
        git fetch
    else
        log_info "Cloning Triton repository..."
        cd "$INSTALL_DIR"
        git clone https://github.com/triton-lang/triton.git
        cd triton
    fi

    log_info "Checking out Triton commit $TRITON_VERSION (tested with Blackwell)..."
    git checkout "$TRITON_VERSION"
    git submodule update --init --recursive

    log_info "Installing Triton build dependencies..."
    source "$INSTALL_DIR/.vllm/bin/activate"
    uv pip install pip cmake ninja pybind11

    log_info "Building Triton (this takes ~5 minutes)..."
    export TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
    export CMAKE_BUILD_PARALLEL_LEVEL=$(nproc)

    # FIX: under `set -e -o pipefail` a failing build pipeline aborted the
    # script before the PIPESTATUS check ever ran, so the pointer to the build
    # log was never printed.  Relax -e around the pipeline and capture the
    # status explicitly (same pattern build_vllm already uses).
    set +e
    python -m pip install --no-build-isolation -v . 2>&1 | tee "$INSTALL_DIR/triton-build.log"
    local build_status=${PIPESTATUS[0]}
    set -e

    if [ "$build_status" -ne 0 ]; then
        log_error "Triton build failed. See $INSTALL_DIR/triton-build.log for details"
        exit 1
    fi

    # Record the installed triton version so we can protect it later
    TRITON_INSTALLED_VERSION=$(python -c "import triton; print(triton.__version__)" 2>/dev/null || echo "unknown")
    log_info "Triton version installed: $TRITON_INSTALLED_VERSION"

    log_success "Triton installed successfully"
}
################################################################################
# Install Additional Dependencies
################################################################################

# Install the extra Python packages vLLM needs at build/run time
# (xgrammar, setuptools-scm, and a pinned apache-tvm-ffi pre-release).
install_dependencies() {
    print_header "Step 5/8: Installing Additional Dependencies"

    source "$INSTALL_DIR/.vllm/bin/activate"

    local -a extra_pkgs=(xgrammar setuptools-scm "apache-tvm-ffi==0.1.0b15")
    log_info "Installing xgrammar, setuptools-scm, and apache-tvm-ffi..."
    uv pip install "${extra_pkgs[@]}" --prerelease=allow

    log_success "Dependencies installed successfully"
}
################################################################################
# Clone vLLM
################################################################################

# Clone the vLLM repository (with submodules) into $INSTALL_DIR/vllm and check
# out the pinned commit.  Prompts before wiping an existing checkout; on exit
# the working directory is the vLLM checkout either way.
clone_vllm() {
    print_header "Step 6/8: Cloning vLLM Repository"

    VLLM_DIR="$INSTALL_DIR/vllm"

    if [ -d "$VLLM_DIR" ]; then
        log_warning "vLLM directory already exists at $VLLM_DIR"
        if confirm_or_default_yes "Remove and re-clone?"; then
            rm -rf "$VLLM_DIR"
        else
            log_info "Using existing vLLM directory"
            cd "$VLLM_DIR"
            return
        fi
    fi

    log_info "Cloning vLLM $VLLM_VERSION..."
    cd "$INSTALL_DIR"
    git clone --recursive https://github.com/vllm-project/vllm.git
    cd vllm
    git checkout "$VLLM_VERSION"
    git submodule update --init --recursive

    log_success "vLLM repository cloned"
}
################################################################################
# Apply Critical Fixes
################################################################################

# Patch the vLLM checkout for Blackwell/GB10: PEP 639 license metadata,
# SM100/SM120 CUDA arch lists, a flashinfer sdist-cache workaround, and the
# GPT-OSS Triton MOE kernel patch.  Must run after clone_vllm (relies on the
# checkout existing at $INSTALL_DIR/vllm).
apply_fixes() {
    print_header "Step 7/8: Applying Critical Fixes"

    cd "$INSTALL_DIR/vllm"

    # Fix 1: convert pyproject.toml's PEP 639 string license field to the
    # table form older setuptools accepts, and drop the license-files key.
    log_info "Fixing pyproject.toml license field..."
    sed -i 's/^license = "Apache-2.0"$/license = {text = "Apache-2.0"}/' pyproject.toml
    sed -i '/^license-files = /d' pyproject.toml

    # Fix 2: add SM120/12.x arches to the scaled-mm MOE kernel arch lists.
    # Idempotent: skipped when the "12.0f" marker is already present.
    if grep -q 'cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0f;11.0f;12.0f"' CMakeLists.txt; then
        log_info "CMakeLists.txt SM100/SM120 fix already applied"
    else
        log_info "Applying CMakeLists.txt SM100/SM120 fix..."
        sed -i 's/cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0f;11.0f"/cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0f;11.0f;12.0f"/' CMakeLists.txt
        sed -i 's/cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0a"/cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0a;12.1a"/' CMakeLists.txt
    fi

    # Fix 3: flashinfer-python license field (pre-emptive fix) — drop any
    # cached sdist so a fresh one is fetched during the build.
    log_info "Pre-fixing flashinfer-python license issue..."
    rm -rf "$HOME/.cache/uv/sdists-v9/pypi/flashinfer-python" 2>/dev/null || true

    # Fix 4: GPT-OSS Triton MOE kernels for Qwen3/gpt-oss support.
    # Try local repo patches/ first, then download from GitHub.
    PATCH_FILE=""
    if [ -f "$SCRIPT_DIR/patches/gpt_oss_triton_moe.patch" ]; then
        PATCH_FILE="$SCRIPT_DIR/patches/gpt_oss_triton_moe.patch"
    else
        log_info "Downloading GPT-OSS Triton MOE patch from repository..."
        PATCH_FILE="$INSTALL_DIR/gpt_oss_triton_moe.patch"
        if curl -fsSL "$REPO_RAW_URL/patches/gpt_oss_triton_moe.patch" -o "$PATCH_FILE" 2>/dev/null; then
            log_info "Patch downloaded successfully"
        else
            PATCH_FILE=""
            log_warning "Could not download GPT-OSS Triton MOE patch (skipping)"
        fi
    fi

    if [ -n "$PATCH_FILE" ] && [ -f "$PATCH_FILE" ]; then
        log_info "Applying GPT-OSS Triton MOE kernel patch for Qwen3/gpt-oss support..."
        # Dry-run first so an already-applied patch downgrades to a warning
        # instead of failing mid-apply and leaving .rej files behind.
        if patch --dry-run -p1 < "$PATCH_FILE" > /dev/null 2>&1; then
            patch -p1 < "$PATCH_FILE"
            log_success "GPT-OSS Triton MOE kernel patch applied"
        else
            log_warning "GPT-OSS Triton MOE kernel patch already applied or conflicts"
        fi
    fi

    # Configure the build to link against the already-installed PyTorch
    # instead of pulling its own pinned version.
    log_info "Configuring vLLM to use existing PyTorch..."
    python3 use_existing_torch.py

    log_success "All fixes applied successfully"
}
################################################################################
# Build and Install vLLM
################################################################################

# Compile and install vLLM in editable mode.  Pins the custom-built Triton via
# a uv constraints file, retries once on the known flashinfer license failure,
# and rebuilds Triton from source if dependency resolution replaced it.
build_vllm() {
    print_header "Step 8/8: Building vLLM (15-20 minutes)"

    cd "$INSTALL_DIR/vllm"
    source "$INSTALL_DIR/.vllm/bin/activate"

    # Environment for the Blackwell (SM 12.1a) kernel build.
    export TORCH_CUDA_ARCH_LIST=12.1a
    export VLLM_USE_FLASHINFER_MXFP4_MOE=1
    export TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas

    # Create a constraints file to prevent uv from replacing our
    # custom-built Triton with a PyPI version
    log_info "Creating constraints to protect pinned Triton build..."
    TRITON_CONSTRAINT="$INSTALL_DIR/constraints.txt"
    TRITON_INSTALLED=$(python -c "import importlib.metadata; print(importlib.metadata.version('triton'))" 2>/dev/null || echo "")
    if [ -n "$TRITON_INSTALLED" ]; then
        echo "triton==${TRITON_INSTALLED}" > "$TRITON_CONSTRAINT"
        log_info "Pinning triton==${TRITON_INSTALLED} during vLLM build"
    else
        echo "" > "$TRITON_CONSTRAINT"
        log_warning "Could not detect installed Triton version"
    fi

    log_info "Starting vLLM build..."
    log_warning "This will take 15-20 minutes. Go grab a coffee!"

    # Relax -e so a failed build can be diagnosed (and retried) below;
    # PIPESTATUS[0] captures the install's status, not tee's.
    set +e # Don't exit on error, we'll handle it
    UV_CONSTRAINT="$TRITON_CONSTRAINT" uv pip install \
        --no-build-isolation --prerelease=allow -e . \
        2>&1 | tee "$INSTALL_DIR/vllm-build.log"
    BUILD_STATUS=${PIPESTATUS[0]}
    set -e

    if [ $BUILD_STATUS -ne 0 ]; then
        # Known failure mode: flashinfer-python sdist with PEP 639 metadata
        # rejected by an old setuptools — upgrade, patch the cache, retry once.
        if grep -q "flashinfer.*license.*must be valid" "$INSTALL_DIR/vllm-build.log"; then
            log_warning "Build failed due to flashinfer-python license issue"
            log_info "Upgrading setuptools and retrying..."

            # Ensure setuptools is new enough
            uv pip install --upgrade setuptools

            # Also patch the cached flashinfer pyproject.toml as a belt-and-suspenders fix
            find "$HOME/.cache/uv/sdists-v9/pypi/flashinfer-python" -name "pyproject.toml" 2>/dev/null | while read f; do
                sed -i 's/^license = "Apache-2.0"$/license = {text = "Apache-2.0"}/' "$f"
                sed -i '/^license-files = /d' "$f"
            done

            log_info "Retrying vLLM build..."
            UV_CONSTRAINT="$TRITON_CONSTRAINT" uv pip install \
                --no-build-isolation --prerelease=allow -e .
        else
            log_error "vLLM build failed. See $INSTALL_DIR/vllm-build.log for details"
            exit 1
        fi
    fi

    # Verify Triton wasn't replaced by a PyPI wheel during resolution.
    TRITON_AFTER=$(python -c "import importlib.metadata; print(importlib.metadata.version('triton'))" 2>/dev/null || echo "unknown")
    if [ -n "$TRITON_INSTALLED" ] && [ "$TRITON_AFTER" != "$TRITON_INSTALLED" ]; then
        log_warning "Triton was changed during vLLM install: $TRITON_INSTALLED -> $TRITON_AFTER"
        log_warning "Rebuilding pinned Triton from source..."
        cd "$INSTALL_DIR/triton"
        git checkout "$TRITON_VERSION"
        export CMAKE_BUILD_PARALLEL_LEVEL=$(nproc)
        python -m pip install --no-build-isolation --force-reinstall -v .
        cd "$INSTALL_DIR/vllm"
    fi

    log_success "vLLM built successfully!"
}
################################################################################
# Create Helper Scripts
################################################################################

# Write the runtime helper scripts (environment activation, server
# start/stop/status) into $INSTALL_DIR.  All bodies are embedded as quoted
# here-docs (no expansion at install time) so this works under `curl | bash`.
create_helper_scripts() {
    print_header "Creating Helper Scripts"

    # Create environment activation script
    log_info "Creating vllm_env.sh..."
    cat > "$INSTALL_DIR/vllm_env.sh" << 'ENVEOF'
#!/bin/bash
# vLLM Environment Configuration for DGX Spark
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/.vllm/bin/activate"
export TORCH_CUDA_ARCH_LIST=12.1a
export VLLM_USE_FLASHINFER_MXFP4_MOE=1
CUDA_PATH=$(ls -d /usr/local/cuda* 2>/dev/null | head -1)
export TRITON_PTXAS_PATH="$CUDA_PATH/bin/ptxas"
export PATH="$CUDA_PATH/bin:$PATH"
export LD_LIBRARY_PATH="$CUDA_PATH/lib64:$LD_LIBRARY_PATH"
# Cache tiktoken encodings to avoid re-downloading
export TIKTOKEN_CACHE_DIR="$SCRIPT_DIR/.tiktoken_cache"
mkdir -p "$TIKTOKEN_CACHE_DIR"
echo "=== vLLM Environment Active ==="
echo "Virtual env: $VIRTUAL_ENV"
echo "CUDA arch: $TORCH_CUDA_ARCH_LIST"
echo "Python: $(which python)"
echo "==============================="
ENVEOF
    chmod +x "$INSTALL_DIR/vllm_env.sh"

    # Create vllm-serve.sh (embedded so it works with curl|bash)
    log_info "Creating vllm-serve.sh..."
    cat > "$INSTALL_DIR/vllm-serve.sh" << 'SERVEEOF'
#!/bin/bash
# vLLM Server Startup Script for DGX Spark
# Usage: ./vllm-serve.sh <model_name> [port]

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

MODEL="${1:-Qwen/Qwen2.5-0.5B-Instruct}"
PORT="${2:-8000}"
VLLM_DIR="$SCRIPT_DIR/vllm"
ENV_SCRIPT="$SCRIPT_DIR/vllm_env.sh"
PID_FILE="$SCRIPT_DIR/.vllm-server.pid"
LOG_FILE="$SCRIPT_DIR/vllm-server.log"

# Check if server is already running
if [ -f "$PID_FILE" ]; then
    PID=$(cat "$PID_FILE")
    if ps -p $PID > /dev/null 2>&1; then
        echo "ERROR: vLLM server is already running (PID: $PID)"
        echo "Use ./vllm-stop.sh to stop it first"
        exit 1
    fi
fi

# Source environment
source "$ENV_SCRIPT"

echo "----------------------------------------------------------------------"
echo "Starting vLLM Server on DGX Spark"
echo "----------------------------------------------------------------------"
echo "Model: $MODEL"
echo "Port: $PORT"
echo "Log file: $LOG_FILE"
echo "PID file: $PID_FILE"
echo "----------------------------------------------------------------------"

# Start server in background
cd "$VLLM_DIR"
nohup python -m vllm.entrypoints.openai.api_server \
    --model "$MODEL" \
    --trust-remote-code \
    --host 0.0.0.0 \
    --port "$PORT" \
    --gpu-memory-utilization 0.9 \
    > "$LOG_FILE" 2>&1 &

echo $! > "$PID_FILE"
echo "OK: Server started with PID: $(cat $PID_FILE)"
echo "OK: Waiting for server to be ready..."

sleep 5
if ps -p $(cat "$PID_FILE") > /dev/null 2>&1; then
    echo "OK: Server is running!"
    echo ""
    echo "Test with: curl http://localhost:$PORT/v1/models"
    echo "View logs: tail -f $LOG_FILE"
    echo "Stop server: ./vllm-stop.sh"
else
    echo "ERROR: Server failed to start. Check logs: $LOG_FILE"
    rm -f "$PID_FILE"
    exit 1
fi
SERVEEOF
    chmod +x "$INSTALL_DIR/vllm-serve.sh"

    # Create vllm-stop.sh
    log_info "Creating vllm-stop.sh..."
    cat > "$INSTALL_DIR/vllm-stop.sh" << 'STOPEOF'
#!/bin/bash
# vLLM Server Stop Script for DGX Spark

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PID_FILE="$SCRIPT_DIR/.vllm-server.pid"

if [ ! -f "$PID_FILE" ]; then
    echo "No vLLM server PID file found. Server may not be running."
    exit 0
fi

PID=$(cat "$PID_FILE")

if ! ps -p $PID > /dev/null 2>&1; then
    echo "vLLM server (PID: $PID) is not running. Cleaning up PID file."
    rm -f "$PID_FILE"
    exit 0
fi

echo "Stopping vLLM server (PID: $PID)..."
kill $PID

for i in {1..10}; do
    if ! ps -p $PID > /dev/null 2>&1; then
        echo "OK: Server stopped successfully"
        rm -f "$PID_FILE"
        exit 0
    fi
    sleep 1
done

if ps -p $PID > /dev/null 2>&1; then
    echo "Server did not stop gracefully. Force killing..."
    kill -9 $PID
    sleep 1
    if ! ps -p $PID > /dev/null 2>&1; then
        echo "OK: Server force stopped"
        rm -f "$PID_FILE"
    else
        echo "ERROR: Failed to stop server"
        exit 1
    fi
fi
STOPEOF
    chmod +x "$INSTALL_DIR/vllm-stop.sh"

    # Create vllm-status.sh
    log_info "Creating vllm-status.sh..."
    cat > "$INSTALL_DIR/vllm-status.sh" << 'STATUSEOF'
#!/bin/bash
# vLLM Server Status Script for DGX Spark

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PID_FILE="$SCRIPT_DIR/.vllm-server.pid"
LOG_FILE="$SCRIPT_DIR/vllm-server.log"

echo "----------------------------------------------------------------------"
echo "vLLM Server Status on DGX Spark"
echo "----------------------------------------------------------------------"

if [ ! -f "$PID_FILE" ]; then
    echo "Status: NOT RUNNING (no PID file found)"
    exit 0
fi

PID=$(cat "$PID_FILE")

if ! ps -p $PID > /dev/null 2>&1; then
    echo "Status: NOT RUNNING (stale PID file)"
    echo "Cleaning up PID file..."
    rm -f "$PID_FILE"
    exit 0
fi

echo "Status: RUNNING"
echo "PID: $PID"
echo "Started: $(ps -p $PID -o lstart= 2>/dev/null || echo 'Unknown')"
echo "CPU: $(ps -p $PID -o %cpu= 2>/dev/null || echo 'N/A')%"
echo "Memory: $(ps -p $PID -o %mem= 2>/dev/null || echo 'N/A')%"
echo ""

if [ -f "$LOG_FILE" ]; then
    echo "Recent log entries (last 10 lines):"
    echo "----------------------------------------------------------------------"
    tail -n 10 "$LOG_FILE"
else
    echo "Log file not found: $LOG_FILE"
fi

echo ""
echo "----------------------------------------------------------------------"
STATUSEOF
    chmod +x "$INSTALL_DIR/vllm-status.sh"

    log_success "Helper scripts created in $INSTALL_DIR"
}
################################################################################
# Post-Installation Tests
################################################################################

# Smoke-test the finished install: import vLLM, confirm CUDA is available,
# and enumerate GPUs.  Honors the --skip-tests flag (SKIP_TESTS global).
run_tests() {
    if [ "$SKIP_TESTS" = true ]; then
        log_info "Skipping post-installation tests"
        return
    fi

    print_header "Post-Installation Tests"

    # Activates the venv and exports the Blackwell build/runtime env vars.
    source "$INSTALL_DIR/vllm_env.sh"

    log_info "Test 1: Import vLLM..."
    python -c "import vllm; print('vLLM version:', vllm.__version__)"

    log_info "Test 2: Check CUDA availability..."
    python -c "import torch; assert torch.cuda.is_available(), 'CUDA not available'; print('CUDA available')"

    log_info "Test 3: Check GPU detection..."
    python -c "import torch; print('GPU count:', torch.cuda.device_count()); print('GPU name:', torch.cuda.get_device_name(0))"

    log_success "All tests passed!"
}
################################################################################
# Parse Command Line Arguments
################################################################################

# Parse the script's command-line options into the corresponding globals
# (INSTALL_DIR, VLLM_VERSION, PYTHON_VERSION, SKIP_TESTS).  Exits 0 after
# printing usage for --help; exits 1 on unknown options or missing values.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case $1 in
            --install-dir)
                # Guard against a missing value: `shift 2` past the end of
                # the argument list would otherwise fail confusingly.
                [[ $# -ge 2 ]] || { log_error "--install-dir requires a value"; exit 1; }
                INSTALL_DIR="$2"
                shift 2
                ;;
            --vllm-version)
                [[ $# -ge 2 ]] || { log_error "--vllm-version requires a value"; exit 1; }
                VLLM_VERSION="$2"
                shift 2
                ;;
            --python-version)
                [[ $# -ge 2 ]] || { log_error "--python-version requires a value"; exit 1; }
                PYTHON_VERSION="$2"
                shift 2
                ;;
            --skip-tests)
                SKIP_TESTS=true
                shift
                ;;
            --help)
                # FIX: `head -20` truncated the usage text — the header
                # comment block is longer than 20 lines, so --skip-tests and
                # --help were never shown.  Print the entire leading comment
                # block instead, stopping at the first non-comment line.
                awk 'NR==1 {next} /^#/ {sub(/^# ?/, ""); print; next} {exit}' "$0"
                exit 0
                ;;
            *)
                log_error "Unknown option: $1"
                log_info "Use --help for usage information"
                exit 1
                ;;
        esac
    done
}
################################################################################
# Main Installation Flow
################################################################################

# Entry point: parse flags, run every installation stage in order, then print
# follow-up instructions for the user.
main() {
    parse_args "$@"

    print_header "vLLM Installation for DGX Spark (Blackwell GB10)"
    log_info "Installation directory: $INSTALL_DIR"
    log_info "vLLM version: $VLLM_VERSION"
    log_info "Python version: $PYTHON_VERSION"
    echo ""

    # Stages run in dependency order; each exits non-zero on hard failure.
    preflight_checks
    install_uv
    create_venv
    install_pytorch
    install_triton
    install_dependencies
    clone_vllm
    apply_fixes
    build_vllm
    create_helper_scripts
    run_tests

    print_header "Installation Complete!"
    echo ""
    log_success "vLLM has been successfully installed!"
    echo ""
    echo -e "${GREEN}Next steps:${NC}"
    echo "1. Activate the environment:"
    echo "   ${BLUE}source $INSTALL_DIR/vllm_env.sh${NC}"
    echo ""
    echo "2. Start vLLM server:"
    echo "   ${BLUE}cd $INSTALL_DIR${NC}"
    echo "   ${BLUE}./vllm-serve.sh${NC}"
    echo ""
    echo "3. Test the API:"
    echo "   ${BLUE}curl http://localhost:8000/v1/models${NC}"
    echo ""
    echo "For more information, see README.md"
    echo ""
}
# Run main function, forwarding all command-line arguments.
main "$@"