Files
dgx-spark-vllm-setup/install.sh
2026-03-22 17:26:26 -04:00

778 lines
25 KiB
Bash

#!/bin/bash
################################################################################
# vLLM Installation Script for NVIDIA DGX Spark (Blackwell GB10)
# Version: 1.1.0
# Author: DGX Spark Community
# License: MIT
#
# This script automates the complete installation of vLLM on DGX Spark systems
# with Blackwell GB10 GPUs, including all necessary fixes and optimizations.
#
# Usage: ./install.sh [OPTIONS]
# Can also be run via: curl -fsSL <url>/install.sh | bash
#
# Options:
# --install-dir DIR Installation directory (default: $PWD/vllm-install)
# --vllm-version HASH vLLM git commit (default: 66a168a19 - tested with Blackwell)
# --python-version VER Python version (default: 3.12)
# --skip-tests Skip post-installation tests
# --help Show this help message
################################################################################
set -e # Exit on error
set -o pipefail # Catch errors in pipes
# Colors for output
# NOTE: these hold literal backslash escape text; they only render as ANSI
# color when passed through `echo -e` (or printf '%b').
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Default configuration (overridable via command-line flags, see parse_args)
INSTALL_DIR="$PWD/vllm-install"
VLLM_VERSION="66a168a197ba214a5b70a74fa2e713c9eeb3251a" # vLLM commit with Blackwell fixes
TRITON_VERSION="4caa0328bf8df64896dd5f6fb9df41b0eb2e750a" # Triton commit that works with Blackwell
PYTHON_VERSION="3.12"
SKIP_TESTS=false
# GitHub raw URL for downloading repo assets when run outside the repo
REPO_RAW_URL="https://raw.githubusercontent.com/eelbaz/dgx-spark-vllm-setup/main"
# Script directory (only meaningful when run from a local clone)
# NOTE: empty string when run via `curl | bash` (BASH_SOURCE has no directory);
# callers (apply_fixes) fall back to downloading assets from REPO_RAW_URL.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" 2>/dev/null && pwd || echo "")"
################################################################################
# Helper Functions
################################################################################
log_info() {
  # Informational message, tagged in blue. %b expands the ANSI escapes
  # stored in $BLUE/$NC, exactly as `echo -e` did.
  printf '%b\n' "${BLUE}[INFO]${NC} $1"
}
log_success() {
  # Success message, tagged in green.
  printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"
}
log_warning() {
  # Warning message, tagged in yellow.
  printf '%b\n' "${YELLOW}[WARNING]${NC} $1"
}
log_error() {
  # Error message, tagged in red.
  # NOTE(review): writes to stdout like the other loggers; consider >&2.
  printf '%b\n' "${RED}[ERROR]${NC} $1"
}
print_header() {
  # Banner: blank line, ruled line, title, ruled line, blank line.
  local rule="${BLUE}========================================${NC}"
  printf '\n%b\n%b\n%b\n\n' "$rule" "${BLUE}$1${NC}" "$rule"
}
check_command() {
  # Status 0 iff the named command resolves in the current PATH.
  # `command -v` already yields the needed exit status; no if/else required.
  command -v "$1" &> /dev/null
}
# Auto-confirm when stdin is not a terminal (e.g. curl | bash)
confirm_or_default_yes() {
  # Ask a y/N question on a terminal; when stdin is not a tty (e.g.
  # `curl ... | bash`) auto-confirm so unattended installs proceed.
  local prompt="$1"
  if [ ! -t 0 ]; then
    log_info "Non-interactive mode: auto-confirming"
    return 0
  fi
  read -p "$prompt (y/N) " -n 1 -r
  echo
  [[ $REPLY =~ ^[Yy]$ ]]
}
################################################################################
# Pre-flight Checks
################################################################################
preflight_checks() {
  # Validate the host before installing: architecture, driver, GPU model,
  # CUDA toolkit, Python dev headers, and free disk space.
  print_header "Pre-flight System Checks"
  log_info "Checking system requirements..."
  # DGX Spark is aarch64; other architectures get a warning but may proceed.
  ARCH=$(uname -m)
  if [[ "$ARCH" != "aarch64" ]] && [[ "$ARCH" != "arm64" ]]; then
    log_warning "This script is designed for ARM64 architecture (DGX Spark)"
    log_warning "Detected architecture: $ARCH"
  fi
  # The NVIDIA driver stack is mandatory.
  if ! check_command nvidia-smi; then
    log_error "nvidia-smi not found. NVIDIA drivers required."
    exit 1
  fi
  # Warn (and allow opt-out) when the GPU is not the Blackwell GB10 the
  # pinned commits and arch flags below are tuned for.
  GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader | head -1)
  log_info "Detected GPU: $GPU_NAME"
  if [[ ! "$GPU_NAME" =~ "GB10" ]]; then
    log_warning "This script is optimized for NVIDIA GB10 (Blackwell)"
    log_warning "Your GPU: $GPU_NAME"
    if ! confirm_or_default_yes "Continue anyway?"; then
      exit 1
    fi
  fi
  # Locate nvcc; fall back to the conventional /usr/local/cuda install.
  if ! check_command nvcc; then
    if [ -x "/usr/local/cuda/bin/nvcc" ]; then
      export PATH="/usr/local/cuda/bin:$PATH"
      log_info "Found CUDA at /usr/local/cuda, added to PATH"
    else
      log_error "CUDA toolkit not found. Please install CUDA 13.0+"
      exit 1
    fi
  fi
  CUDA_VERSION=$(nvcc --version | grep "release" | awk '{print $6}' | cut -d',' -f1)
  log_info "CUDA version: $CUDA_VERSION"
  # Python development headers are required to build Triton from source.
  PYTHON_INCLUDE="/usr/include/python${PYTHON_VERSION}/patchlevel.h"
  if [ ! -f "$PYTHON_INCLUDE" ]; then
    log_warning "Python ${PYTHON_VERSION} development headers not found"
    log_info "Installing python${PYTHON_VERSION}-dev (requires sudo)..."
    if sudo apt-get install -y "python${PYTHON_VERSION}-dev"; then
      log_success "python${PYTHON_VERSION}-dev installed"
    else
      log_error "Failed to install python${PYTHON_VERSION}-dev"
      log_error "Please install manually: sudo apt install python${PYTHON_VERSION}-dev"
      exit 1
    fi
  else
    log_info "Python ${PYTHON_VERSION} development headers found"
  fi
  # Check disk space (need ~50GB for sources plus build artifacts).
  # BUGFIX: measure the filesystem that will actually hold $INSTALL_DIR,
  # not $HOME -- the two can live on different mounts. The parent directory
  # is used because $INSTALL_DIR itself may not exist yet.
  local dest_parent
  dest_parent=$(dirname "$INSTALL_DIR")
  [ -d "$dest_parent" ] || dest_parent="$PWD"
  AVAILABLE_SPACE=$(df -BG "$dest_parent" | tail -1 | awk '{print $4}' | sed 's/G//')
  if [[ "$AVAILABLE_SPACE" -lt 50 ]]; then
    log_error "Insufficient disk space. Need at least 50GB, have ${AVAILABLE_SPACE}GB"
    exit 1
  fi
  log_success "Pre-flight checks passed!"
}
################################################################################
# Install uv Package Manager
################################################################################
install_uv() {
  # Step 1/8: ensure the uv package manager is present, installing it via
  # the official bootstrap script when missing, then verify it resolves.
  print_header "Step 1/8: Installing uv Package Manager"
  if check_command uv; then
    UV_VERSION=$(uv --version | awk '{print $2}')
    log_info "uv already installed: v$UV_VERSION"
  else
    log_info "Installing uv..."
    curl -LsSf https://astral.sh/uv/install.sh | sh
    # The bootstrap installs into ~/.local/bin; make it visible immediately.
    export PATH="$HOME/.local/bin:$PATH"
    log_success "uv installed successfully"
  fi
  check_command uv || {
    log_error "uv installation failed"
    exit 1
  }
}
################################################################################
# Create Python Virtual Environment
################################################################################
create_venv() {
  # Step 2/8: create (or reuse) the project virtualenv at $INSTALL_DIR/.vllm.
  print_header "Step 2/8: Creating Python Virtual Environment"
  VENV_DIR="$INSTALL_DIR/.vllm"
  if [ -d "$VENV_DIR" ]; then
    log_warning "Virtual environment already exists at $VENV_DIR"
    if ! confirm_or_default_yes "Remove and recreate?"; then
      log_info "Using existing virtual environment"
      return
    fi
    rm -rf "$VENV_DIR"
  fi
  log_info "Creating Python $PYTHON_VERSION virtual environment..."
  mkdir -p "$INSTALL_DIR"
  cd "$INSTALL_DIR"
  uv venv .vllm --python "$PYTHON_VERSION"
  # setuptools >= 77 understands PEP 639 license metadata, which the
  # flashinfer-python sdist needs to build.
  log_info "Upgrading setuptools in venv for PEP 639 license support..."
  uv pip install --python "$VENV_DIR/bin/python" --upgrade setuptools
  log_success "Virtual environment created at $VENV_DIR"
}
################################################################################
# Install PyTorch
################################################################################
install_pytorch() {
# Step 3/8: install CUDA-13.0 PyTorch wheels into the venv created by
# create_venv, then smoke-test that torch imports and reports CUDA.
print_header "Step 3/8: Installing PyTorch with CUDA 13.0"
# Activate the venv so `python` below resolves to the venv interpreter.
source "$INSTALL_DIR/.vllm/bin/activate"
log_info "Installing latest PyTorch for cu130..."
uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu130
# Verify PyTorch installation (fails the script via `set -e` if the
# import itself errors; a False CUDA result is only printed, not fatal).
log_info "Verifying PyTorch installation..."
python -c "import torch; print('PyTorch version:', torch.__version__); print('CUDA available:', torch.cuda.is_available())"
log_success "PyTorch installed successfully"
}
################################################################################
# Clone and Build Triton
################################################################################
install_triton() {
# Step 4/8: build and install Triton from source at the pinned commit
# ($TRITON_VERSION), which is known to work with Blackwell GB10.
print_header "Step 4/8: Installing Triton from Main Branch"
TRITON_DIR="$INSTALL_DIR/triton"
# Reuse an existing clone when present; otherwise clone fresh.
if [ -d "$TRITON_DIR" ]; then
log_info "Triton directory exists, updating..."
cd "$TRITON_DIR"
git fetch
else
log_info "Cloning Triton repository..."
cd "$INSTALL_DIR"
git clone https://github.com/triton-lang/triton.git
cd triton
fi
log_info "Checking out Triton commit $TRITON_VERSION (tested with Blackwell)..."
git checkout "$TRITON_VERSION"
git submodule update --init --recursive
log_info "Installing Triton build dependencies..."
source "$INSTALL_DIR/.vllm/bin/activate"
uv pip install pip cmake ninja pybind11
log_info "Building Triton (this takes ~5 minutes)..."
# Point Triton at the system ptxas and parallelize the CMake build.
export TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
export CMAKE_BUILD_PARALLEL_LEVEL=$(nproc)
# Log the full build output; PIPESTATUS[0] is checked because `tee`
# would otherwise mask the pip exit status.
python -m pip install --no-build-isolation -v . 2>&1 | tee "$INSTALL_DIR/triton-build.log"
if [ ${PIPESTATUS[0]} -ne 0 ]; then
log_error "Triton build failed. See $INSTALL_DIR/triton-build.log for details"
exit 1
fi
# Record the installed triton version so we can protect it later
# (build_vllm pins it via a constraints file).
TRITON_INSTALLED_VERSION=$(python -c "import triton; print(triton.__version__)" 2>/dev/null || echo "unknown")
log_info "Triton version installed: $TRITON_INSTALLED_VERSION"
log_success "Triton installed successfully"
}
################################################################################
# Install Additional Dependencies
################################################################################
install_dependencies() {
# Step 5/8: install extra Python packages vLLM needs at runtime/build time.
# apache-tvm-ffi is a pre-release, hence --prerelease=allow.
print_header "Step 5/8: Installing Additional Dependencies"
source "$INSTALL_DIR/.vllm/bin/activate"
log_info "Installing xgrammar, setuptools-scm, and apache-tvm-ffi..."
uv pip install xgrammar setuptools-scm apache-tvm-ffi==0.1.0b15 --prerelease=allow
log_success "Dependencies installed successfully"
}
################################################################################
# Clone vLLM
################################################################################
clone_vllm() {
  # Step 6/8: fetch the vLLM sources at the pinned commit ($VLLM_VERSION).
  # Leaves the shell cd'd into the vLLM checkout for the following steps.
  print_header "Step 6/8: Cloning vLLM Repository"
  VLLM_DIR="$INSTALL_DIR/vllm"
  if [ -d "$VLLM_DIR" ]; then
    log_warning "vLLM directory already exists at $VLLM_DIR"
    if ! confirm_or_default_yes "Remove and re-clone?"; then
      log_info "Using existing vLLM directory"
      cd "$VLLM_DIR"
      return
    fi
    rm -rf "$VLLM_DIR"
  fi
  log_info "Cloning vLLM $VLLM_VERSION..."
  cd "$INSTALL_DIR"
  git clone --recursive https://github.com/vllm-project/vllm.git
  cd vllm
  git checkout "$VLLM_VERSION"
  git submodule update --init --recursive
  log_success "vLLM repository cloned"
}
################################################################################
# Apply Critical Fixes
################################################################################
apply_fixes() {
# Step 7/8: patch the vLLM checkout so it builds on Blackwell GB10.
# Fixes 1-3 address packaging-metadata/build breakage; fix 4 adds MOE
# kernel support via an out-of-tree patch file.
print_header "Step 7/8: Applying Critical Fixes"
cd "$INSTALL_DIR/vllm"
# Fix 1: pyproject.toml license field
# Rewrite the PEP 639 string form to the older table form and drop
# license-files, so the installed setuptools accepts the metadata.
log_info "Fixing pyproject.toml license field..."
sed -i 's/^license = "Apache-2.0"$/license = {text = "Apache-2.0"}/' pyproject.toml
sed -i '/^license-files = /d' pyproject.toml
# Fix 2: CMakeLists.txt SM100/SM120 MOE kernels (check if already applied)
# Idempotent: the grep detects the already-patched arch list and skips.
if grep -q 'cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0f;11.0f;12.0f"' CMakeLists.txt; then
log_info "CMakeLists.txt SM100/SM120 fix already applied"
else
log_info "Applying CMakeLists.txt SM100/SM120 fix..."
sed -i 's/cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0f;11.0f"/cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0f;11.0f;12.0f"/' CMakeLists.txt
sed -i 's/cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0a"/cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0a;12.1a"/' CMakeLists.txt
fi
# Fix 3: flashinfer-python license field (pre-emptive fix)
# Drop any cached sdist so a later build does not reuse bad metadata.
log_info "Pre-fixing flashinfer-python license issue..."
rm -rf "$HOME/.cache/uv/sdists-v9/pypi/flashinfer-python" 2>/dev/null || true
# Fix 4: GPT-OSS Triton MOE kernels for Qwen3/gpt-oss support
# Try local repo patches/ first, then download from GitHub
# (SCRIPT_DIR is empty when run via curl|bash, so the local check fails
# and we fall through to the REPO_RAW_URL download).
PATCH_FILE=""
if [ -f "$SCRIPT_DIR/patches/gpt_oss_triton_moe.patch" ]; then
PATCH_FILE="$SCRIPT_DIR/patches/gpt_oss_triton_moe.patch"
else
log_info "Downloading GPT-OSS Triton MOE patch from repository..."
PATCH_FILE="$INSTALL_DIR/gpt_oss_triton_moe.patch"
if curl -fsSL "$REPO_RAW_URL/patches/gpt_oss_triton_moe.patch" -o "$PATCH_FILE" 2>/dev/null; then
log_info "Patch downloaded successfully"
else
PATCH_FILE=""
log_warning "Could not download GPT-OSS Triton MOE patch (skipping)"
fi
fi
# Apply the patch only when a dry run succeeds, keeping this idempotent
# across re-runs (an already-applied patch just logs a warning).
if [ -n "$PATCH_FILE" ] && [ -f "$PATCH_FILE" ]; then
log_info "Applying GPT-OSS Triton MOE kernel patch for Qwen3/gpt-oss support..."
if patch --dry-run -p1 < "$PATCH_FILE" > /dev/null 2>&1; then
patch -p1 < "$PATCH_FILE"
log_success "GPT-OSS Triton MOE kernel patch applied"
else
log_warning "GPT-OSS Triton MOE kernel patch already applied or conflicts"
fi
fi
# Configure use_existing_torch
# (vLLM upstream helper that strips torch from its requirements so the
# already-installed cu130 build is reused.)
log_info "Configuring vLLM to use existing PyTorch..."
python3 use_existing_torch.py
log_success "All fixes applied successfully"
}
################################################################################
# Build and Install vLLM
################################################################################
build_vllm() {
# Step 8/8: compile and install vLLM in editable mode, pinning the
# source-built Triton so uv cannot replace it with a PyPI wheel, and
# retrying once if the known flashinfer-python license failure occurs.
print_header "Step 8/8: Building vLLM (15-20 minutes)"
cd "$INSTALL_DIR/vllm"
source "$INSTALL_DIR/.vllm/bin/activate"
# Set environment variables
# 12.1a targets the GB10 (Blackwell) compute architecture.
export TORCH_CUDA_ARCH_LIST=12.1a
export VLLM_USE_FLASHINFER_MXFP4_MOE=1
export TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
# Create a constraints file to prevent uv from replacing our
# custom-built Triton with a PyPI version
log_info "Creating constraints to protect pinned Triton build..."
TRITON_CONSTRAINT="$INSTALL_DIR/constraints.txt"
TRITON_INSTALLED=$(python -c "import importlib.metadata; print(importlib.metadata.version('triton'))" 2>/dev/null || echo "")
if [ -n "$TRITON_INSTALLED" ]; then
echo "triton==${TRITON_INSTALLED}" > "$TRITON_CONSTRAINT"
log_info "Pinning triton==${TRITON_INSTALLED} during vLLM build"
else
# Empty constraints file keeps the UV_CONSTRAINT invocation below valid.
echo "" > "$TRITON_CONSTRAINT"
log_warning "Could not detect installed Triton version"
fi
log_info "Starting vLLM build..."
log_warning "This will take 15-20 minutes. Go grab a coffee!"
set +e # Don't exit on error, we'll handle it
UV_CONSTRAINT="$TRITON_CONSTRAINT" uv pip install \
--no-build-isolation --prerelease=allow -e . \
2>&1 | tee "$INSTALL_DIR/vllm-build.log"
# PIPESTATUS[0] because `tee` would otherwise mask the install status.
BUILD_STATUS=${PIPESTATUS[0]}
set -e
if [ $BUILD_STATUS -ne 0 ]; then
# Known failure mode: flashinfer-python sdist with PEP 639 license
# metadata rejected by an older setuptools. Upgrade and retry once.
if grep -q "flashinfer.*license.*must be valid" "$INSTALL_DIR/vllm-build.log"; then
log_warning "Build failed due to flashinfer-python license issue"
log_info "Upgrading setuptools and retrying..."
# Ensure setuptools is new enough
uv pip install --upgrade setuptools
# Also patch the cached flashinfer pyproject.toml as a belt-and-suspenders fix
find "$HOME/.cache/uv/sdists-v9/pypi/flashinfer-python" -name "pyproject.toml" 2>/dev/null | while read f; do
sed -i 's/^license = "Apache-2.0"$/license = {text = "Apache-2.0"}/' "$f"
sed -i '/^license-files = /d' "$f"
done
log_info "Retrying vLLM build..."
UV_CONSTRAINT="$TRITON_CONSTRAINT" uv pip install \
--no-build-isolation --prerelease=allow -e .
else
log_error "vLLM build failed. See $INSTALL_DIR/vllm-build.log for details"
exit 1
fi
fi
# Verify Triton wasn't replaced
# (despite the constraints file, a resolver change could swap it; if so,
# rebuild the pinned commit from the source tree left by install_triton).
TRITON_AFTER=$(python -c "import importlib.metadata; print(importlib.metadata.version('triton'))" 2>/dev/null || echo "unknown")
if [ -n "$TRITON_INSTALLED" ] && [ "$TRITON_AFTER" != "$TRITON_INSTALLED" ]; then
log_warning "Triton was changed during vLLM install: $TRITON_INSTALLED -> $TRITON_AFTER"
log_warning "Rebuilding pinned Triton from source..."
cd "$INSTALL_DIR/triton"
git checkout "$TRITON_VERSION"
export CMAKE_BUILD_PARALLEL_LEVEL=$(nproc)
python -m pip install --no-build-isolation --force-reinstall -v .
cd "$INSTALL_DIR/vllm"
fi
log_success "vLLM built successfully!"
}
################################################################################
# Create Helper Scripts
################################################################################
create_helper_scripts() {
# Generate standalone helper scripts in $INSTALL_DIR: environment setup
# (vllm_env.sh) plus server start/stop/status wrappers. All heredocs use
# quoted delimiters ('ENVEOF' etc.) so their contents are written verbatim
# with no expansion at generation time.
print_header "Creating Helper Scripts"
# Create environment activation script
log_info "Creating vllm_env.sh..."
cat > "$INSTALL_DIR/vllm_env.sh" << 'ENVEOF'
#!/bin/bash
# vLLM Environment Configuration for DGX Spark
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/.vllm/bin/activate"
export TORCH_CUDA_ARCH_LIST=12.1a
export VLLM_USE_FLASHINFER_MXFP4_MOE=1
CUDA_PATH=$(ls -d /usr/local/cuda* 2>/dev/null | head -1)
export TRITON_PTXAS_PATH="$CUDA_PATH/bin/ptxas"
export PATH="$CUDA_PATH/bin:$PATH"
export LD_LIBRARY_PATH="$CUDA_PATH/lib64:$LD_LIBRARY_PATH"
# Cache tiktoken encodings to avoid re-downloading
export TIKTOKEN_CACHE_DIR="$SCRIPT_DIR/.tiktoken_cache"
mkdir -p "$TIKTOKEN_CACHE_DIR"
echo "=== vLLM Environment Active ==="
echo "Virtual env: $VIRTUAL_ENV"
echo "CUDA arch: $TORCH_CUDA_ARCH_LIST"
echo "Python: $(which python)"
echo "==============================="
ENVEOF
chmod +x "$INSTALL_DIR/vllm_env.sh"
# Create vllm-serve.sh (embedded so it works with curl|bash)
# Starts the OpenAI-compatible API server in the background with a PID file.
log_info "Creating vllm-serve.sh..."
cat > "$INSTALL_DIR/vllm-serve.sh" << 'SERVEEOF'
#!/bin/bash
# vLLM Server Startup Script for DGX Spark
# Usage: ./vllm-serve.sh <model_name> [port]
set -e
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
MODEL="${1:-Qwen/Qwen2.5-0.5B-Instruct}"
PORT="${2:-8000}"
VLLM_DIR="$SCRIPT_DIR/vllm"
ENV_SCRIPT="$SCRIPT_DIR/vllm_env.sh"
PID_FILE="$SCRIPT_DIR/.vllm-server.pid"
LOG_FILE="$SCRIPT_DIR/vllm-server.log"
# Check if server is already running
if [ -f "$PID_FILE" ]; then
PID=$(cat "$PID_FILE")
if ps -p $PID > /dev/null 2>&1; then
echo "ERROR: vLLM server is already running (PID: $PID)"
echo "Use ./vllm-stop.sh to stop it first"
exit 1
fi
fi
# Source environment
source "$ENV_SCRIPT"
echo "----------------------------------------------------------------------"
echo "Starting vLLM Server on DGX Spark"
echo "----------------------------------------------------------------------"
echo "Model: $MODEL"
echo "Port: $PORT"
echo "Log file: $LOG_FILE"
echo "PID file: $PID_FILE"
echo "----------------------------------------------------------------------"
# Start server in background
cd "$VLLM_DIR"
nohup python -m vllm.entrypoints.openai.api_server \
--model "$MODEL" \
--trust-remote-code \
--host 0.0.0.0 \
--port "$PORT" \
--gpu-memory-utilization 0.9 \
> "$LOG_FILE" 2>&1 &
echo $! > "$PID_FILE"
echo "OK: Server started with PID: $(cat $PID_FILE)"
echo "OK: Waiting for server to be ready..."
sleep 5
if ps -p $(cat "$PID_FILE") > /dev/null 2>&1; then
echo "OK: Server is running!"
echo ""
echo "Test with: curl http://localhost:$PORT/v1/models"
echo "View logs: tail -f $LOG_FILE"
echo "Stop server: ./vllm-stop.sh"
else
echo "ERROR: Server failed to start. Check logs: $LOG_FILE"
rm -f "$PID_FILE"
exit 1
fi
SERVEEOF
chmod +x "$INSTALL_DIR/vllm-serve.sh"
# Create vllm-stop.sh
# Graceful TERM with a 10s wait, then KILL as a last resort.
log_info "Creating vllm-stop.sh..."
cat > "$INSTALL_DIR/vllm-stop.sh" << 'STOPEOF'
#!/bin/bash
# vLLM Server Stop Script for DGX Spark
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PID_FILE="$SCRIPT_DIR/.vllm-server.pid"
if [ ! -f "$PID_FILE" ]; then
echo "No vLLM server PID file found. Server may not be running."
exit 0
fi
PID=$(cat "$PID_FILE")
if ! ps -p $PID > /dev/null 2>&1; then
echo "vLLM server (PID: $PID) is not running. Cleaning up PID file."
rm -f "$PID_FILE"
exit 0
fi
echo "Stopping vLLM server (PID: $PID)..."
kill $PID
for i in {1..10}; do
if ! ps -p $PID > /dev/null 2>&1; then
echo "OK: Server stopped successfully"
rm -f "$PID_FILE"
exit 0
fi
sleep 1
done
if ps -p $PID > /dev/null 2>&1; then
echo "Server did not stop gracefully. Force killing..."
kill -9 $PID
sleep 1
if ! ps -p $PID > /dev/null 2>&1; then
echo "OK: Server force stopped"
rm -f "$PID_FILE"
else
echo "ERROR: Failed to stop server"
exit 1
fi
fi
STOPEOF
chmod +x "$INSTALL_DIR/vllm-stop.sh"
# Create vllm-status.sh
# Reports liveness from the PID file plus recent log output.
log_info "Creating vllm-status.sh..."
cat > "$INSTALL_DIR/vllm-status.sh" << 'STATUSEOF'
#!/bin/bash
# vLLM Server Status Script for DGX Spark
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PID_FILE="$SCRIPT_DIR/.vllm-server.pid"
LOG_FILE="$SCRIPT_DIR/vllm-server.log"
echo "----------------------------------------------------------------------"
echo "vLLM Server Status on DGX Spark"
echo "----------------------------------------------------------------------"
if [ ! -f "$PID_FILE" ]; then
echo "Status: NOT RUNNING (no PID file found)"
exit 0
fi
PID=$(cat "$PID_FILE")
if ! ps -p $PID > /dev/null 2>&1; then
echo "Status: NOT RUNNING (stale PID file)"
echo "Cleaning up PID file..."
rm -f "$PID_FILE"
exit 0
fi
echo "Status: RUNNING"
echo "PID: $PID"
echo "Started: $(ps -p $PID -o lstart= 2>/dev/null || echo 'Unknown')"
echo "CPU: $(ps -p $PID -o %cpu= 2>/dev/null || echo 'N/A')%"
echo "Memory: $(ps -p $PID -o %mem= 2>/dev/null || echo 'N/A')%"
echo ""
if [ -f "$LOG_FILE" ]; then
echo "Recent log entries (last 10 lines):"
echo "----------------------------------------------------------------------"
tail -n 10 "$LOG_FILE"
else
echo "Log file not found: $LOG_FILE"
fi
echo ""
echo "----------------------------------------------------------------------"
STATUSEOF
chmod +x "$INSTALL_DIR/vllm-status.sh"
log_success "Helper scripts created in $INSTALL_DIR"
}
################################################################################
# Post-Installation Tests
################################################################################
run_tests() {
# Post-install smoke tests: import vLLM, confirm CUDA, list GPUs.
# Skipped entirely when --skip-tests was given.
if [ "$SKIP_TESTS" = true ]; then
log_info "Skipping post-installation tests"
return
fi
print_header "Post-Installation Tests"
# vllm_env.sh (generated by create_helper_scripts) activates the venv
# and exports the CUDA/Triton environment.
source "$INSTALL_DIR/vllm_env.sh"
log_info "Test 1: Import vLLM..."
python -c "import vllm; print('vLLM version:', vllm.__version__)"
log_info "Test 2: Check CUDA availability..."
python -c "import torch; assert torch.cuda.is_available(), 'CUDA not available'; print('CUDA available')"
log_info "Test 3: Check GPU detection..."
python -c "import torch; print('GPU count:', torch.cuda.device_count()); print('GPU name:', torch.cuda.get_device_name(0))"
log_success "All tests passed!"
}
################################################################################
# Parse Command Line Arguments
################################################################################
parse_args() {
  # Parse command-line flags into the global configuration variables
  # (INSTALL_DIR, VLLM_VERSION, PYTHON_VERSION, SKIP_TESTS).
  # BUGFIX: value-taking options now fail with a clear message when the
  # value is missing; previously `shift 2` past the end of the argument
  # list aborted silently under `set -e`.
  while [[ $# -gt 0 ]]; do
    case $1 in
      --install-dir)
        [[ $# -ge 2 ]] || { log_error "--install-dir requires a directory argument"; exit 1; }
        INSTALL_DIR="$2"
        shift 2
        ;;
      --vllm-version)
        [[ $# -ge 2 ]] || { log_error "--vllm-version requires a commit hash argument"; exit 1; }
        VLLM_VERSION="$2"
        shift 2
        ;;
      --python-version)
        [[ $# -ge 2 ]] || { log_error "--python-version requires a version argument"; exit 1; }
        PYTHON_VERSION="$2"
        shift 2
        ;;
      --skip-tests)
        SKIP_TESTS=true
        shift
        ;;
      --help)
        # Print the usage text from the file header comment.
        # BUGFIX: drop the shebang and '####' separator lines, which
        # previously leaked into the help output.
        head -20 "$0" | grep '^#' | grep -v -e '^#!' -e '^##' | sed 's/^# \{0,1\}//'
        exit 0
        ;;
      *)
        log_error "Unknown option: $1"
        log_info "Use --help for usage information"
        exit 1
        ;;
    esac
  done
}
################################################################################
# Main Installation Flow
################################################################################
main() {
  # Orchestrate the full installation: parse flags, validate the host,
  # then run the eight install steps in dependency order.
  parse_args "$@"
  print_header "vLLM Installation for DGX Spark (Blackwell GB10)"
  log_info "Installation directory: $INSTALL_DIR"
  log_info "vLLM version: $VLLM_VERSION"
  log_info "Python version: $PYTHON_VERSION"
  echo ""
  preflight_checks
  install_uv
  create_venv
  install_pytorch
  install_triton
  install_dependencies
  clone_vllm
  apply_fixes
  build_vllm
  create_helper_scripts
  run_tests
  print_header "Installation Complete!"
  echo ""
  log_success "vLLM has been successfully installed!"
  echo ""
  # BUGFIX: the command lines below embed ${BLUE}/${NC}, which hold literal
  # '\033[...' escape text; they only render as color under `echo -e`.
  # Plain `echo` printed the raw escape sequences to the user.
  echo -e "${GREEN}Next steps:${NC}"
  echo "1. Activate the environment:"
  echo -e " ${BLUE}source $INSTALL_DIR/vllm_env.sh${NC}"
  echo ""
  echo "2. Start vLLM server:"
  echo -e " ${BLUE}cd $INSTALL_DIR${NC}"
  echo -e " ${BLUE}./vllm-serve.sh${NC}"
  echo ""
  echo "3. Test the API:"
  echo -e " ${BLUE}curl http://localhost:8000/v1/models${NC}"
  echo ""
  echo "For more information, see README.md"
  echo ""
}
# Run main function
main "$@"