#!/bin/bash
################################################################################
# vLLM Installation Script for NVIDIA DGX Spark (Blackwell GB10)
# Version: 1.1.0
# Author: DGX Spark Community
# License: MIT
#
# This script automates the complete installation of vLLM on DGX Spark systems
# with Blackwell GB10 GPUs, including all necessary fixes and optimizations.
#
# Usage: ./install.sh [OPTIONS]
# Can also be run via:
#   curl -fsSL https://raw.githubusercontent.com/eelbaz/dgx-spark-vllm-setup/main/install.sh | bash
#
# Options:
#   --install-dir DIR      Installation directory (default: $PWD/vllm-install)
#   --vllm-version HASH    vLLM git commit (default: 66a168a19 - tested with Blackwell)
#   --python-version VER   Python version (default: 3.12)
#   --skip-tests           Skip post-installation tests
#   --help                 Show this help message
################################################################################

set -e          # Exit on error
set -o pipefail # Catch errors in pipes
# NOTE: `set -u` is intentionally NOT enabled — the generated vllm_env.sh
# (sourced by run_tests) expands $LD_LIBRARY_PATH, which may be unset.

# Colors for terminal output (never reassigned, hence readonly)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color

# Default configuration (overridable via command-line flags; see parse_args)
INSTALL_DIR="$PWD/vllm-install"
VLLM_VERSION="66a168a197ba214a5b70a74fa2e713c9eeb3251a"   # vLLM commit with Blackwell fixes
TRITON_VERSION="4caa0328bf8df64896dd5f6fb9df41b0eb2e750a" # Triton commit that works with Blackwell
PYTHON_VERSION="3.12"
SKIP_TESTS=false

# GitHub raw URL for downloading repo assets when run outside the repo
REPO_RAW_URL="https://raw.githubusercontent.com/eelbaz/dgx-spark-vllm-setup/main"

# Script directory (only meaningful when run from a local clone; empty when
# piped via `curl | bash`, in which case assets are fetched from REPO_RAW_URL)
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" 2>/dev/null && pwd || echo "")"

################################################################################
# Helper Functions
################################################################################

# Print an informational message to stdout, prefixed with a blue [INFO] tag.
log_info() {
    echo -e "${BLUE}[INFO]${NC} $1"
}

# Print a success message, prefixed with a green [SUCCESS] tag.
log_success() {
    echo -e "${GREEN}[SUCCESS]${NC} $1"
}

# Print a warning message, prefixed with a yellow [WARNING] tag.
log_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1"
}

# Print an error message, prefixed with a red [ERROR] tag.
log_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}
# Print a framed section header to visually separate installation phases.
print_header() {
    echo ""
    echo -e "${BLUE}========================================${NC}"
    echo -e "${BLUE}$1${NC}"
    echo -e "${BLUE}========================================${NC}"
    echo ""
}

# Return 0 if the given command exists on PATH, non-zero otherwise.
check_command() {
    if command -v "$1" &> /dev/null; then
        return 0
    else
        return 1
    fi
}

# Auto-confirm when stdin is not a terminal (e.g. curl | bash)
# Interactive: single-keypress y/N prompt; returns 0 only on y/Y.
confirm_or_default_yes() {
    local prompt="$1"
    if [ -t 0 ]; then
        read -p "$prompt (y/N) " -n 1 -r
        echo
        [[ $REPLY =~ ^[Yy]$ ]]
    else
        log_info "Non-interactive mode: auto-confirming"
        return 0
    fi
}

################################################################################
# Pre-flight Checks
################################################################################

# Validate hardware/software prerequisites: ARM64 arch (warn only), NVIDIA
# driver, GB10 GPU (prompt if different), CUDA toolkit, Python dev headers,
# and free disk space on the install target. Exits non-zero on hard failures.
preflight_checks() {
    print_header "Pre-flight System Checks"
    log_info "Checking system requirements..."

    # Check if running on ARM64 (DGX Spark is aarch64); warn-only so the
    # script remains usable on other machines for testing
    ARCH=$(uname -m)
    if [[ "$ARCH" != "aarch64" ]] && [[ "$ARCH" != "arm64" ]]; then
        log_warning "This script is designed for ARM64 architecture (DGX Spark)"
        log_warning "Detected architecture: $ARCH"
    fi

    # Check for NVIDIA GPU driver
    if ! check_command nvidia-smi; then
        log_error "nvidia-smi not found. NVIDIA drivers required."
        exit 1
    fi

    # Check GPU type; non-GB10 GPUs require explicit confirmation
    GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader | head -1)
    log_info "Detected GPU: $GPU_NAME"
    if [[ ! "$GPU_NAME" =~ "GB10" ]]; then
        log_warning "This script is optimized for NVIDIA GB10 (Blackwell)"
        log_warning "Your GPU: $GPU_NAME"
        if ! confirm_or_default_yes "Continue anyway?"; then
            exit 1
        fi
    fi

    # Check CUDA toolkit; fall back to the conventional /usr/local/cuda path
    if ! check_command nvcc; then
        if [ -x "/usr/local/cuda/bin/nvcc" ]; then
            export PATH="/usr/local/cuda/bin:$PATH"
            log_info "Found CUDA at /usr/local/cuda, added to PATH"
        else
            log_error "CUDA toolkit not found. Please install CUDA 13.0+"
            exit 1
        fi
    fi
    CUDA_VERSION=$(nvcc --version | grep "release" | awk '{print $6}' | cut -d',' -f1)
    log_info "CUDA version: $CUDA_VERSION"

    # Check for Python development headers (required for Triton build)
    PYTHON_INCLUDE="/usr/include/python${PYTHON_VERSION}/patchlevel.h"
    if [ ! -f "$PYTHON_INCLUDE" ]; then
        log_warning "Python ${PYTHON_VERSION} development headers not found"
        log_info "Installing python${PYTHON_VERSION}-dev (requires sudo)..."
        if sudo apt-get install -y "python${PYTHON_VERSION}-dev"; then
            log_success "python${PYTHON_VERSION}-dev installed"
        else
            log_error "Failed to install python${PYTHON_VERSION}-dev"
            log_error "Please install manually: sudo apt install python${PYTHON_VERSION}-dev"
            exit 1
        fi
    else
        log_info "Python ${PYTHON_VERSION} development headers found"
    fi

    # Check disk space (need ~50GB) on the filesystem that will actually hold
    # the installation. Previously this checked $HOME, which is wrong when
    # --install-dir (or $PWD) is on a different mount. Walk up until we find
    # an existing ancestor of INSTALL_DIR (terminates at "/").
    SPACE_CHECK_DIR="$INSTALL_DIR"
    while [ ! -d "$SPACE_CHECK_DIR" ]; do
        SPACE_CHECK_DIR="$(dirname "$SPACE_CHECK_DIR")"
    done
    AVAILABLE_SPACE=$(df -BG "$SPACE_CHECK_DIR" | tail -1 | awk '{print $4}' | sed 's/G//')
    if [[ "$AVAILABLE_SPACE" -lt 50 ]]; then
        log_error "Insufficient disk space. Need at least 50GB, have ${AVAILABLE_SPACE}GB"
        exit 1
    fi

    log_success "Pre-flight checks passed!"
}

################################################################################
# Install uv Package Manager
################################################################################

# Install the uv package manager if absent (via astral.sh bootstrap script)
# and verify it is usable afterwards.
install_uv() {
    print_header "Step 1/8: Installing uv Package Manager"

    if check_command uv; then
        UV_VERSION=$(uv --version | awk '{print $2}')
        log_info "uv already installed: v$UV_VERSION"
    else
        log_info "Installing uv..."
        curl -LsSf https://astral.sh/uv/install.sh | sh
        # The bootstrap installs into ~/.local/bin, which may not be on PATH yet
        export PATH="$HOME/.local/bin:$PATH"
        log_success "uv installed successfully"
    fi

    # Verify installation
    if ! check_command uv; then
        log_error "uv installation failed"
        exit 1
    fi
}

################################################################################
# Create Python Virtual Environment
################################################################################

# Create (or reuse, with confirmation) the venv at $INSTALL_DIR/.vllm and
# upgrade setuptools inside it so PEP 639 license metadata is accepted.
create_venv() {
    print_header "Step 2/8: Creating Python Virtual Environment"

    VENV_DIR="$INSTALL_DIR/.vllm"

    if [ -d "$VENV_DIR" ]; then
        log_warning "Virtual environment already exists at $VENV_DIR"
        if confirm_or_default_yes "Remove and recreate?"; then
            rm -rf "$VENV_DIR"
        else
            log_info "Using existing virtual environment"
            return
        fi
    fi

    log_info "Creating Python $PYTHON_VERSION virtual environment..."
    mkdir -p "$INSTALL_DIR"
    cd "$INSTALL_DIR"
    uv venv .vllm --python "$PYTHON_VERSION"

    # Upgrade setuptools to 77+ so PEP 639 license fields are supported
    # (fixes flashinfer-python build failure)
    log_info "Upgrading setuptools in venv for PEP 639 license support..."
    uv pip install --python "$VENV_DIR/bin/python" --upgrade setuptools

    log_success "Virtual environment created at $VENV_DIR"
}

################################################################################
# Install PyTorch
################################################################################

# Install PyTorch + torchvision + torchaudio from the cu130 wheel index and
# verify the import / CUDA availability inside the venv.
install_pytorch() {
    print_header "Step 3/8: Installing PyTorch with CUDA 13.0"

    source "$INSTALL_DIR/.vllm/bin/activate"

    log_info "Installing latest PyTorch for cu130..."
    uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu130

    # Verify PyTorch installation
    log_info "Verifying PyTorch installation..."
    python -c "import torch; print('PyTorch version:', torch.__version__); print('CUDA available:', torch.cuda.is_available())"

    log_success "PyTorch installed successfully"
}

################################################################################
# Clone and Build Triton
################################################################################

# Clone (or update) the Triton repo, pin it to $TRITON_VERSION, and build it
# from source inside the venv. Build output is logged to triton-build.log.
install_triton() {
    print_header "Step 4/8: Installing Triton from Main Branch"

    TRITON_DIR="$INSTALL_DIR/triton"

    if [ -d "$TRITON_DIR" ]; then
        log_info "Triton directory exists, updating..."
        cd "$TRITON_DIR"
        git fetch
    else
        log_info "Cloning Triton repository..."
        cd "$INSTALL_DIR"
        git clone https://github.com/triton-lang/triton.git
        cd triton
    fi

    log_info "Checking out Triton commit $TRITON_VERSION (tested with Blackwell)..."
    git checkout "$TRITON_VERSION"
    git submodule update --init --recursive

    log_info "Installing Triton build dependencies..."
    source "$INSTALL_DIR/.vllm/bin/activate"
    uv pip install pip cmake ninja pybind11

    log_info "Building Triton (this takes ~5 minutes)..."
    export TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
    export CMAKE_BUILD_PARALLEL_LEVEL=$(nproc)
    # PIPESTATUS[0] captures pip's exit code despite the tee pipeline
    python -m pip install --no-build-isolation -v . 2>&1 | tee "$INSTALL_DIR/triton-build.log"
    if [ ${PIPESTATUS[0]} -ne 0 ]; then
        log_error "Triton build failed. See $INSTALL_DIR/triton-build.log for details"
        exit 1
    fi

    # Record the installed triton version so we can protect it later
    TRITON_INSTALLED_VERSION=$(python -c "import triton; print(triton.__version__)" 2>/dev/null || echo "unknown")
    log_info "Triton version installed: $TRITON_INSTALLED_VERSION"

    log_success "Triton installed successfully"
}

################################################################################
# Install Additional Dependencies
################################################################################

# Install runtime deps that vLLM's build expects but does not pull in itself.
install_dependencies() {
    print_header "Step 5/8: Installing Additional Dependencies"

    source "$INSTALL_DIR/.vllm/bin/activate"

    log_info "Installing xgrammar, setuptools-scm, and apache-tvm-ffi..."
    uv pip install xgrammar setuptools-scm apache-tvm-ffi==0.1.0b15 --prerelease=allow

    log_success "Dependencies installed successfully"
}

################################################################################
# Clone vLLM
################################################################################

# Clone the vLLM repository (or reuse an existing clone with confirmation)
# and pin it to the tested $VLLM_VERSION commit.
clone_vllm() {
    print_header "Step 6/8: Cloning vLLM Repository"

    VLLM_DIR="$INSTALL_DIR/vllm"

    if [ -d "$VLLM_DIR" ]; then
        log_warning "vLLM directory already exists at $VLLM_DIR"
        if confirm_or_default_yes "Remove and re-clone?"; then
            rm -rf "$VLLM_DIR"
        else
            log_info "Using existing vLLM directory"
            cd "$VLLM_DIR"
            return
        fi
    fi

    log_info "Cloning vLLM $VLLM_VERSION..."
    cd "$INSTALL_DIR"
    git clone --recursive https://github.com/vllm-project/vllm.git
    cd vllm
    git checkout "$VLLM_VERSION"
    git submodule update --init --recursive

    log_success "vLLM repository cloned"
}

################################################################################
# Apply Critical Fixes
################################################################################

# Patch the vLLM checkout for Blackwell: PEP 639 license metadata, SM100/SM120
# CMake arch lists, flashinfer cache cleanup, and the GPT-OSS Triton MOE
# kernel patch (local file or downloaded from REPO_RAW_URL).
apply_fixes() {
    print_header "Step 7/8: Applying Critical Fixes"

    cd "$INSTALL_DIR/vllm"

    # Fix 1: pyproject.toml license field (old-style table form for setuptools)
    log_info "Fixing pyproject.toml license field..."
    sed -i 's/^license = "Apache-2.0"$/license = {text = "Apache-2.0"}/' pyproject.toml
    sed -i '/^license-files = /d' pyproject.toml

    # Fix 2: CMakeLists.txt SM100/SM120 MOE kernels (check if already applied)
    if grep -q 'cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0f;11.0f;12.0f"' CMakeLists.txt; then
        log_info "CMakeLists.txt SM100/SM120 fix already applied"
    else
        log_info "Applying CMakeLists.txt SM100/SM120 fix..."
        sed -i 's/cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0f;11.0f"/cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0f;11.0f;12.0f"/' CMakeLists.txt
        sed -i 's/cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0a"/cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0a;12.1a"/' CMakeLists.txt
    fi

    # Fix 3: flashinfer-python license field (pre-emptive fix)
    # Drop any cached sdist so the build re-fetches it after our setuptools upgrade
    log_info "Pre-fixing flashinfer-python license issue..."
    rm -rf "$HOME/.cache/uv/sdists-v9/pypi/flashinfer-python" 2>/dev/null || true

    # Fix 4: GPT-OSS Triton MOE kernels for Qwen3/gpt-oss support
    # Try local repo patches/ first, then download from GitHub
    PATCH_FILE=""
    if [ -f "$SCRIPT_DIR/patches/gpt_oss_triton_moe.patch" ]; then
        PATCH_FILE="$SCRIPT_DIR/patches/gpt_oss_triton_moe.patch"
    else
        log_info "Downloading GPT-OSS Triton MOE patch from repository..."
        PATCH_FILE="$INSTALL_DIR/gpt_oss_triton_moe.patch"
        if curl -fsSL "$REPO_RAW_URL/patches/gpt_oss_triton_moe.patch" -o "$PATCH_FILE" 2>/dev/null; then
            log_info "Patch downloaded successfully"
        else
            PATCH_FILE=""
            log_warning "Could not download GPT-OSS Triton MOE patch (skipping)"
        fi
    fi

    if [ -n "$PATCH_FILE" ] && [ -f "$PATCH_FILE" ]; then
        log_info "Applying GPT-OSS Triton MOE kernel patch for Qwen3/gpt-oss support..."
        # Dry-run first so an already-patched tree does not abort the install
        if patch --dry-run -p1 < "$PATCH_FILE" > /dev/null 2>&1; then
            patch -p1 < "$PATCH_FILE"
            log_success "GPT-OSS Triton MOE kernel patch applied"
        else
            log_warning "GPT-OSS Triton MOE kernel patch already applied or conflicts"
        fi
    fi

    # Configure use_existing_torch so the build links against our cu130 torch
    log_info "Configuring vLLM to use existing PyTorch..."
    python3 use_existing_torch.py

    log_success "All fixes applied successfully"
}

################################################################################
# Build and Install vLLM
################################################################################

# Build vLLM in editable mode against the existing torch/Triton. Pins the
# source-built Triton via a constraints file, retries once on the known
# flashinfer license failure, and rebuilds Triton if the install replaced it.
build_vllm() {
    print_header "Step 8/8: Building vLLM (15-20 minutes)"

    cd "$INSTALL_DIR/vllm"
    source "$INSTALL_DIR/.vllm/bin/activate"

    # Set environment variables for the Blackwell GB10 target
    export TORCH_CUDA_ARCH_LIST=12.1a
    export VLLM_USE_FLASHINFER_MXFP4_MOE=1
    export TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas

    # Create a constraints file to prevent uv from replacing our
    # custom-built Triton with a PyPI version
    log_info "Creating constraints to protect pinned Triton build..."
    TRITON_CONSTRAINT="$INSTALL_DIR/constraints.txt"
    TRITON_INSTALLED=$(python -c "import importlib.metadata; print(importlib.metadata.version('triton'))" 2>/dev/null || echo "")
    if [ -n "$TRITON_INSTALLED" ]; then
        echo "triton==${TRITON_INSTALLED}" > "$TRITON_CONSTRAINT"
        log_info "Pinning triton==${TRITON_INSTALLED} during vLLM build"
    else
        echo "" > "$TRITON_CONSTRAINT"
        log_warning "Could not detect installed Triton version"
    fi

    log_info "Starting vLLM build..."
    log_warning "This will take 15-20 minutes. Go grab a coffee!"

    set +e # Don't exit on error, we'll handle it
    UV_CONSTRAINT="$TRITON_CONSTRAINT" uv pip install \
        --no-build-isolation --prerelease=allow -e . \
        2>&1 | tee "$INSTALL_DIR/vllm-build.log"
    BUILD_STATUS=${PIPESTATUS[0]}
    set -e

    if [ $BUILD_STATUS -ne 0 ]; then
        # Known failure mode: flashinfer-python ships PEP 639 license metadata
        # that old setuptools rejects — upgrade and retry once
        if grep -q "flashinfer.*license.*must be valid" "$INSTALL_DIR/vllm-build.log"; then
            log_warning "Build failed due to flashinfer-python license issue"
            log_info "Upgrading setuptools and retrying..."
            # Ensure setuptools is new enough
            uv pip install --upgrade setuptools
            # Also patch the cached flashinfer pyproject.toml as a belt-and-suspenders fix
            find "$HOME/.cache/uv/sdists-v9/pypi/flashinfer-python" -name "pyproject.toml" 2>/dev/null | while read f; do
                sed -i 's/^license = "Apache-2.0"$/license = {text = "Apache-2.0"}/' "$f"
                sed -i '/^license-files = /d' "$f"
            done
            log_info "Retrying vLLM build..."
            UV_CONSTRAINT="$TRITON_CONSTRAINT" uv pip install \
                --no-build-isolation --prerelease=allow -e .
        else
            log_error "vLLM build failed. See $INSTALL_DIR/vllm-build.log for details"
            exit 1
        fi
    fi

    # Verify Triton wasn't replaced by a PyPI wheel during the vLLM install
    TRITON_AFTER=$(python -c "import importlib.metadata; print(importlib.metadata.version('triton'))" 2>/dev/null || echo "unknown")
    if [ -n "$TRITON_INSTALLED" ] && [ "$TRITON_AFTER" != "$TRITON_INSTALLED" ]; then
        log_warning "Triton was changed during vLLM install: $TRITON_INSTALLED -> $TRITON_AFTER"
        log_warning "Rebuilding pinned Triton from source..."
        cd "$INSTALL_DIR/triton"
        git checkout "$TRITON_VERSION"
        export CMAKE_BUILD_PARALLEL_LEVEL=$(nproc)
        python -m pip install --no-build-isolation --force-reinstall -v .
        cd "$INSTALL_DIR/vllm"
    fi

    log_success "vLLM built successfully!"
}

################################################################################
# Create Helper Scripts
################################################################################

# Write the env-activation and server start/stop/status helper scripts into
# $INSTALL_DIR. All scripts are embedded as quoted heredocs so this installer
# also works when piped via `curl | bash` (no local repo files needed).
create_helper_scripts() {
    print_header "Creating Helper Scripts"

    # Create environment activation script
    log_info "Creating vllm_env.sh..."
    cat > "$INSTALL_DIR/vllm_env.sh" << 'ENVEOF'
#!/bin/bash
# vLLM Environment Configuration for DGX Spark
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/.vllm/bin/activate"
export TORCH_CUDA_ARCH_LIST=12.1a
export VLLM_USE_FLASHINFER_MXFP4_MOE=1
CUDA_PATH=$(ls -d /usr/local/cuda* 2>/dev/null | head -1)
export TRITON_PTXAS_PATH="$CUDA_PATH/bin/ptxas"
export PATH="$CUDA_PATH/bin:$PATH"
export LD_LIBRARY_PATH="$CUDA_PATH/lib64:$LD_LIBRARY_PATH"
# Cache tiktoken encodings to avoid re-downloading
export TIKTOKEN_CACHE_DIR="$SCRIPT_DIR/.tiktoken_cache"
mkdir -p "$TIKTOKEN_CACHE_DIR"
echo "=== vLLM Environment Active ==="
echo "Virtual env: $VIRTUAL_ENV"
echo "CUDA arch: $TORCH_CUDA_ARCH_LIST"
echo "Python: $(which python)"
echo "==============================="
ENVEOF
    chmod +x "$INSTALL_DIR/vllm_env.sh"

    # Create vllm-serve.sh (embedded so it works with curl|bash)
    log_info "Creating vllm-serve.sh..."
    cat > "$INSTALL_DIR/vllm-serve.sh" << 'SERVEEOF'
#!/bin/bash
# vLLM Server Startup Script for DGX Spark
# Usage: ./vllm-serve.sh [model] [port]
set -e
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
MODEL="${1:-Qwen/Qwen2.5-0.5B-Instruct}"
PORT="${2:-8000}"
VLLM_DIR="$SCRIPT_DIR/vllm"
ENV_SCRIPT="$SCRIPT_DIR/vllm_env.sh"
PID_FILE="$SCRIPT_DIR/.vllm-server.pid"
LOG_FILE="$SCRIPT_DIR/vllm-server.log"
# Check if server is already running
if [ -f "$PID_FILE" ]; then
    PID=$(cat "$PID_FILE")
    if ps -p $PID > /dev/null 2>&1; then
        echo "ERROR: vLLM server is already running (PID: $PID)"
        echo "Use ./vllm-stop.sh to stop it first"
        exit 1
    fi
fi
# Source environment
source "$ENV_SCRIPT"
echo "----------------------------------------------------------------------"
echo "Starting vLLM Server on DGX Spark"
echo "----------------------------------------------------------------------"
echo "Model: $MODEL"
echo "Port: $PORT"
echo "Log file: $LOG_FILE"
echo "PID file: $PID_FILE"
echo "----------------------------------------------------------------------"
# Start server in background
cd "$VLLM_DIR"
nohup python -m vllm.entrypoints.openai.api_server \
    --model "$MODEL" \
    --trust-remote-code \
    --host 0.0.0.0 \
    --port "$PORT" \
    --gpu-memory-utilization 0.9 \
    > "$LOG_FILE" 2>&1 &
echo $! > "$PID_FILE"
echo "OK: Server started with PID: $(cat $PID_FILE)"
echo "OK: Waiting for server to be ready..."
sleep 5
if ps -p $(cat "$PID_FILE") > /dev/null 2>&1; then
    echo "OK: Server is running!"
    echo ""
    echo "Test with: curl http://localhost:$PORT/v1/models"
    echo "View logs: tail -f $LOG_FILE"
    echo "Stop server: ./vllm-stop.sh"
else
    echo "ERROR: Server failed to start. Check logs: $LOG_FILE"
    rm -f "$PID_FILE"
    exit 1
fi
SERVEEOF
    chmod +x "$INSTALL_DIR/vllm-serve.sh"

    # Create vllm-stop.sh
    log_info "Creating vllm-stop.sh..."
    cat > "$INSTALL_DIR/vllm-stop.sh" << 'STOPEOF'
#!/bin/bash
# vLLM Server Stop Script for DGX Spark
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PID_FILE="$SCRIPT_DIR/.vllm-server.pid"
if [ ! -f "$PID_FILE" ]; then
    echo "No vLLM server PID file found. Server may not be running."
    exit 0
fi
PID=$(cat "$PID_FILE")
if ! ps -p $PID > /dev/null 2>&1; then
    echo "vLLM server (PID: $PID) is not running. Cleaning up PID file."
    rm -f "$PID_FILE"
    exit 0
fi
echo "Stopping vLLM server (PID: $PID)..."
kill $PID
for i in {1..10}; do
    if ! ps -p $PID > /dev/null 2>&1; then
        echo "OK: Server stopped successfully"
        rm -f "$PID_FILE"
        exit 0
    fi
    sleep 1
done
if ps -p $PID > /dev/null 2>&1; then
    echo "Server did not stop gracefully. Force killing..."
    kill -9 $PID
    sleep 1
    if ! ps -p $PID > /dev/null 2>&1; then
        echo "OK: Server force stopped"
        rm -f "$PID_FILE"
    else
        echo "ERROR: Failed to stop server"
        exit 1
    fi
fi
STOPEOF
    chmod +x "$INSTALL_DIR/vllm-stop.sh"

    # Create vllm-status.sh
    log_info "Creating vllm-status.sh..."
    cat > "$INSTALL_DIR/vllm-status.sh" << 'STATUSEOF'
#!/bin/bash
# vLLM Server Status Script for DGX Spark
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PID_FILE="$SCRIPT_DIR/.vllm-server.pid"
LOG_FILE="$SCRIPT_DIR/vllm-server.log"
echo "----------------------------------------------------------------------"
echo "vLLM Server Status on DGX Spark"
echo "----------------------------------------------------------------------"
if [ ! -f "$PID_FILE" ]; then
    echo "Status: NOT RUNNING (no PID file found)"
    exit 0
fi
PID=$(cat "$PID_FILE")
if ! ps -p $PID > /dev/null 2>&1; then
    echo "Status: NOT RUNNING (stale PID file)"
    echo "Cleaning up PID file..."
    rm -f "$PID_FILE"
    exit 0
fi
echo "Status: RUNNING"
echo "PID: $PID"
echo "Started: $(ps -p $PID -o lstart= 2>/dev/null || echo 'Unknown')"
echo "CPU: $(ps -p $PID -o %cpu= 2>/dev/null || echo 'N/A')%"
echo "Memory: $(ps -p $PID -o %mem= 2>/dev/null || echo 'N/A')%"
echo ""
if [ -f "$LOG_FILE" ]; then
    echo "Recent log entries (last 10 lines):"
    echo "----------------------------------------------------------------------"
    tail -n 10 "$LOG_FILE"
else
    echo "Log file not found: $LOG_FILE"
fi
echo ""
echo "----------------------------------------------------------------------"
STATUSEOF
    chmod +x "$INSTALL_DIR/vllm-status.sh"

    log_success "Helper scripts created in $INSTALL_DIR"
}

################################################################################
# Post-Installation Tests
################################################################################

# Smoke-test the finished install: import vLLM, confirm CUDA, list the GPU.
# Skipped entirely when --skip-tests was given.
run_tests() {
    if [ "$SKIP_TESTS" = true ]; then
        log_info "Skipping post-installation tests"
        return
    fi

    print_header "Post-Installation Tests"

    source "$INSTALL_DIR/vllm_env.sh"

    log_info "Test 1: Import vLLM..."
    python -c "import vllm; print('vLLM version:', vllm.__version__)"

    log_info "Test 2: Check CUDA availability..."
    python -c "import torch; assert torch.cuda.is_available(), 'CUDA not available'; print('CUDA available')"

    log_info "Test 3: Check GPU detection..."
    python -c "import torch; print('GPU count:', torch.cuda.device_count()); print('GPU name:', torch.cuda.get_device_name(0))"

    log_success "All tests passed!"
}

################################################################################
# Parse Command Line Arguments
################################################################################

# Parse installer flags into the global configuration variables.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case $1 in
            --install-dir)
                INSTALL_DIR="$2"
                shift 2
                ;;
            --vllm-version)
                VLLM_VERSION="$2"
                shift 2
                ;;
            --python-version)
                PYTHON_VERSION="$2"
                shift 2
                ;;
            --skip-tests)
                SKIP_TESTS=true
                shift
                ;;
            --help)
                # $0 is not a readable file when the script is piped via
                # `curl | bash`, so fall back to a static usage line
                if [ -f "$0" ]; then
                    head -20 "$0" | grep "^#" | sed 's/^# //'
                else
                    echo "Usage: install.sh [--install-dir DIR] [--vllm-version HASH] [--python-version VER] [--skip-tests]"
                fi
                exit 0
                ;;
            *)
                log_error "Unknown option: $1"
                log_info "Use --help for usage information"
                exit 1
                ;;
        esac
    done
}

################################################################################
# Main Installation Flow
################################################################################

# Orchestrate the full installation: checks, uv, venv, torch, Triton, deps,
# clone, fixes, build, helper scripts, smoke tests, and final instructions.
main() {
    parse_args "$@"

    print_header "vLLM Installation for DGX Spark (Blackwell GB10)"

    log_info "Installation directory: $INSTALL_DIR"
    log_info "vLLM version: $VLLM_VERSION"
    log_info "Python version: $PYTHON_VERSION"
    echo ""

    preflight_checks
    install_uv
    create_venv
    install_pytorch
    install_triton
    install_dependencies
    clone_vllm
    apply_fixes
    build_vllm
    create_helper_scripts
    run_tests

    print_header "Installation Complete!"
    echo ""
    log_success "vLLM has been successfully installed!"
    echo ""
    echo -e "${GREEN}Next steps:${NC}"
    echo "1. Activate the environment:"
    echo "   ${BLUE}source $INSTALL_DIR/vllm_env.sh${NC}"
    echo ""
    echo "2. Start vLLM server:"
    echo "   ${BLUE}cd $INSTALL_DIR${NC}"
    echo "   ${BLUE}./vllm-serve.sh${NC}"
    echo ""
    echo "3. Test the API:"
    echo "   ${BLUE}curl http://localhost:8000/v1/models${NC}"
    echo ""
    echo "For more information, see README.md"
    echo ""
}

# Run main function
main "$@"