# SkyPilot task: build a LEANN index on a (preferably GPU) cloud VM.
# The local `docs` directory is synced to ~/leann-data on the VM, the LEANN
# CLI is installed with uv during setup, and `leann build` is run with flags
# derived from the variables under `envs`.
name: leann-build

resources:
  # Choose a GPU for fast embeddings (examples: L4, A10G, A100).
  # CPU also works but is slower.
  accelerators: L4:1
  # Optionally pin a cloud, otherwise SkyPilot will auto-select
  # cloud: aws
  disk_size: 100

envs:
  # Build parameters. Override at launch time with:
  #   sky launch -c leann-gpu sky/leann-build.yaml --env key=value
  index_name: my-index
  docs: ./data
  backend: hnsw  # hnsw | diskann
  # Values are consumed as shell strings, so numbers are quoted to keep
  # their exact spelling.
  complexity: "64"
  graph_degree: "32"
  num_threads: "8"

  # Embedding selection
  embedding_mode: sentence-transformers  # sentence-transformers | openai | mlx | ollama
  embedding_model: facebook/contriever

  # Storage/latency knobs.
  # Quoted on purpose: an unquoted YAML boolean may reach the shell as
  # `True`/`False` rather than `true`/`false`.
  recompute: "true"  # true => selective recomputation (recommended)
  compact: "true"  # for HNSW only

  # Optional pass-through: extra arguments appended verbatim to `leann build`
  extra_args: ""

# Sync local paths to the remote VM. Adjust as needed.
file_mounts:
  # Example: mount your local data directory used for building
  ~/leann-data: ${docs}

setup: |
  set -e
  # Install uv (package manager)
  curl -LsSf https://astral.sh/uv/install.sh | sh
  export PATH="$HOME/.local/bin:$PATH"

  # Install the LEANN CLI globally on the remote machine
  uv tool install leann

run: |
  export PATH="$HOME/.local/bin:$PATH"

  # Derive flags from env. Matching is case-tolerant so that false/False/FALSE
  # and 0 all disable the feature; any other value keeps the CLI default.
  recompute_flag=""
  case "${recompute}" in
    false|False|FALSE|0) recompute_flag="--no-recompute" ;;
  esac

  # NOTE(review): assumes the LEANN CLI accepts --no-compact, mirroring
  # --no-recompute; confirm against `leann build --help`.
  compact_flag=""
  case "${compact}" in
    false|False|FALSE|0) compact_flag="--no-compact" ;;
  esac

  # Build command.
  # The flag variables and extra_args are intentionally left unquoted: empty
  # values must vanish and extra_args must split into separate words.
  leann build "${index_name}" \
    --docs ~/leann-data \
    --backend "${backend}" \
    --complexity "${complexity}" \
    --graph-degree "${graph_degree}" \
    --num-threads "${num_threads}" \
    --embedding-mode "${embedding_mode}" \
    --embedding-model "${embedding_model}" \
    ${recompute_flag} ${compact_flag} ${extra_args}