# SkyPilot task that builds a LEANN index on a remote VM.
# Name of this SkyPilot task.
name: leann-build

resources:
  # Choose a GPU for fast embeddings (examples: L4, A10G, A100).
  # CPU also works but is slower.
  accelerators: L4:1
  # Optionally pin a cloud; otherwise SkyPilot will auto-select one.
  # cloud: aws
  # Disk size for the VM (SkyPilot interprets this value as GB).
  disk_size: 100

envs:
  # Build parameters. Override any of them at launch time, e.g.:
  #   sky launch -c leann-gpu sky/leann-build.yaml --env key=value
  #
  # Flag-like and numeric values are quoted on purpose: they are consumed as
  # shell strings by the run script, and an unquoted YAML boolean may arrive
  # there as `True`/`False` instead of the `true`/`false` written here.
  index_name: my-index
  # Local directory with the documents to index; uploaded via file_mounts.
  docs: ./data
  # Index backend: hnsw | diskann
  backend: hnsw
  complexity: "64"
  graph_degree: "32"
  num_threads: "8"

  # Embedding selection
  # embedding_mode: sentence-transformers | openai | mlx | ollama
  embedding_mode: sentence-transformers
  embedding_model: facebook/contriever

  # Storage/latency knobs
  # recompute: "true" => selective recomputation; "false" => store full embeddings
  recompute: "true"
  # compact: HNSW only; set to "false" when recompute is "false"
  compact: "true"

  # Optional pass-through: appended verbatim to the `leann build` command line.
  extra_args: ""

# Sync local paths to the remote VM. Adjust as needed.
file_mounts:
  # Format is <remote path>: <local source>.
  # Example: mount your local data directory used for building. The `docs`
  # variable defined above is substituted here, and the run step reads the
  # uploaded files from ~/leann-data.
  ~/leann-data: ${docs}

# One-time provisioning: installs uv and then the LEANN CLI.
setup: |
  # Abort on the first failing command; pipefail makes a failed download in
  # the `curl | sh` pipeline fail the step instead of being masked by `sh`.
  set -eo pipefail

  # Install uv (package manager)
  curl -LsSf https://astral.sh/uv/install.sh | sh
  export PATH="$HOME/.local/bin:$PATH"

  # Install the LEANN CLI globally on the remote machine
  uv tool install leann

# Job entry point: translates the task variables into `leann build` flags.
run: |
  # Make the uv-installed `leann` executable visible to this shell.
  export PATH="$HOME/.local/bin:$PATH"

  # Succeeds when the argument means "off": `0` or any capitalisation of
  # `false`. The case-insensitive match matters because a YAML boolean can be
  # rendered as `False` by the time it is exported to the shell.
  is_disabled() {
    case "$1" in
      [Ff][Aa][Ll][Ss][Ee]|0) return 0 ;;
      *) return 1 ;;
    esac
  }

  # Derive flags from env. Each flag stays empty unless its knob is disabled.
  recompute_flag=""
  if is_disabled "${recompute}"; then
    recompute_flag="--no-recompute"
  fi

  compact_flag=""
  if is_disabled "${compact}"; then
    compact_flag="--no-compact"
  fi

  # Build command. Single-value options are quoted so values containing spaces
  # or glob characters are passed through intact. The last line is left
  # unquoted on purpose: empty flags must vanish and extra_args must be split
  # into separate words.
  leann build "${index_name}" \
    --docs ~/leann-data \
    --backend "${backend}" \
    --complexity "${complexity}" \
    --graph-degree "${graph_degree}" \
    --num-threads "${num_threads}" \
    --embedding-mode "${embedding_mode}" \
    --embedding-model "${embedding_model}" \
    ${recompute_flag} ${compact_flag} ${extra_args}