# ARC training config
#
# Hydra-style root config. The `defaults` list composes the `arch` group
# option `trm` first and `_self_` last, so keys set in this file take
# precedence over the same keys coming from the composed arch config.

defaults:
  - arch: trm
  - _self_

hydra:
  # null turns off Hydra's per-run config dump subdirectory (`.hydra`).
  output_subdir: null

# Data paths
data_paths: ['data/arc-aug-1000']
# Explicit empty list (not null).
# NOTE(review): presumably "no separate test set" - confirm with the loader.
data_paths_test: []

evaluators:
  - name: arc@ARC

# Hyperparams - Training
global_batch_size: 768

epochs: 100000
eval_interval: 10000
checkpoint_every_eval: true

# Exponents are written with an explicit mantissa dot (1.0e-4, not 1e-4):
# YAML 1.1 loaders such as PyYAML read the dot-less form as a string.
lr: 1.0e-4
# NOTE(review): 1.0 presumably means the LR is not decayed below `lr`
# - confirm against the scheduler.
lr_min_ratio: 1.0
lr_warmup_steps: 2000

# Standard hyperparameter settings for LM, as used in Llama
beta1: 0.9
beta2: 0.95
weight_decay: 0.1
puzzle_emb_weight_decay: 0.1

# Hyperparams - Puzzle embeddings training
puzzle_emb_lr: 1.0e-2

seed: 0
# NOTE(review): units (steps vs. epochs) are not visible here - confirm.
min_eval_interval: 0  # when to start the eval

ema: false  # use an exponential moving average (EMA)
ema_rate: 0.999  # EMA rate
freeze_weights: false  # if true, freeze weights and only learn the embeddings