# ARC training config

defaults:
  - arch: trm
  - _self_

hydra:
  output_subdir: null

# Data path
data_paths: ['data/arc-aug-1000']
data_paths_test: []
evaluators:
  - name: arc@ARC

# Hyperparams - Training
global_batch_size: 768
epochs: 100000
eval_interval: 10000
checkpoint_every_eval: True
lr: 1e-4
lr_min_ratio: 1.0
lr_warmup_steps: 2000

# Standard hyperparameter settings for LM, as used in Llama
beta1: 0.9
beta2: 0.95
weight_decay: 0.1
puzzle_emb_weight_decay: 0.1

# Hyperparams - Puzzle embeddings training
puzzle_emb_lr: 1e-2

seed: 0
min_eval_interval: 0  # when to start the eval
ema: False  # use Exponential Moving Average
ema_rate: 0.999  # EMA rate
freeze_weights: False  # If True, freeze weights and only learn the embeddings
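
# Example usage (a sketch, not a documented command): since this is a Hydra
# config, any key above can be overridden from the CLI at launch time. The
# entry-point name `pretrain.py` is an assumption; substitute the repo's
# actual training script.
#
#   python pretrain.py epochs=50000 ema=True ema_rate=0.995
#   python pretrain.py data_paths='["data/arc-aug-1000"]' freeze_weights=True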