upload
This commit is contained in:
42
config/cfg_pretrain.yaml
Normal file
42
config/cfg_pretrain.yaml
Normal file
@@ -0,0 +1,42 @@
# ARC training config

defaults:
  - arch: trm
  - _self_

hydra:
  # Don't write Hydra's .hydra/ output directory alongside run artifacts.
  output_subdir: null

# Data path
data_paths: ['data/arc-aug-1000']
data_paths_test: []  # empty -> no held-out test datasets

evaluators:
  - name: arc@ARC

# Hyperparams - Training
global_batch_size: 768

epochs: 100000
eval_interval: 10000
checkpoint_every_eval: true

lr: 1e-4
lr_min_ratio: 1.0  # 1.0 -> constant LR after warmup (no decay)
lr_warmup_steps: 2000

# Standard hyperparameter settings for LM, as used in Llama
beta1: 0.9
beta2: 0.95
weight_decay: 0.1
puzzle_emb_weight_decay: 0.1

# Hyperparams - Puzzle embeddings training
puzzle_emb_lr: 1e-2

seed: 0
min_eval_interval: 0  # when to start the eval

ema: false  # use Exponential-Moving-Average
ema_rate: 0.999  # EMA-rate
freeze_weights: false  # If true, freeze weights and only learn the embeddings
Reference in New Issue
Block a user