This commit is contained in:
Alexia Jolicoeur-Martineau
2025-10-07 09:26:04 -04:00
commit 8120f2bdf7
39 changed files with 27428 additions and 0 deletions

24
config/arch/hrm.yaml Normal file
View File

@@ -0,0 +1,24 @@
# Architecture config for the Hierarchical Reasoning Model (ACT V1 variant).
name: recursive_reasoning.hrm@HierarchicalReasoningModel_ACTV1

# Loss head. These keys must stay nested under `loss`: at top level,
# `loss.name` would collide with the model `name` above.
loss:
  name: losses@ACTLossHead
  loss_type: stablemax_cross_entropy

# Halting (ACT) settings.
halt_exploration_prob: 0.1
halt_max_steps: 16

# H_* / L_* cycle and layer counts.
H_cycles: 2
L_cycles: 2
H_layers: 4
L_layers: 4

hidden_size: 512
# 8 == hidden_size // 64.
# NOTE(review): the previous note read "min(2, hidden_size // 64)", which
# would give 2 for hidden_size 512; presumably max(2, ...) was meant.
num_heads: 8
expansion: 4

# Relative interpolation: resolves to `hidden_size` of this same config node.
puzzle_emb_ndim: ${.hidden_size}

pos_encodings: rope
forward_dtype: bfloat16

mlp_t: false  # if true, use mlp on L instead of transformer

View File

@@ -0,0 +1,18 @@
# Architecture config for the transformer baseline (Model_ACTV2).
name: recursive_reasoning.transformers_baseline@Model_ACTV2

# Loss head. These keys must stay nested under `loss`: at top level,
# `loss.name` would collide with the model `name` above.
loss:
  name: losses@ACTLossHead
  loss_type: stablemax_cross_entropy

# Halting (ACT) settings.
halt_exploration_prob: 0.1
halt_max_steps: 16

H_cycles: 1  # kept for compatibility
H_layers: 8

hidden_size: 512
# NOTE(review): 512 is not divisible by 12 (512 / 12 ~= 42.67). Confirm the
# model tolerates a truncated per-head dimension, or pick a divisor of 512.
num_heads: 12
expansion: 4

# Relative interpolation: resolves to `hidden_size` of this same config node.
puzzle_emb_ndim: ${.hidden_size}

pos_encodings: rope

26
config/arch/trm.yaml Normal file
View File

@@ -0,0 +1,26 @@
# Architecture config for the Tiny Recursive Reasoning Model (ACT V1 variant).
name: recursive_reasoning.trm@TinyRecursiveReasoningModel_ACTV1

# Loss head. These keys must stay nested under `loss`: at top level,
# `loss.name` would collide with the model `name` above.
loss:
  name: losses@ACTLossHead
  loss_type: stablemax_cross_entropy

# Halting (ACT) settings.
halt_exploration_prob: 0.1
halt_max_steps: 16

# H_* / L_* cycle and layer counts.
H_cycles: 3
L_cycles: 6
H_layers: 0
L_layers: 2

hidden_size: 512
# 8 == hidden_size // 64.
# NOTE(review): the previous note read "min(2, hidden_size // 64)", which
# would give 2 for hidden_size 512; presumably max(2, ...) was meant.
num_heads: 8
expansion: 4

# Relative interpolation: resolves to `hidden_size` of this same config node.
puzzle_emb_ndim: ${.hidden_size}

pos_encodings: rope
forward_dtype: bfloat16

mlp_t: false  # if true, use mlp on L instead of transformer
puzzle_emb_len: 16  # if non-zero, the length is fixed to this value
# No "continue" ACT loss; only the sigmoid of the halt signal is used.
no_ACT_continue: true

View File

@@ -0,0 +1,26 @@
# Architecture config for the `trm_hier6` variant of the Tiny Recursive
# Reasoning Model (ACT V1).
name: recursive_reasoning.trm_hier6@TinyRecursiveReasoningModel_ACTV1

# Loss head. These keys must stay nested under `loss`: at top level,
# `loss.name` would collide with the model `name` above.
loss:
  name: losses@ACTLossHead
  loss_type: stablemax_cross_entropy

# Halting (ACT) settings.
halt_exploration_prob: 0.1
halt_max_steps: 16

# H_* / L_* cycle and layer counts.
H_cycles: 3
L_cycles: 6
H_layers: 0
L_layers: 2

hidden_size: 512
# 8 == hidden_size // 64.
# NOTE(review): the previous note read "min(2, hidden_size // 64)", which
# would give 2 for hidden_size 512; presumably max(2, ...) was meant.
num_heads: 8
expansion: 4

# Relative interpolation: resolves to `hidden_size` of this same config node.
puzzle_emb_ndim: ${.hidden_size}

pos_encodings: rope
forward_dtype: bfloat16

mlp_t: false  # if true, use mlp on L instead of transformer
puzzle_emb_len: 16  # if non-zero, the length is fixed to this value
# No "continue" ACT loss; only the sigmoid of the halt signal is used.
no_ACT_continue: true

View File

@@ -0,0 +1,26 @@
# Architecture config for the `trm_singlez` variant of the Tiny Recursive
# Reasoning Model (ACT V1).
name: recursive_reasoning.trm_singlez@TinyRecursiveReasoningModel_ACTV1

# Loss head. These keys must stay nested under `loss`: at top level,
# `loss.name` would collide with the model `name` above.
loss:
  name: losses@ACTLossHead
  loss_type: stablemax_cross_entropy

# Halting (ACT) settings.
halt_exploration_prob: 0.1
halt_max_steps: 16

# H_* / L_* cycle and layer counts.
H_cycles: 3
L_cycles: 6
H_layers: 0
L_layers: 2

hidden_size: 512
# 8 == hidden_size // 64.
# NOTE(review): the previous note read "min(2, hidden_size // 64)", which
# would give 2 for hidden_size 512; presumably max(2, ...) was meant.
num_heads: 8
expansion: 4

# Relative interpolation: resolves to `hidden_size` of this same config node.
puzzle_emb_ndim: ${.hidden_size}

pos_encodings: rope
forward_dtype: bfloat16

mlp_t: false  # if true, use mlp on L instead of transformer
puzzle_emb_len: 16  # if non-zero, the length is fixed to this value
# No "continue" ACT loss; only the sigmoid of the halt signal is used.
no_ACT_continue: true

42
config/cfg_pretrain.yaml Normal file
View File

@@ -0,0 +1,42 @@
# ARC training config
# Defaults list: pulls in the `trm` option of the `arch` config group, then
# applies this file's own values (`_self_`).
defaults:
  - arch: trm
  - _self_

# Must stay nested under `hydra`; at top level `output_subdir` would be an
# unrelated key and `hydra` itself would be null.
hydra:
  output_subdir: null

# Data path
data_paths: ['data/arc-aug-1000']
data_paths_test: []

evaluators:
  - name: arc@ARC

# Hyperparams - Training
global_batch_size: 768
epochs: 100000
eval_interval: 10000
checkpoint_every_eval: true

# Exponent floats are written with an explicit ".0" so that plain YAML 1.1
# loaders also read them as numbers rather than strings.
lr: 1.0e-4
lr_min_ratio: 1.0
lr_warmup_steps: 2000

# Standard hyperparameter settings for LM, as used in Llama
beta1: 0.9
beta2: 0.95
weight_decay: 0.1
puzzle_emb_weight_decay: 0.1

# Hyperparams - Puzzle embeddings training
puzzle_emb_lr: 1.0e-2

seed: 0
min_eval_interval: 0  # when to start the eval

ema: false  # use Exponential-Moving-Average
ema_rate: 0.999  # EMA-rate
freeze_weights: false  # if true, freeze weights and only learn the embeddings