# Architecture config: selects a model by `name` and sets its loss head and
# size hyperparameters. Values are consumed by the project's config loader;
# the meaning of each field is defined by the model code, not by this file.

# Model identifier in `<module path>@<class>` form.
# NOTE(review): presumably resolved by the project's loader - confirm.
name: recursive_reasoning.transformers_baseline@Model_ACTV2

# Loss head settings.
# NOTE(review): `name` must be nested here (a second top-level `name` would be
# a duplicate key); `loss_type` is assumed to belong to `loss` as well - confirm.
loss:
  name: losses@ACTLossHead
  loss_type: stablemax_cross_entropy

# Halting settings.
halt_exploration_prob: 0.1
halt_max_steps: 16

H_cycles: 1  # kept for compatibility
H_layers: 8

# NOTE(review): 512 is not a multiple of 12 - confirm the attention layer
# accepts a hidden_size that is not divisible by num_heads.
hidden_size: 512
num_heads: 12
expansion: 4

# OmegaConf-style relative interpolation: takes the value of the sibling
# `hidden_size` key, so the two stay in sync.
puzzle_emb_ndim: ${.hidden_size}

pos_encodings: rope