albertz committed Nov 6, 2024
1 parent 69423e5 commit 432a729
Showing 1 changed file with 36 additions and 0 deletions.
users/zeyer/experiments/exp2024_04_23_baselines/claix2023.py
@@ -420,6 +420,42 @@ def py():
        env_updates={"PYTORCH_CUDA_ALLOC_CONF": "backend:cudaMallocAsync,expandable_segments:True"},
    )

    # Normalize by num seqs, sum over frames (see the sketch after this call).
    train(
        "lm/trafo-n24-d512-gelu-drop0-b2k_80k-laplace100k-spm10k-lossSeqNorm",
        config=dict_update_deep(
            config_96gb_bf16_accgrad1,
            {
                **_get_cfg_lrlin_oclr_by_bs_nep_v3(80_000, 100, batch_size_factor=1),
                "max_seqs": 2_000,
                "optimizer.weight_decay": 1e-2,
                "calculate_exp_loss": True,
                "use_normalized_loss": "seqs",
            },
        ),
        post_config={"log_grad_norm": True},
        train_dataset=get_librispeech_lm_dataset(
            vocab="spm10k", train_epoch_split=20, train_sort_laplace_num_seqs=100_000
        ),
        model_def=ModelDefWithCfg(
            lm_model_def,
            {
                "_model_def_dict": rf.build_dict(
                    TransformerDecoder,
                    encoder_dim=None,
                    num_layers=24,
                    model_dim=512,
                    ff_activation=rf.build_dict(rf.gelu),
                    dropout=0.0,
                    att_dropout=0.0,
                )
            },
        ),
        train_def=lm_train_def,
        # avoid OOM
        env_updates={"PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True"},
    )
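For context on the settings above: with use_normalized_loss="seqs", the cross-entropy is summed over frames and normalized by the number of sequences, rather than by the total number of frames as is typical; calculate_exp_loss=True presumably reports exp(loss) as well, i.e. the perplexity. A toy PyTorch sketch of the two normalizations follows; the tensors, lengths, and masking below are made-up illustrations, not the RETURNN implementation:

import torch

# Per-frame CE losses for a toy batch of 2 sequences, padded to length 4.
frame_loss = torch.tensor(
    [[2.0, 1.5, 1.0, 0.5],  # seq 1: 4 real frames
     [2.5, 2.0, 0.0, 0.0]]  # seq 2: 2 real frames, 2 padding frames
)
seq_lens = torch.tensor([4, 2])
mask = torch.arange(frame_loss.shape[1])[None, :] < seq_lens[:, None]

total = (frame_loss * mask).sum()         # sum over all real frames
loss_frame_norm = total / seq_lens.sum()  # usual default: divide by num frames
loss_seq_norm = total / len(seq_lens)     # "seqs": sum over frames, divide by num seqs

Note that with sequence-level normalization the loss scale grows with the average sequence length, which also changes the effective gradient scale; logging the gradient norm (log_grad_norm=True) makes that easy to monitor.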

    from returnn.util.math import PiecewiseLinear

    # Try warmup of batch size (warmupBs).
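Only the comment is visible in this hunk, but the import suggests the batch-size warmup schedule is expressed with RETURNN's PiecewiseLinear helper. A minimal sketch of its interpolation semantics, assuming it maps a dict of (position -> value) points, interpolates linearly between them, and clamps outside the given range; the epochs and batch sizes below are invented for illustration:

from returnn.util.math import PiecewiseLinear

# Hypothetical schedule: ramp the batch size from 20k to 80k over the
# first 5 (sub)epochs, then keep it constant.
batch_size_by_epoch = PiecewiseLinear({1: 20_000, 5: 80_000})

assert batch_size_by_epoch(1) == 20_000
assert batch_size_by_epoch(3) == 50_000    # linear interpolation between points
assert batch_size_by_epoch(100) == 80_000  # clamped beyond the last point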
