#!/usr/bin/env bash
set -e
# change dir to the repository root
cd ../..
BASE_MODEL=rmt_bert_base_lastln_t2t_1000G_seglen_512_len_3992_maxnsegm_8_msz_10_bptt-1_bs256_lr_1e-05_wd_1e-04_fp16_O2
BASE_CKPTS=(model_450000)
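# The pretraining run name appears to encode its setup: RMT over a BERT-base
# backbone, segment length 512, inputs up to 3992 tokens (max 8 segments),
# 10 memory tokens, full BPTT, batch 256, lr 1e-05, wd 1e-04, fp16 (O2).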
TOKENIZER=./data/tokenizers/t2t_1000h_multi_32k/
CONFIG=./data/configs/L12-H768-A12-V32k-preln-lastln.json
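# Config name decodes as: 12 layers, hidden size 768, 12 attention heads,
# 32k vocabulary, pre-layer-norm blocks with a final layer norm (BERT-base shape).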
OPT=AdamW
SCHEDULER=constant_with_warmup
TASK=epdnew_promoter
LEN=16000_fxd
ITERS=50000
NP=${NP:-4} # number of horovod worker processes (GPUs); assumed default, override via env
TBS=128 # total effective batch size across all workers
BS=16 # per-GPU batch size per step; gradient accumulation is derived as TBS/(BS*NP)
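# Worked example: with the assumed NP=4, gradient_accumulation_steps = 128/(16*4) = 2,
# so each optimizer step sees 16 samples x 2 accumulation steps x 4 workers = 128.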
PATIENCE=10
WD=0.0
LR=5e-05
BODY_LR_MULT=1.0
CLIP_NORM=1.0
BPE_DROPOUT=0.0
# RMT
INPUT_SIZE=512 # segment length
MAX_N_SEGMENTS=6
MEMORY_SIZE=10
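# RMT processes each input as up to MAX_N_SEGMENTS segments of INPUT_SIZE tokens,
# carrying MEMORY_SIZE learned memory-token states from one segment to the next,
# which lets the 512-token backbone read inputs far longer than a single segment.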
HOME_PATH=/home/jovyan
DATA_PATH=${HOME_PATH}/data
PRETRAINED_PATH=${HOME_PATH}/t5-experiments/runs
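# Pretrained checkpoints are read from ${PRETRAINED_PATH}/${BASE_MODEL}/<ckpt>.pth;
# finetuning outputs are written under ./runs/ relative to the repository root.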
for N in 1 2 3 4 5 # one run per dataset split; N also offsets the random seed
do
for (( i=0; i<${#BASE_CKPTS[@]}; i++ )) # one run per pretrained checkpoint
do
# rmt_params=rmt_seglen_${INPUT_SIZE}_msz_${MEMORY_SIZE}_sum_loss
rmt_params=rmt_seglen_${INPUT_SIZE}_msz_${MEMORY_SIZE}
BASE_CKPT=${BASE_CKPTS[i]}
MODEL_PATH=./runs/${TASK}_${LEN}_rmt/${BASE_MODEL}/${BASE_CKPT}/${rmt_params}_lr${LR}_body_m${BODY_LR_MULT}_${OPT}_${SCHEDULER}_wd${WD}_cgn${CLIP_NORM}_bpe${BPE_DROPOUT}_p${PATIENCE}_bs${TBS}_it${ITERS}/run_${N}
echo $MODEL_PATH
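# Launch NP distributed workers via horovod's gloo controller; gradient
# accumulation keeps the effective batch at TBS regardless of NP.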
horovodrun --gloo -np $NP python -m downstream_tasks.promoter_prediction.run_promoter_finetuning_rmt \
--data_path ${DATA_PATH}/downstream_tasks/${TASK}/len_${LEN}/split_${N}/train \
--valid_data_path ${DATA_PATH}/downstream_tasks/${TASK}/len_${LEN}/split_${N}/valid \
--test_data_path ${DATA_PATH}/downstream_tasks/${TASK}/len_${LEN}/split_${N}/test \
--model_path $MODEL_PATH \
--tokenizer $TOKENIZER --model_cfg $CONFIG \
--backbone_cls src.gena_lm.modeling_bert:BertForSequenceClassification \
--model_cls src.gena_lm.modeling_rmt:RMTEncoderForSequenceClassification \
--init_checkpoint ${PRETRAINED_PATH}/${BASE_MODEL}/${BASE_CKPT}.pth \
--input_seq_len 4096 --data_n_workers 2 \
--input_size $INPUT_SIZE \
--num_mem_tokens $MEMORY_SIZE \
--max_n_segments $MAX_N_SEGMENTS \
--backbone_trainable \
--bptt_depth -1 \
--iters $ITERS \
--batch_size $BS --gradient_accumulation_steps $(($TBS/($BS*$NP))) \
--lr $LR --lr_scheduler $SCHEDULER --num_warmup_steps 250 \
--body_lr_multiplier ${BODY_LR_MULT} \
--optimizer ${OPT} --weight_decay $WD \
--bpe_dropout ${BPE_DROPOUT} \
--reset_lr --reset_optimizer --reset_iteration \
--optimize_metric f1 --optimize_mode max --save_best \
--log_interval 100 --valid_interval 100 --early_stopping_patience $PATIENCE \
--clip_grad_norm $CLIP_NORM \
--seed $(($N+42)) # distinct seed for each split/run
done
done
echo "done"