Migrate to epochs: 1 in all configs (pytorch#1981)
thomasjpfan authored Nov 11, 2024
1 parent e1caa9f commit ac4f88e
Showing 37 changed files with 37 additions and 37 deletions.
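
Every file in this commit makes the same one-line change to its fine-tuning block, so the new default looks like the sketch below (field names are copied from the configs in this diff; surrounding values such as batch_size vary per recipe). The override shown in the comment is an assumption based on torchtune's standard key=value command-line overrides, with an illustrative recipe/config pairing:

# Fine-tuning arguments (post-change default)
batch_size: 2
epochs: 1   # was 3; override at launch if a longer run is needed, e.g.
            # tune run full_finetune_single_device --config llama3_1/8B_full_single_device epochs=3

Anyone who relied on the previous three-epoch default can restore it per run with such an override instead of editing the config.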
2 changes: 1 addition & 1 deletion recipes/configs/dev/8B_full_experimental.yaml
@@ -48,7 +48,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 
 optimizer:
   _component_: torch.optim.AdamW
2 changes: 1 addition & 1 deletion recipes/configs/gemma/2B_full.yaml
@@ -46,7 +46,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: torch.optim.AdamW
   fused: True
2 changes: 1 addition & 1 deletion recipes/configs/gemma/2B_lora.yaml
@@ -64,7 +64,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1 # Use to increase virtual batch size
 compile: False # pytorch compile, set to true for better perf/memory
2 changes: 1 addition & 1 deletion recipes/configs/gemma/2B_lora_single_device.yaml
@@ -63,7 +63,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 8 # Use to increase virtual batch size
 compile: False # pytorch compile, set to true for better perf/memory
2 changes: 1 addition & 1 deletion recipes/configs/gemma/2B_qlora_single_device.yaml
@@ -63,7 +63,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 8 # Use to increase virtual batch size
 compile: False # pytorch compile, set to true for better perf/memory
2 changes: 1 addition & 1 deletion recipes/configs/gemma/7B_lora.yaml
@@ -66,7 +66,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1 # Use to increase virtual batch size
 compile: False # pytorch compile, set to true for better perf/memory
2 changes: 1 addition & 1 deletion recipes/configs/gemma/7B_qlora_single_device.yaml
@@ -65,7 +65,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 8 # Use to increase virtual batch size
 compile: False # pytorch compile, set to true for better perf/memory
2 changes: 1 addition & 1 deletion recipes/configs/gemma2/27B_lora.yaml
@@ -63,7 +63,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1
 compile: False # pytorch compile, set to true for perf/memory improvement
2 changes: 1 addition & 1 deletion recipes/configs/gemma2/27B_qlora_single_device.yaml
@@ -62,7 +62,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 4
 compile: False # pytorch compile, set to true for perf/memory improvement
2 changes: 1 addition & 1 deletion recipes/configs/gemma2/2B_full.yaml
@@ -47,7 +47,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: torch.optim.AdamW
   fused: True
2 changes: 1 addition & 1 deletion recipes/configs/gemma2/2B_lora.yaml
@@ -65,7 +65,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1
 compile: False # pytorch compile, set to true for perf/memory improvement
2 changes: 1 addition & 1 deletion recipes/configs/gemma2/2B_lora_single_device.yaml
@@ -64,7 +64,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 8
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 2
 compile: False # pytorch compile, set to true for perf/memory improvement
2 changes: 1 addition & 1 deletion recipes/configs/gemma2/2B_qlora_single_device.yaml
@@ -64,7 +64,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 4
 compile: False # pytorch compile, set to true for perf/memory improvement
2 changes: 1 addition & 1 deletion recipes/configs/gemma2/9B_lora.yaml
@@ -63,7 +63,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1
 compile: False # pytorch compile, set to true for perf/memory improvement
2 changes: 1 addition & 1 deletion recipes/configs/gemma2/9B_qlora_single_device.yaml
@@ -62,7 +62,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 4
 compile: False # pytorch compile, set to true for perf/memory improvement
2 changes: 1 addition & 1 deletion recipes/configs/llama2/13B_full.yaml
@@ -50,7 +50,7 @@ shuffle: True
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: torch.optim.AdamW
   fused: True
2 changes: 1 addition & 1 deletion recipes/configs/llama2/7B_full.yaml
@@ -49,7 +49,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: torch.optim.AdamW
   fused: True
2 changes: 1 addition & 1 deletion recipes/configs/llama2/7B_full_low_memory.yaml
@@ -51,7 +51,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: bitsandbytes.optim.PagedAdamW
   lr: 1e-5
2 changes: 1 addition & 1 deletion recipes/configs/llama2/7B_qat_full.yaml
@@ -45,7 +45,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: torch.optim.AdamW
   fused: True
2 changes: 1 addition & 1 deletion recipes/configs/llama3/70B_full.yaml
@@ -76,7 +76,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 
 optimizer:
   _component_: torch.optim.AdamW
2 changes: 1 addition & 1 deletion recipes/configs/llama3/8B_full.yaml
@@ -48,7 +48,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 
 optimizer:
   _component_: torch.optim.AdamW
2 changes: 1 addition & 1 deletion recipes/configs/llama3/8B_full_single_device.yaml
@@ -50,7 +50,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: bitsandbytes.optim.PagedAdamW8bit
   lr: 1e-5
2 changes: 1 addition & 1 deletion recipes/configs/llama3/8B_qat_full.yaml
@@ -43,7 +43,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 
 # QAT arguments
 quantizer:
2 changes: 1 addition & 1 deletion recipes/configs/llama3_1/70B_full.yaml
@@ -75,7 +75,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 
 optimizer:
   _component_: torch.optim.AdamW
2 changes: 1 addition & 1 deletion recipes/configs/llama3_1/8B_full.yaml
@@ -51,7 +51,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 
 optimizer:
   _component_: torch.optim.AdamW
2 changes: 1 addition & 1 deletion recipes/configs/llama3_1/8B_full_single_device.yaml
@@ -53,7 +53,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: bitsandbytes.optim.PagedAdamW8bit
   lr: 2e-5
2 changes: 1 addition & 1 deletion recipes/configs/llama3_2/1B_full.yaml
@@ -48,7 +48,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 
 optimizer:
   _component_: torch.optim.AdamW
2 changes: 1 addition & 1 deletion recipes/configs/llama3_2/1B_full_single_device.yaml
@@ -50,7 +50,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 optimizer:
   _component_: bitsandbytes.optim.PagedAdamW8bit
   lr: 2e-5
2 changes: 1 addition & 1 deletion recipes/configs/llama3_2/3B_full.yaml
@@ -49,7 +49,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 
 optimizer:
   _component_: torch.optim.AdamW
2 changes: 1 addition & 1 deletion recipes/configs/llama3_2/3B_full_single_device.yaml
@@ -51,7 +51,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 optimizer:
   _component_: bitsandbytes.optim.PagedAdamW8bit
   lr: 2e-5
2 changes: 1 addition & 1 deletion recipes/configs/mistral/7B_full.yaml
@@ -52,7 +52,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: torch.optim.AdamW
   fused: True
2 changes: 1 addition & 1 deletion recipes/configs/mistral/7B_full_low_memory.yaml
@@ -54,7 +54,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: bitsandbytes.optim.PagedAdamW
   lr: 5e-6
2 changes: 1 addition & 1 deletion recipes/configs/mistral/7B_lora.yaml
@@ -72,7 +72,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1 # Use to increase virtual batch size
 compile: False # pytorch compile, set to true for better perf/memory
2 changes: 1 addition & 1 deletion recipes/configs/mistral/7B_lora_single_device.yaml
@@ -69,7 +69,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 8 # Use to increase virtual batch size
 compile: False # pytorch compile, set to true for better perf/memory
2 changes: 1 addition & 1 deletion recipes/configs/mistral/7B_qlora_single_device.yaml
@@ -70,7 +70,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 8 # Use to increase virtual batch size
 compile: False # pytorch compile, set to true for better perf/memory
2 changes: 1 addition & 1 deletion recipes/configs/qwen2/1.5B_full.yaml
@@ -48,7 +48,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: torch.optim.AdamW
   fused: True
2 changes: 1 addition & 1 deletion recipes/configs/qwen2_5/1_5B_full.yaml
@@ -48,7 +48,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: torch.optim.AdamW
   fused: True
