From 49eac2f27a5bb98a7f7ecc1154918880aa55256c Mon Sep 17 00:00:00 2001
From: Ross Wightman
Date: Wed, 30 Oct 2024 09:30:17 -0700
Subject: [PATCH] Fix #726 grad checkpointing + dynamo work in newer torch,
 but w/ ddp, ddp optimizer must be disabled

---
 src/open_clip_train/main.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/open_clip_train/main.py b/src/open_clip_train/main.py
index 7c244ae35..b3e9b9b50 100644
--- a/src/open_clip_train/main.py
+++ b/src/open_clip_train/main.py
@@ -422,6 +422,12 @@ def main(args):
     original_model = model
     if args.torchcompile:
         logging.info('Compiling model...')
+
+        if args.grad_checkpointing and args.distributed:
+            logging.info('Disabling DDP dynamo optimizer when grad checkpointing enabled.')
+            # As of now (~PyTorch 2.4/2.5), compile + grad checkpointing works, but the DDP optimizer must be disabled
+            torch._dynamo.config.optimize_ddp = False
+
         model = torch.compile(original_model)
 
     if 'train' not in data:
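
For context, a minimal standalone sketch of the same workaround outside open_clip: it combines torch.compile, gradient checkpointing, and DDP, and sets torch._dynamo.config.optimize_ddp = False before compiling. The toy Net module, layer sizes, and torchrun-based process-group setup are hypothetical, not part of the patch.

    # Sketch only: assumes launch via torchrun, which sets RANK/WORLD_SIZE env vars.
    import torch
    import torch.nn as nn
    import torch.distributed as dist
    from torch.nn.parallel import DistributedDataParallel as DDP
    from torch.utils.checkpoint import checkpoint

    class Net(nn.Module):  # hypothetical toy model
        def __init__(self):
            super().__init__()
            self.block1 = nn.Sequential(nn.Linear(64, 64), nn.ReLU())
            self.block2 = nn.Linear(64, 10)

        def forward(self, x):
            # Gradient checkpointing: block1 activations are recomputed in backward.
            x = checkpoint(self.block1, x, use_reentrant=False)
            return self.block2(x)

    def main():
        dist.init_process_group('nccl')
        device = torch.device('cuda', dist.get_rank() % torch.cuda.device_count())

        # The fix from the patch above: with grad checkpointing under DDP,
        # dynamo's bucket-level DDP graph splitting (the "DDP optimizer")
        # must be disabled for compilation to work (~PyTorch 2.4/2.5).
        torch._dynamo.config.optimize_ddp = False

        model = DDP(Net().to(device), device_ids=[device.index])
        model = torch.compile(model)

        x = torch.randn(8, 64, device=device)
        model(x).sum().backward()
        dist.destroy_process_group()

    if __name__ == '__main__':
        main()

Note that the patch guards the setting behind args.grad_checkpointing and args.distributed, so the DDP optimizer stays enabled in the cases where it still helps (compiled DDP training without checkpointing).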