From 31bebdea147c96f8a00a0d55931858bf727ae370 Mon Sep 17 00:00:00 2001 From: Jack Zhang Date: Mon, 5 Aug 2024 13:07:02 -0700 Subject: [PATCH 1/3] Fix torch export issue Summary: X-link: https://github.com/fairinternal/detectron2/pull/604 Pull Request resolved: https://github.com/facebookresearch/detectron2/pull/5334 Add a check to deal with dynamic shapes so that the model can be exported with `torch.export`. This check prevents the graph break caused by the SymInt by delaying the assertion to runtime. Reviewed By: wat3rBro Differential Revision: D60126415 fbshipit-source-id: a2a75530db523bfdde984b890595e02360d8e07f --- detectron2/export/c10.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/detectron2/export/c10.py b/detectron2/export/c10.py index adbc62bea7..35380af01a 100644 --- a/detectron2/export/c10.py +++ b/detectron2/export/c10.py @@ -84,6 +84,11 @@ def set(self, name, value): else: data_len = len(value) if len(self.batch_extra_fields): + # If we are tracing with Dynamo, the check here is needed since len(self) + # represents the number of bounding boxes detected in the image and thus is + # an unbounded SymInt. + if torch._utils.is_compiling(): + torch._check(len(self) == data_len) assert ( len(self) == data_len ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self)) From bcfd464d0c810f0442d91a349c0f6df945467143 Mon Sep 17 00:00:00 2001 From: generatedunixname89002005307016 Date: Fri, 9 Aug 2024 03:00:43 -0700 Subject: [PATCH 2/3] upgrade pyre version in `fbcode/vision` - batch 1 Differential Revision: D60992191 fbshipit-source-id: f826042c9d5b4f9b72b142fcef13f5772e3b9a8d --- projects/DensePose/densepose/modeling/losses/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/projects/DensePose/densepose/modeling/losses/utils.py b/projects/DensePose/densepose/modeling/losses/utils.py index f865798760..f4475820c5 100644 --- a/projects/DensePose/densepose/modeling/losses/utils.py +++ b/projects/DensePose/densepose/modeling/losses/utils.py @@ -225,6 +225,7 @@ def resample_data( grid_h = torch.arange(hout, device=z.device, dtype=torch.float) / hout grid_w_expanded = grid_w[None, None, :].expand(n, hout, wout) grid_h_expanded = grid_h[None, :, None].expand(n, hout, wout) + # pyre-fixme[16]: `float` has no attribute `__getitem__`. dx_expanded = (x1dst_norm - x0dst_norm)[:, None, None].expand(n, hout, wout) dy_expanded = (y1dst_norm - y0dst_norm)[:, None, None].expand(n, hout, wout) x0_expanded = x0dst_norm[:, None, None].expand(n, hout, wout) From 5b72c27ae39f99db75d43f18fd1312e1ea934e60 Mon Sep 17 00:00:00 2001 From: Yanghan Wang Date: Thu, 22 Aug 2024 10:00:16 -0700 Subject: [PATCH 3/3] fix inference accuracy test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Pull Request resolved: https://github.com/facebookresearch/detectron2/pull/5348 Some accuracy tests started to fail in between Jun 11 and Jun 17: - ❌ mask_rcnn_R_50_FPN_inference_acc_test - ✅ keypoint_rcnn_R_50_FPN_inference_acc_test - ✅ fast_rcnn_R_50_FPN_inference_acc_test - ❌ panoptic_fpn_R_50_inference_acc_test - ✅ retinanet_R_50_FPN_inference_acc_test - ❌ rpn_R_50_FPN_inference_acc_test - ✅ semantic_R_50_FPN_inference_acc_test - ❌ cascade_mask_rcnn_R_50_FPN_inference_acc_test V1: update the yaml to reflect the new scores. V5: it turns out that we can match the old scores by disabling tf32. Reviewed By: balakv504 Differential Revision: D61301698 fbshipit-source-id: 60f17b03574fbde62c7a84f47bedff4fd040aaa8 --- ...mask_rcnn_R_50_FPN_inference_acc_test.yaml | 1 + ...fast_rcnn_R_50_FPN_inference_acc_test.yaml | 1 + ...oint_rcnn_R_50_FPN_inference_acc_test.yaml | 1 + .../mask_rcnn_R_50_C4_inference_acc_test.yaml | 1 + ...mask_rcnn_R_50_DC5_inference_acc_test.yaml | 1 + ...mask_rcnn_R_50_FPN_inference_acc_test.yaml | 1 + .../panoptic_fpn_R_50_inference_acc_test.yaml | 1 + .../rpn_R_50_FPN_inference_acc_test.yaml | 1 + detectron2/config/defaults.py | 4 +++ detectron2/engine/defaults.py | 32 +++++++++++++++++++ 10 files changed, 44 insertions(+) diff --git a/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml index fc5a4116cb..b76788b6b4 100644 --- a/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml +++ b/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml @@ -5,3 +5,4 @@ DATASETS: TEST: ("coco_2017_val_100",) TEST: EXPECTED_RESULTS: [["bbox", "AP", 50.18, 0.02], ["segm", "AP", 43.87, 0.02]] +FLOAT32_PRECISION: "highest" diff --git a/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml index a2f37e5e2c..1be53eb7d7 100644 --- a/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml +++ b/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml @@ -5,3 +5,4 @@ DATASETS: TEST: ("coco_2017_val_100",) TEST: EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]] +FLOAT32_PRECISION: "highest" diff --git a/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml index 14cf2aa82a..df496c1f27 100644 --- a/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml +++ b/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml @@ -5,3 +5,4 @@ DATASETS: TEST: ("keypoints_coco_2017_val_100",) TEST: EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]] +FLOAT32_PRECISION: "highest" diff --git a/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml index b2d5b7ff87..5f18275274 100644 --- a/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml +++ b/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml @@ -5,3 +5,4 @@ DATASETS: TEST: ("coco_2017_val_100",) TEST: EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]] +FLOAT32_PRECISION: "highest" diff --git a/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml index e3ce6cf922..b72ffc9fe4 100644 --- a/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml +++ b/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml @@ -5,3 +5,4 @@ DATASETS: TEST: ("coco_2017_val_100",) TEST: EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]] +FLOAT32_PRECISION: "highest" diff --git a/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml index e5454bfd95..8cad72eb23 100644 --- a/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml +++ b/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml @@ -8,3 +8,4 @@ TEST: AUG: ENABLED: True MIN_SIZES: (700, 800) # to save some time +FLOAT32_PRECISION: "highest" diff --git a/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml b/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml index 70874e3a92..f5429b6330 100644 --- a/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml +++ b/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml @@ -5,3 +5,4 @@ DATASETS: TEST: ("coco_2017_val_100_panoptic_separated",) TEST: EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]] +FLOAT32_PRECISION: "highest" diff --git a/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml b/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml index c7c3f908a9..aa17e742d7 100644 --- a/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml +++ b/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml @@ -5,3 +5,4 @@ DATASETS: TEST: ("coco_2017_val_100",) TEST: EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]] +FLOAT32_PRECISION: "highest" diff --git a/detectron2/config/defaults.py b/detectron2/config/defaults.py index 5d97ec92d2..506651730e 100644 --- a/detectron2/config/defaults.py +++ b/detectron2/config/defaults.py @@ -636,6 +636,10 @@ # for about 10k iterations. It usually hurts total time, but can benefit for certain models. # If input images have the same or similar sizes, benchmark is often helpful. _C.CUDNN_BENCHMARK = False +# Option to set PyTorch matmul and CuDNN's float32 precision. When set to non-empty string, +# the corresponding precision ("highest", "high" or "medium") will be used. The highest +# precision will effectively disable tf32. +_C.FLOAT32_PRECISION = "" # The period (in terms of steps) for minibatch visualization at train time. # Set to 0 to disable. _C.VIS_PERIOD = 0 diff --git a/detectron2/engine/defaults.py b/detectron2/engine/defaults.py index c649bf8ff7..3dbcd86b75 100644 --- a/detectron2/engine/defaults.py +++ b/detectron2/engine/defaults.py @@ -171,6 +171,30 @@ def _highlight(code, filename): return code +# adapted from: +# https://github.com/pytorch/tnt/blob/ebda066f8f55af6a906807d35bc829686618074d/torchtnt/utils/device.py#L328-L346 +def _set_float32_precision(precision: str = "high") -> None: + """Sets the precision of float32 matrix multiplications and convolution operations. + + For more information, see the PyTorch docs: + - https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html + - https://pytorch.org/docs/stable/backends.html#torch.backends.cudnn.allow_tf32 + + Args: + precision: The setting to determine which datatypes to use for matrix + multiplication and convolution operations. + """ + if not (torch.cuda.is_available()): # Not relevant for non-CUDA devices + return + # set precision for matrix multiplications + torch.set_float32_matmul_precision(precision) + # set precision for convolution operations + if precision == "highest": + torch.backends.cudnn.allow_tf32 = False + else: + torch.backends.cudnn.allow_tf32 = True + + def default_setup(cfg, args): """ Perform some basic common setups at the beginning of a job, including: @@ -226,6 +250,14 @@ def default_setup(cfg, args): cfg, "CUDNN_BENCHMARK", "train.cudnn_benchmark", default=False ) + fp32_precision = _try_get_key(cfg, "FLOAT32_PRECISION", "train.float32_precision", default="") + if fp32_precision != "": + logger.info(f"Set fp32 precision to {fp32_precision}") + _set_float32_precision(fp32_precision) + logger.info(f"{torch.get_float32_matmul_precision()=}") + logger.info(f"{torch.backends.cuda.matmul.allow_tf32=}") + logger.info(f"{torch.backends.cudnn.allow_tf32=}") + def default_writers(output_dir: str, max_iter: Optional[int] = None): """