Skip to content

Commit

Permalink
add augmentation arguments to datamodules
Browse files Browse the repository at this point in the history
  • Loading branch information
djdameln committed Dec 16, 2024
1 parent 23df070 commit 5a12622
Show file tree
Hide file tree
Showing 41 changed files with 300 additions and 132 deletions.
35 changes: 30 additions & 5 deletions src/anomalib/data/datamodules/base/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Copyright (C) 2022-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import copy
import logging
from abc import ABC, abstractmethod
from pathlib import Path
Expand All @@ -12,6 +13,7 @@
from lightning.pytorch.trainer.states import TrainerFn
from lightning.pytorch.utilities.types import EVAL_DATALOADERS, TRAIN_DATALOADERS
from torch.utils.data.dataloader import DataLoader
from torchvision.transforms.v2 import Transform

from anomalib import TaskType
from anomalib.data.utils import TestSplitMode, ValSplitMode, random_split, split_by_label
Expand All @@ -32,6 +34,14 @@ class AnomalibDataModule(LightningDataModule, ABC):
train_batch_size (int): Batch size used by the train dataloader.
eval_batch_size (int): Batch size used by the val and test dataloaders.
num_workers (int): Number of workers used by the train, val and test dataloaders.
train_augmentations (Transform | None): Augmentations to apply to the training images.
Defaults to ``None``.
val_augmentations (Transform | None): Augmentations to apply to the validation images.
Defaults to ``None``.
test_augmentations (Transform | None): Augmentations to apply to the test images.
Defaults to ``None``.
augmentations (Transform | None): General augmentations to apply if stage-specific
augmentations are not provided.
val_split_mode (ValSplitMode): Determines how the validation split is obtained.
Options: [none, same_as_test, from_test, synthetic]
val_split_ratio (float): Fraction of the train or test images held out for validation.
Expand All @@ -49,8 +59,12 @@ def __init__(
train_batch_size: int,
eval_batch_size: int,
num_workers: int,
val_split_mode: ValSplitMode | str,
val_split_ratio: float,
train_augmentations: Transform | None = None,
val_augmentations: Transform | None = None,
test_augmentations: Transform | None = None,
augmentations: Transform | None = None,
val_split_mode: ValSplitMode | str | None = None,
val_split_ratio: float | None = None,
test_split_mode: TestSplitMode | str | None = None,
test_split_ratio: float | None = None,
seed: int | None = None,
Expand All @@ -60,11 +74,15 @@ def __init__(
self.eval_batch_size = eval_batch_size
self.num_workers = num_workers
self.test_split_mode = TestSplitMode(test_split_mode) if test_split_mode else TestSplitMode.NONE
self.test_split_ratio = test_split_ratio
self.test_split_ratio = test_split_ratio or 0.5
self.val_split_mode = ValSplitMode(val_split_mode)
self.val_split_ratio = val_split_ratio
self.val_split_ratio = val_split_ratio or 0.5
self.seed = seed

self.train_augmentations = train_augmentations or augmentations
self.val_augmentations = val_augmentations or augmentations
self.test_augmentations = test_augmentations or augmentations

self.train_data: AnomalibDataset
self.val_data: AnomalibDataset
self.test_data: AnomalibDataset
Expand Down Expand Up @@ -95,6 +113,13 @@ def setup(self, stage: str | None = None) -> None:
# only set the flag if the stage is a TrainerFn, which means the setup has been called from a trainer
self._is_setup = True

if hasattr(self, "train_data"):
self.train_data.augmentations = self.train_augmentations
if hasattr(self, "val_data"):
self.val_data.augmentations = self.val_augmentations
if hasattr(self, "test_data"):
self.test_data.augmentations = self.test_augmentations

@abstractmethod
def _setup(self, _stage: str | None = None) -> None:
"""Set up the datasets and perform dynamic subset splitting.
Expand Down Expand Up @@ -175,7 +200,7 @@ def _create_val_split(self) -> None:
)
elif self.val_split_mode == ValSplitMode.SAME_AS_TEST:
# equal to test set
self.val_data = self.test_data
self.val_data = copy.deepcopy(self.test_data)
elif self.val_split_mode == ValSplitMode.SYNTHETIC:
# converted from random training sample
self.train_data, normal_val_data = random_split(self.train_data, self.val_split_ratio, seed=self.seed)
Expand Down
18 changes: 18 additions & 0 deletions src/anomalib/data/datamodules/depth/folder_3d.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

from pathlib import Path

from torchvision.transforms.v2 import Transform

from anomalib.data.datamodules.base.image import AnomalibDataModule
from anomalib.data.datasets.depth.folder_3d import Folder3DDataset
from anomalib.data.utils import Split, TestSplitMode, ValSplitMode
Expand Down Expand Up @@ -46,6 +48,14 @@ class Folder3D(AnomalibDataModule):
Defaults to ``32``.
num_workers (int, optional): Number of workers.
Defaults to ``8``.
train_augmentations (Transform | None): Augmentations to apply to the training images.
Defaults to ``None``.
val_augmentations (Transform | None): Augmentations to apply to the validation images.
Defaults to ``None``.
test_augmentations (Transform | None): Augmentations to apply to the test images.
Defaults to ``None``.
augmentations (Transform | None): General augmentations to apply if stage-specific
augmentations are not provided.
test_split_mode (TestSplitMode): Setting that determines how the testing subset is obtained.
Defaults to ``TestSplitMode.FROM_DIR``.
test_split_ratio (float): Fraction of images from the train set that will be reserved for testing.
Expand Down Expand Up @@ -73,6 +83,10 @@ def __init__(
train_batch_size: int = 32,
eval_batch_size: int = 32,
num_workers: int = 8,
train_augmentations: Transform | None = None,
val_augmentations: Transform | None = None,
test_augmentations: Transform | None = None,
augmentations: Transform | None = None,
test_split_mode: TestSplitMode | str = TestSplitMode.FROM_DIR,
test_split_ratio: float = 0.2,
val_split_mode: ValSplitMode | str = ValSplitMode.FROM_TEST,
Expand All @@ -83,6 +97,10 @@ def __init__(
train_batch_size=train_batch_size,
eval_batch_size=eval_batch_size,
num_workers=num_workers,
train_augmentations=train_augmentations,
val_augmentations=val_augmentations,
test_augmentations=test_augmentations,
augmentations=augmentations,
test_split_mode=test_split_mode,
test_split_ratio=test_split_ratio,
val_split_mode=val_split_mode,
Expand Down
18 changes: 18 additions & 0 deletions src/anomalib/data/datamodules/depth/mvtec_3d.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import logging
from pathlib import Path

from torchvision.transforms.v2 import Transform

from anomalib.data.datamodules.base.image import AnomalibDataModule
from anomalib.data.datasets.depth.mvtec_3d import MVTec3DDataset
from anomalib.data.utils import DownloadInfo, Split, TestSplitMode, ValSplitMode, download_and_extract
Expand Down Expand Up @@ -51,6 +53,14 @@ class MVTec3D(AnomalibDataModule):
Defaults to ``32``.
num_workers (int, optional): Number of workers.
Defaults to ``8``.
train_augmentations (Transform | None): Augmentations to apply to the training images.
Defaults to ``None``.
val_augmentations (Transform | None): Augmentations to apply to the validation images.
Defaults to ``None``.
test_augmentations (Transform | None): Augmentations to apply to the test images.
Defaults to ``None``.
augmentations (Transform | None): General augmentations to apply if stage-specific
augmentations are not provided.
test_split_mode (TestSplitMode): Setting that determines how the testing subset is obtained.
Defaults to ``TestSplitMode.FROM_DIR``.
test_split_ratio (float): Fraction of images from the train set that will be reserved for testing.
Expand All @@ -70,6 +80,10 @@ def __init__(
train_batch_size: int = 32,
eval_batch_size: int = 32,
num_workers: int = 8,
train_augmentations: Transform | None = None,
val_augmentations: Transform | None = None,
test_augmentations: Transform | None = None,
augmentations: Transform | None = None,
test_split_mode: TestSplitMode | str = TestSplitMode.FROM_DIR,
test_split_ratio: float = 0.2,
val_split_mode: ValSplitMode | str = ValSplitMode.SAME_AS_TEST,
Expand All @@ -80,6 +94,10 @@ def __init__(
train_batch_size=train_batch_size,
eval_batch_size=eval_batch_size,
num_workers=num_workers,
train_augmentations=train_augmentations,
val_augmentations=val_augmentations,
test_augmentations=test_augmentations,
augmentations=augmentations,
test_split_mode=test_split_mode,
test_split_ratio=test_split_ratio,
val_split_mode=val_split_mode,
Expand Down
17 changes: 17 additions & 0 deletions src/anomalib/data/datamodules/image/btech.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from pathlib import Path

import cv2
from torchvision.transforms.v2 import Transform
from tqdm import tqdm

from anomalib.data.datamodules.base.image import AnomalibDataModule
Expand Down Expand Up @@ -43,6 +44,14 @@ class BTech(AnomalibDataModule):
Defaults to ``32``.
num_workers (int, optional): Number of workers.
Defaults to ``8``.
train_augmentations (Transform | None): Augmentations to apply to the training images.
Defaults to ``None``.
val_augmentations (Transform | None): Augmentations to apply to the validation images.
Defaults to ``None``.
test_augmentations (Transform | None): Augmentations to apply to the test images.
Defaults to ``None``.
augmentations (Transform | None): General augmentations to apply if stage-specific
augmentations are not provided.
test_split_mode (TestSplitMode, optional): Setting that determines how the testing subset is obtained.
Defaults to ``TestSplitMode.FROM_DIR``.
test_split_ratio (float, optional): Fraction of images from the train set that will be reserved for testing.
Expand Down Expand Up @@ -99,6 +108,10 @@ def __init__(
train_batch_size: int = 32,
eval_batch_size: int = 32,
num_workers: int = 8,
train_augmentations: Transform | None = None,
val_augmentations: Transform | None = None,
test_augmentations: Transform | None = None,
augmentations: Transform | None = None,
test_split_mode: TestSplitMode | str = TestSplitMode.FROM_DIR,
test_split_ratio: float = 0.2,
val_split_mode: ValSplitMode | str = ValSplitMode.SAME_AS_TEST,
Expand All @@ -109,6 +122,10 @@ def __init__(
train_batch_size=train_batch_size,
eval_batch_size=eval_batch_size,
num_workers=num_workers,
train_augmentations=train_augmentations,
val_augmentations=val_augmentations,
test_augmentations=test_augmentations,
augmentations=augmentations,
test_split_mode=test_split_mode,
test_split_ratio=test_split_ratio,
val_split_mode=val_split_mode,
Expand Down
20 changes: 16 additions & 4 deletions src/anomalib/data/datamodules/image/datumaro.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

from pathlib import Path

from torchvision.transforms.v2 import Transform

from anomalib.data.datamodules.base import AnomalibDataModule
from anomalib.data.datasets.image.datumaro import DatumaroDataset
from anomalib.data.utils import Split, TestSplitMode, ValSplitMode
Expand All @@ -24,13 +26,15 @@ class Datumaro(AnomalibDataModule):
Defaults to ``32``.
num_workers (int): Number of workers for dataloaders.
Defaults to ``8``.
image_size (tuple[int, int], optional): Size to which input images should be resized.
train_augmentations (Transform | None): Augmentations to apply to the training images.
Defaults to ``None``.
transform (Transform, optional): Transforms that should be applied to the input images.
val_augmentations (Transform | None): Augmentations to apply to the validation images.
Defaults to ``None``.
train_transform (Transform, optional): Transforms that should be applied to the input images during training.
test_augmentations (Transform | None): Augmentations to apply to the test images.
Defaults to ``None``.
eval_transform (Transform, optional): Transforms that should be applied to the input images during evaluation.
augmentations (Transform | None): General augmentations to apply if stage-specific
augmentations are not provided.
image_size (tuple[int, int], optional): Size to which input images should be resized.
Defaults to ``None``.
test_split_mode (TestSplitMode): Setting that determines how the testing subset is obtained.
Defaults to ``TestSplitMode.FROM_DIR``.
Expand Down Expand Up @@ -65,6 +69,10 @@ def __init__(
train_batch_size: int = 32,
eval_batch_size: int = 32,
num_workers: int = 8,
train_augmentations: Transform | None = None,
val_augmentations: Transform | None = None,
test_augmentations: Transform | None = None,
augmentations: Transform | None = None,
test_split_mode: TestSplitMode | str = TestSplitMode.FROM_DIR,
test_split_ratio: float = 0.5,
val_split_mode: ValSplitMode | str = ValSplitMode.FROM_TEST,
Expand All @@ -75,6 +83,10 @@ def __init__(
train_batch_size=train_batch_size,
eval_batch_size=eval_batch_size,
num_workers=num_workers,
train_augmentations=train_augmentations,
val_augmentations=val_augmentations,
test_augmentations=test_augmentations,
augmentations=augmentations,
val_split_mode=val_split_mode,
val_split_ratio=val_split_ratio,
test_split_mode=test_split_mode,
Expand Down
18 changes: 18 additions & 0 deletions src/anomalib/data/datamodules/image/folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from collections.abc import Sequence
from pathlib import Path

from torchvision.transforms.v2 import Transform

from anomalib.data.datamodules.base.image import AnomalibDataModule
from anomalib.data.datasets.image.folder import FolderDataset
from anomalib.data.utils import Split, TestSplitMode, ValSplitMode
Expand Down Expand Up @@ -42,6 +44,14 @@ class Folder(AnomalibDataModule):
Defaults to ``32``.
num_workers (int, optional): Number of workers.
Defaults to ``8``.
train_augmentations (Transform | None): Augmentations to apply to the training images.
Defaults to ``None``.
val_augmentations (Transform | None): Augmentations to apply to the validation images.
Defaults to ``None``.
test_augmentations (Transform | None): Augmentations to apply to the test images.
Defaults to ``None``.
augmentations (Transform | None): General augmentations to apply if stage-specific
augmentations are not provided.
test_split_mode (TestSplitMode): Setting that determines how the testing subset is obtained.
Defaults to ``TestSplitMode.FROM_DIR``.
test_split_ratio (float): Fraction of images from the train set that will be reserved for testing.
Expand Down Expand Up @@ -119,6 +129,10 @@ def __init__(
train_batch_size: int = 32,
eval_batch_size: int = 32,
num_workers: int = 8,
train_augmentations: Transform | None = None,
val_augmentations: Transform | None = None,
test_augmentations: Transform | None = None,
augmentations: Transform | None = None,
test_split_mode: TestSplitMode | str = TestSplitMode.FROM_DIR,
test_split_ratio: float = 0.2,
val_split_mode: ValSplitMode | str = ValSplitMode.FROM_TEST,
Expand All @@ -138,6 +152,10 @@ def __init__(
train_batch_size=train_batch_size,
eval_batch_size=eval_batch_size,
num_workers=num_workers,
train_augmentations=train_augmentations,
val_augmentations=val_augmentations,
test_augmentations=test_augmentations,
augmentations=augmentations,
test_split_mode=test_split_mode,
test_split_ratio=test_split_ratio,
val_split_mode=val_split_mode,
Expand Down
18 changes: 18 additions & 0 deletions src/anomalib/data/datamodules/image/kolektor.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import logging
from pathlib import Path

from torchvision.transforms.v2 import Transform

from anomalib.data.datamodules.base.image import AnomalibDataModule
from anomalib.data.datasets.image.kolektor import KolektorDataset
from anomalib.data.utils import DownloadInfo, Split, TestSplitMode, ValSplitMode, download_and_extract
Expand All @@ -45,6 +47,14 @@ class Kolektor(AnomalibDataModule):
Defaults to ``32``.
num_workers (int, optional): Number of workers.
Defaults to ``8``.
train_augmentations (Transform | None): Augmentations to apply to the training images.
Defaults to ``None``.
val_augmentations (Transform | None): Augmentations to apply to the validation images.
Defaults to ``None``.
test_augmentations (Transform | None): Augmentations to apply to the test images.
Defaults to ``None``.
augmentations (Transform | None): General augmentations to apply if stage-specific
augmentations are not provided.
test_split_mode (TestSplitMode): Setting that determines how the testing subset is obtained.
Defaults to ``TestSplitMode.FROM_DIR``
test_split_ratio (float): Fraction of images from the train set that will be reserved for testing.
Expand All @@ -63,6 +73,10 @@ def __init__(
train_batch_size: int = 32,
eval_batch_size: int = 32,
num_workers: int = 8,
train_augmentations: Transform | None = None,
val_augmentations: Transform | None = None,
test_augmentations: Transform | None = None,
augmentations: Transform | None = None,
test_split_mode: TestSplitMode | str = TestSplitMode.FROM_DIR,
test_split_ratio: float = 0.2,
val_split_mode: ValSplitMode | str = ValSplitMode.SAME_AS_TEST,
Expand All @@ -73,6 +87,10 @@ def __init__(
train_batch_size=train_batch_size,
eval_batch_size=eval_batch_size,
num_workers=num_workers,
train_augmentations=train_augmentations,
val_augmentations=val_augmentations,
test_augmentations=test_augmentations,
augmentations=augmentations,
test_split_mode=test_split_mode,
test_split_ratio=test_split_ratio,
val_split_mode=val_split_mode,
Expand Down
Loading

0 comments on commit 5a12622

Please sign in to comment.