From 7c16e4739989b4b4ec196716e89c01601d5c819b Mon Sep 17 00:00:00 2001 From: Drejc Pesjak <47791324+DrejcPesjak@users.noreply.github.com> Date: Tue, 20 Feb 2024 03:44:01 +0100 Subject: [PATCH 01/75] Resnet Variants (#9) * Added ResNet variants * ResNet50 example * Simplified example * fixed resnet config * [Automated] Updated coverage badge --------- Co-authored-by: Martin Kozlovsky Co-authored-by: GitHub Actions --- configs/resnet_model.yaml | 57 +++++++++++++++++++ luxonis_train/nodes/README.md | 13 +++-- luxonis_train/nodes/__init__.py | 4 +- .../nodes/{resnet18.py => resnet.py} | 30 +++++++--- media/coverage_badge.svg | 4 +- 5 files changed, 91 insertions(+), 17 deletions(-) create mode 100644 configs/resnet_model.yaml rename luxonis_train/nodes/{resnet18.py => resnet.py} (61%) diff --git a/configs/resnet_model.yaml b/configs/resnet_model.yaml new file mode 100644 index 00000000..7e93d269 --- /dev/null +++ b/configs/resnet_model.yaml @@ -0,0 +1,57 @@ + +model: + name: resnet50_classification + nodes: + - name: ResNet + variant: "50" + download_weights: True + + - name: ClassificationHead + inputs: + - ResNet + + losses: + - name: CrossEntropyLoss + attached_to: ClassificationHead + + metrics: + - name: Accuracy + is_main_metric: true + attached_to: ClassificationHead + + visualizers: + - name: ClassificationVisualizer + attached_to: ClassificationHead + params: + font_scale: 0.5 + color: [255, 0, 0] + thickness: 2 + include_plot: True + +dataset: + name: cifar10_test + +trainer: + batch_size: 4 + epochs: &epochs 200 + num_workers: 4 + validation_interval: 10 + num_log_images: 8 + + preprocessing: + train_image_size: [&height 224, &width 224] + keep_aspect_ratio: False + normalize: + active: True + + callbacks: + - name: ExportOnTrainEnd + - name: TestOnTrainEnd + + optimizer: + name: SGD + params: + lr: 0.02 + + scheduler: + name: ConstantLR diff --git a/luxonis_train/nodes/README.md b/luxonis_train/nodes/README.md index bd44ac5a..637c5026 100644 --- a/luxonis_train/nodes/README.md +++ b/luxonis_train/nodes/README.md @@ -5,7 +5,7 @@ arbitrarily as long as the two nodes are compatible with each other. ## Table Of Contents -- [ResNet18](#resnet18) +- [ResNet](#resnet) - [MicroNet](#micronet) - [RepVGG](#repvgg) - [EfficientRep](#efficientrep) @@ -30,15 +30,16 @@ Every node takes these parameters: Additional parameters for specific nodes are listed below. -## ResNet18 +## ResNet -Adapted from [here](https://pytorch.org/vision/main/models/generated/torchvision.models.resnet18.html). +Adapted from [here](https://pytorch.org/vision/main/models/resnet.html). **Params** -| Key | Type | Default value | Description | -| ---------------- | ---- | ------------- | -------------------------------------- | -| download_weights | bool | False | If True download weights from imagenet | +| Key | Type | Default value | Description | +| ---------------- | ----------------------------------------- | ------------- | -------------------------------------- | +| variant | Literal\["18", "34", "50", "101", "152"\] | "18" | Variant of the network. 
| +| download_weights | bool | False | If True download weights from imagenet | ## MicroNet diff --git a/luxonis_train/nodes/__init__.py b/luxonis_train/nodes/__init__.py index d7ec70d0..954db2be 100644 --- a/luxonis_train/nodes/__init__.py +++ b/luxonis_train/nodes/__init__.py @@ -10,7 +10,7 @@ from .mobileone import MobileOne from .reppan_neck import RepPANNeck from .repvgg import RepVGG -from .resnet18 import ResNet18 +from .resnet import ResNet from .rexnetv1 import ReXNetV1_lite from .segmentation_head import SegmentationHead @@ -28,6 +28,6 @@ "ReXNetV1_lite", "RepPANNeck", "RepVGG", - "ResNet18", + "ResNet", "SegmentationHead", ] diff --git a/luxonis_train/nodes/resnet18.py b/luxonis_train/nodes/resnet.py similarity index 61% rename from luxonis_train/nodes/resnet18.py rename to luxonis_train/nodes/resnet.py index 9c38681a..14ff8066 100644 --- a/luxonis_train/nodes/resnet18.py +++ b/luxonis_train/nodes/resnet.py @@ -1,10 +1,9 @@ -"""ResNet18 backbone. +"""ResNet backbone. -Source: U{https://pytorch.org/vision/main/models/generated/ -torchvision.models.resnet18.html} +Source: U{https://pytorch.org/vision/main/models/resnet.html} @license: U{PyTorch} """ - +from typing import Literal import torchvision from torch import Tensor @@ -12,19 +11,22 @@ from .base_node import BaseNode -class ResNet18(BaseNode[Tensor, list[Tensor]]): +class ResNet(BaseNode[Tensor, list[Tensor]]): attach_index: int = -1 def __init__( self, + variant: Literal["18", "34", "50", "101", "152"] = "18", channels_list: list[int] | None = None, download_weights: bool = False, **kwargs, ): - """Implementation of the ResNet18 backbone. + """Implementation of the ResNetX backbone. TODO: add more info + @type variant: Literal["18", "34", "50", "101", "152"] + @param variant: ResNet variant. Defaults to "18". @type channels_list: list[int] | None @param channels_list: List of channels to return. If unset, defaults to [64, 128, 256, 512]. 
@@ -35,7 +37,12 @@ def __init__( """ super().__init__(**kwargs) - self.backbone = torchvision.models.resnet18( + if variant not in RESNET_VARIANTS: + raise ValueError( + f"ResNet model variant should be in {list(RESNET_VARIANTS.keys())}" + ) + + self.backbone = RESNET_VARIANTS[variant]( weights="DEFAULT" if download_weights else None ) self.channels_list = channels_list or [64, 128, 256, 512] @@ -57,3 +64,12 @@ def forward(self, x: Tensor) -> list[Tensor]: outs.append(x) return outs + + +RESNET_VARIANTS = { + "18": torchvision.models.resnet18, + "34": torchvision.models.resnet34, + "50": torchvision.models.resnet50, + "101": torchvision.models.resnet101, + "152": torchvision.models.resnet152, +} diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 12876e69..4033e89e 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 78% - 78% + 79% + 79% From 8e35f25e21ebc70ae1a5a421a35ffd412f24765d Mon Sep 17 00:00:00 2001 From: KlemenSkrlj <47853619+klemen1999@users.noreply.github.com> Date: Tue, 20 Feb 2024 03:44:24 +0100 Subject: [PATCH 02/75] MLFlow Upload Fix (#10) * fixed incorrect class property call * fixed exporter uploading * uploadCheckpoint uploads on every checkpoint epoch * fix temp files names * updated callback readme * pre-commit run --- luxonis_train/callbacks/README.md | 9 +++ luxonis_train/callbacks/__init__.py | 4 +- .../callbacks/export_on_train_end.py | 4 +- luxonis_train/callbacks/upload_checkpoint.py | 61 +++++++++++++++++++ .../upload_checkpoint_on_train_end.py | 41 ------------- luxonis_train/core/exporter.py | 6 +- 6 files changed, 78 insertions(+), 47 deletions(-) create mode 100644 luxonis_train/callbacks/upload_checkpoint.py delete mode 100644 luxonis_train/callbacks/upload_checkpoint_on_train_end.py diff --git a/luxonis_train/callbacks/README.md b/luxonis_train/callbacks/README.md index d8e3da74..be441017 100644 --- a/luxonis_train/callbacks/README.md +++ b/luxonis_train/callbacks/README.md @@ -9,6 +9,7 @@ List of all supported callbacks. - [LuxonisProgressBar](#luxonisprogressbar) - [MetadataLogger](#metadatalogger) - [TestOnTrainEnd](#testontrainend) +- [UploadCheckpoint](#uploadcheckpoint) ## PytorchLightning Callbacks @@ -51,3 +52,11 @@ Metadata include all defined hyperparameters together with git hashes of `luxoni ## TestOnTrainEnd Callback to perform a test run at the end of the training. + +## UploadCheckpoint + +Callback that uploads currently best checkpoint (based on validation loss) to specified cloud directory after every validation epoch. + +| Key | Type | Default value | Description | +| ---------------- | ---- | ------------- | ----------------------------------------------------------------------------------------------------------------------------- | +| upload_directory | str | / | Path to cloud directory where checkpoints should be uploaded to. If you want to use current mlflow run set it to `mlflow://`. 
| diff --git a/luxonis_train/callbacks/__init__.py b/luxonis_train/callbacks/__init__.py index 4be94600..cec9e000 100644 --- a/luxonis_train/callbacks/__init__.py +++ b/luxonis_train/callbacks/__init__.py @@ -13,7 +13,7 @@ from .metadata_logger import MetadataLogger from .module_freezer import ModuleFreezer from .test_on_train_end import TestOnTrainEnd -from .upload_checkpoint_on_train_end import UploadCheckpointOnTrainEnd +from .upload_checkpoint import UploadCheckpoint CALLBACKS.register_module(module=EarlyStopping) CALLBACKS.register_module(module=LearningRateMonitor) @@ -28,5 +28,5 @@ "MetadataLogger", "ModuleFreezer", "TestOnTrainEnd", - "UploadCheckpointOnTrainEnd", + "UploadCheckpoint", ] diff --git a/luxonis_train/callbacks/export_on_train_end.py b/luxonis_train/callbacks/export_on_train_end.py index de5fde88..923267c1 100644 --- a/luxonis_train/callbacks/export_on_train_end.py +++ b/luxonis_train/callbacks/export_on_train_end.py @@ -51,8 +51,8 @@ def on_train_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> No if self.upload_to_mlflow: if cfg.tracker.is_mlflow: tracker = cast(LuxonisTrackerPL, trainer.logger) - new_upload_directory = f"mlflow://{tracker.project_id}/{tracker.run_id}" - cfg.exporter.upload_directory = new_upload_directory + new_upload_url = f"mlflow://{tracker.project_id}/{tracker.run_id}" + cfg.exporter.upload_url = new_upload_url else: logging.getLogger(__name__).warning( "`upload_to_mlflow` is set to True, " diff --git a/luxonis_train/callbacks/upload_checkpoint.py b/luxonis_train/callbacks/upload_checkpoint.py new file mode 100644 index 00000000..a0fa137a --- /dev/null +++ b/luxonis_train/callbacks/upload_checkpoint.py @@ -0,0 +1,61 @@ +import logging +import os +from typing import Any + +import lightning.pytorch as pl +import torch +from luxonis_ml.utils.filesystem import LuxonisFileSystem + +from luxonis_train.utils.registry import CALLBACKS + + +@CALLBACKS.register_module() +class UploadCheckpoint(pl.Callback): + """Callback that uploads best checkpoint based on the validation loss.""" + + def __init__(self, upload_directory: str): + """Constructs `UploadCheckpoint`. 
+ + @type upload_directory: str + @param upload_directory: Path used as upload directory + """ + super().__init__() + self.fs = LuxonisFileSystem( + upload_directory, allow_active_mlflow_run=True, allow_local=False + ) + self.logger = logging.getLogger(__name__) + self.last_logged_epoch = None + self.last_best_checkpoint = None + + def on_save_checkpoint( + self, + trainer: pl.Trainer, + pl_module: pl.LightningModule, + checkpoint: dict[str, Any], + ) -> None: + # Log only once per epoch in case there are multiple ModelCheckpoint callbacks + if not self.last_logged_epoch == trainer.current_epoch: + model_checkpoint_callbacks = [ + c + for c in trainer.callbacks # type: ignore + if isinstance(c, pl.callbacks.ModelCheckpoint) # type: ignore + ] + # NOTE: assume that first checkpoint callback is based on val loss + curr_best_checkpoint = model_checkpoint_callbacks[0].best_model_path + + if self.last_best_checkpoint != curr_best_checkpoint: + self.logger.info(f"Started checkpoint upload to {self.fs.full_path}...") + temp_filename = "curr_best_val_loss.ckpt" + torch.save(checkpoint, temp_filename) + self.fs.put_file( + local_path=temp_filename, + remote_path=temp_filename, + mlflow_instance=trainer.logger.experiment.get( # type: ignore + "mlflow", None + ), + ) + os.remove(temp_filename) + self.logger.info("Checkpoint upload finished") + self.last_best_checkpoint = curr_best_checkpoint + + self.last_logged_epoch = trainer.current_epoch diff --git a/luxonis_train/callbacks/upload_checkpoint_on_train_end.py b/luxonis_train/callbacks/upload_checkpoint_on_train_end.py deleted file mode 100644 index 86879ec9..00000000 --- a/luxonis_train/callbacks/upload_checkpoint_on_train_end.py +++ /dev/null @@ -1,41 +0,0 @@ -import logging - -import lightning.pytorch as pl -from luxonis_ml.utils.filesystem import LuxonisFileSystem - -from luxonis_train.utils.registry import CALLBACKS - - -@CALLBACKS.register_module() -class UploadCheckpointOnTrainEnd(pl.Callback): - """Callback that uploads best checkpoint based on the validation loss.""" - - def __init__(self, upload_directory: str): - """Constructs `UploadCheckpointOnTrainEnd`. 
- - @type upload_directory: str - @param upload_directory: Path used as upload directory - """ - super().__init__() - self.fs = LuxonisFileSystem( - upload_directory, allow_active_mlflow_run=True, allow_local=False - ) - - def on_train_end(self, trainer: pl.Trainer, _: pl.LightningModule) -> None: - logger = logging.getLogger(__name__) - logger.info(f"Started checkpoint upload to {self.fs.full_path()}...") - model_checkpoint_callbacks = [ - c - for c in trainer.callbacks # type: ignore - if isinstance(c, pl.callbacks.ModelCheckpoint) # type: ignore - ] - # NOTE: assume that first checkpoint callback is based on val loss - local_path = model_checkpoint_callbacks[0].best_model_path - self.fs.put_file( - local_path=local_path, - remote_path=local_path.split("/")[-1], - mlflow_instance=trainer.logger.experiment.get( # type: ignore - "mlflow", None - ), - ) - logger.info("Checkpoint upload finished") diff --git a/luxonis_train/core/exporter.py b/luxonis_train/core/exporter.py index ab73ce72..7ed94f45 100644 --- a/luxonis_train/core/exporter.py +++ b/luxonis_train/core/exporter.py @@ -200,7 +200,7 @@ def _upload(self, files_to_upload: list[str]): remote_path=self.cfg.exporter.export_model_name + suffix, ) - with tempfile.TemporaryFile() as f: + with tempfile.NamedTemporaryFile(prefix="config", suffix=".yaml") as f: self.cfg.save_data(f.name) fs.put_file(local_path=f.name, remote_path="config.yaml") @@ -209,7 +209,9 @@ def _upload(self, files_to_upload: list[str]): ) modelconverter_config = self._get_modelconverter_config(onnx_path) - with tempfile.TemporaryFile() as f: + with tempfile.NamedTemporaryFile( + prefix="config_export", suffix=".yaml", mode="w+" + ) as f: yaml.dump(modelconverter_config, f, default_flow_style=False) fs.put_file(local_path=f.name, remote_path="config_export.yaml") From 15bd923479283bdc0eb4a7e390974a495a380123 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Wed, 21 Feb 2024 02:15:43 +0100 Subject: [PATCH 03/75] CLI Source Option (#11) * option to source custom code in CLI * removed empty dicts * [Automated] Updated coverage badge --------- Co-authored-by: GitHub Actions --- luxonis_train/__main__.py | 17 ++- luxonis_train/core/exporter.py | 5 +- media/coverage_badge.svg | 4 +- pyproject.toml | 2 +- tools/main.py | 226 --------------------------------- 5 files changed, 15 insertions(+), 239 deletions(-) delete mode 100644 tools/main.py diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index 73843593..24cfd69b 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -214,13 +214,18 @@ def common( "--version", callback=version_callback, help="Show version and exit." ), ] = False, + source: Annotated[ + Optional[Path], + typer.Option( + help="Path to a python file with custom components. " + "Will be sourced before running the command.", + metavar="FILE", + ), + ] = None, ): - ... - - -def main(): - app() + if source: + exec(source.read_text()) if __name__ == "__main__": - main() + app() diff --git a/luxonis_train/core/exporter.py b/luxonis_train/core/exporter.py index 7ed94f45..6602a040 100644 --- a/luxonis_train/core/exporter.py +++ b/luxonis_train/core/exporter.py @@ -18,15 +18,12 @@ class Exporter(Core): - """Main API which is used to create the model, setup pytorch lightning environment - and perform training based on provided arguments and config.""" - def __init__( self, cfg: str | dict[str, Any] | Config, opts: list[str] | tuple[str, ...] 
| dict[str, Any] | None = None, ): - """Constructs a new Exporter instance. + """Provides an interface for exporting models to .onnx and .blob formats. @type cfg: str | dict[str, Any] | Config @param cfg: Path to config file or config dict used to setup training. diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 4033e89e..7a18c7f4 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 79% - 79% + 80% + 80% diff --git a/pyproject.toml b/pyproject.toml index 048c005b..2093e25b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ classifiers = [ ] [project.scripts] -luxonis_train = "tools.main:main" +luxonis_train = "luxonis_train.__main__:app" [project.urls] repository = "https://github.com/luxonis/luxonis-train" diff --git a/tools/main.py b/tools/main.py deleted file mode 100644 index 73843593..00000000 --- a/tools/main.py +++ /dev/null @@ -1,226 +0,0 @@ -import os -from enum import Enum -from importlib.metadata import version -from pathlib import Path -from typing import Annotated, Optional - -import cv2 -import torch -import typer - -app = typer.Typer(help="Luxonis Train CLI", add_completion=False) - - -class View(str, Enum): - train = "train" - val = "val" - test = "test" - - def __str__(self): - return self.value - - -ConfigType = Annotated[ - Optional[Path], - typer.Option( - help="Path to the configuration file.", - show_default=False, - ), -] - -OptsType = Annotated[ - Optional[list[str]], - typer.Argument( - help="A list of optional CLI overrides of the config file.", - show_default=False, - ), -] - -ViewType = Annotated[View, typer.Option(help="Which dataset view to use.")] - -SaveDirType = Annotated[ - Optional[Path], - typer.Option(help="Where to save the inference results."), -] - - -@app.command() -def train(config: ConfigType = None, opts: OptsType = None): - """Start training.""" - from luxonis_train.core import Trainer - - Trainer(str(config), opts).train() - - -@app.command() -def eval(config: ConfigType = None, view: ViewType = View.val, opts: OptsType = None): - """Evaluate model.""" - from luxonis_train.core import Trainer - - Trainer(str(config), opts).test(view=view.name) - - -@app.command() -def tune(config: ConfigType = None, opts: OptsType = None): - """Start hyperparameter tuning.""" - from luxonis_train.core import Tuner - - Tuner(str(config), opts).tune() - - -@app.command() -def export(config: ConfigType = None, opts: OptsType = None): - """Export model.""" - from luxonis_train.core import Exporter - - Exporter(str(config), opts).export() - - -@app.command() -def infer( - config: ConfigType = None, - view: ViewType = View.val, - save_dir: SaveDirType = None, - opts: OptsType = None, -): - """Run inference.""" - from luxonis_train.core import Inferer - - Inferer(str(config), opts, view=view.name, save_dir=save_dir).infer() - - -@app.command() -def inspect( - config: ConfigType = None, - view: ViewType = View.val, - save_dir: SaveDirType = None, - opts: OptsType = None, -): - """Inspect dataset.""" - from luxonis_ml.data import ( - LuxonisDataset, - TrainAugmentations, - ValAugmentations, - ) - - from luxonis_train.attached_modules.visualizers.utils import ( - draw_bounding_box_labels, - draw_keypoint_labels, - draw_segmentation_labels, - get_unnormalized_images, - ) - from luxonis_train.utils.config import Config - from luxonis_train.utils.loaders import LuxonisLoaderTorch, collate_fn - from luxonis_train.utils.types import LabelType - - overrides = {} - if opts: - if len(opts) 
% 2 != 0: - raise ValueError("Override options should be a list of key-value pairs") - - for i in range(0, len(opts), 2): - overrides[opts[i]] = opts[i + 1] - - cfg = Config.get_config(str(config), overrides) - - image_size = cfg.trainer.preprocessing.train_image_size - - dataset = LuxonisDataset( - dataset_name=cfg.dataset.name, - team_id=cfg.dataset.team_id, - dataset_id=cfg.dataset.id, - bucket_type=cfg.dataset.bucket_type, - bucket_storage=cfg.dataset.bucket_storage, - ) - augmentations = ( - TrainAugmentations( - image_size=image_size, - augmentations=[ - i.model_dump() for i in cfg.trainer.preprocessing.augmentations - ], - train_rgb=cfg.trainer.preprocessing.train_rgb, - keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio, - ) - if view == "train" - else ValAugmentations( - image_size=image_size, - augmentations=[ - i.model_dump() for i in cfg.trainer.preprocessing.augmentations - ], - train_rgb=cfg.trainer.preprocessing.train_rgb, - keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio, - ) - ) - - loader_train = LuxonisLoaderTorch( - dataset, - view=view, - augmentations=augmentations, - ) - - pytorch_loader_train = torch.utils.data.DataLoader( - loader_train, - batch_size=4, - num_workers=1, - collate_fn=collate_fn, - ) - - if save_dir is not None: - os.makedirs(save_dir, exist_ok=True) - - counter = 0 - for data in pytorch_loader_train: - imgs, label_dict = data - images = get_unnormalized_images(cfg, imgs) - for i, img in enumerate(images): - for label_type, labels in label_dict.items(): - if label_type == LabelType.CLASSIFICATION: - continue - elif label_type == LabelType.BOUNDINGBOX: - img = draw_bounding_box_labels( - img, labels[labels[:, 0] == i][:, 2:], colors="yellow", width=1 - ) - elif label_type == LabelType.KEYPOINT: - img = draw_keypoint_labels( - img, labels[labels[:, 0] == i][:, 1:], colors="red" - ) - elif label_type == LabelType.SEGMENTATION: - img = draw_segmentation_labels( - img, labels[i], alpha=0.8, colors="#5050FF" - ) - - img_arr = img.permute(1, 2, 0).numpy() - img_arr = cv2.cvtColor(img_arr, cv2.COLOR_RGB2BGR) - if save_dir is not None: - counter += 1 - cv2.imwrite(os.path.join(save_dir, f"{counter}.png"), img_arr) - else: - cv2.imshow("img", img_arr) - if cv2.waitKey() == ord("q"): - exit() - - -def version_callback(value: bool): - if value: - typer.echo(f"LuxonisTrain Version: {version(__package__)}") - raise typer.Exit() - - -@app.callback() -def common( - _: Annotated[ - bool, - typer.Option( - "--version", callback=version_callback, help="Show version and exit." - ), - ] = False, -): - ... 
- - -def main(): - app() - - -if __name__ == "__main__": - main() From 279727897a0f0fdec752fd303d9dd738ef23224d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Wed, 21 Feb 2024 12:54:02 +0100 Subject: [PATCH 04/75] Fix Removed Tensor Metadata (#12) * option to source custom code in CLI * removed empty dicts * fixed issue with removed tensor metadata in match case statements --- luxonis_train/attached_modules/visualizers/multi_visualizer.py | 2 +- luxonis_train/attached_modules/visualizers/utils.py | 2 +- luxonis_train/core/exporter.py | 2 +- luxonis_train/models/luxonis_model.py | 2 +- luxonis_train/nodes/base_node.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/luxonis_train/attached_modules/visualizers/multi_visualizer.py b/luxonis_train/attached_modules/visualizers/multi_visualizer.py index 2fee8e1f..99b64bf0 100644 --- a/luxonis_train/attached_modules/visualizers/multi_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/multi_visualizer.py @@ -47,7 +47,7 @@ def forward( ) -> tuple[Tensor, Tensor]: for visualizer in self.visualizers: match visualizer.run(label_canvas, prediction_canvas, outputs, labels): - case Tensor(data=prediction_viz): + case Tensor() as prediction_viz: prediction_canvas = prediction_viz case (Tensor(data=label_viz), Tensor(data=prediction_viz)): label_canvas = label_viz diff --git a/luxonis_train/attached_modules/visualizers/utils.py b/luxonis_train/attached_modules/visualizers/utils.py index 52431204..aa1a90d3 100644 --- a/luxonis_train/attached_modules/visualizers/utils.py +++ b/luxonis_train/attached_modules/visualizers/utils.py @@ -405,7 +405,7 @@ def resize_to_match( return fst_resized, snd_resized match visualization: - case Tensor(data=viz): + case Tensor() as viz: return viz case (Tensor(data=viz_labels), Tensor(data=viz_predictions)): viz_labels, viz_predictions = resize_to_match(viz_labels, viz_predictions) diff --git a/luxonis_train/core/exporter.py b/luxonis_train/core/exporter.py index 6602a040..0efd6d56 100644 --- a/luxonis_train/core/exporter.py +++ b/luxonis_train/core/exporter.py @@ -128,7 +128,7 @@ def export(self, onnx_path: str | None = None): model_onnx = onnx.load(onnx_path) onnx_model, check = onnxsim.simplify(model_onnx) if not check: - raise RuntimeError("Onnx simplify failed.") + raise RuntimeError("ONNX simplify failed.") onnx.save(onnx_model, onnx_path) logger.info(f"ONNX model saved to {onnx_path}") diff --git a/luxonis_train/models/luxonis_model.py b/luxonis_train/models/luxonis_model.py index 80a57d99..88d4fa28 100644 --- a/luxonis_train/models/luxonis_model.py +++ b/luxonis_train/models/luxonis_model.py @@ -360,7 +360,7 @@ def compute_metrics(self) -> dict[str, dict[str, Tensor]]: computed_submetrics = { metric_name: metric_value, } | submetrics - case Tensor(data=metric_value): + case Tensor() as metric_value: computed_submetrics = {metric_name: metric_value} case dict(submetrics): computed_submetrics = submetrics diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py index 6ec216fb..7338a802 100644 --- a/luxonis_train/nodes/base_node.py +++ b/luxonis_train/nodes/base_node.py @@ -291,7 +291,7 @@ def wrap(self, output: ForwardOutputT) -> Packet[Tensor]: """ match output: - case Tensor(data=out): + case Tensor() as out: outputs = [out] case list(tensors) if all(isinstance(t, Tensor) for t in tensors): outputs = tensors From 2c62a0812e3075331a0724d3a25fe1f35c34dd95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Thu, 22 Feb 
2024 08:57:10 +0100 Subject: [PATCH 05/75] Forbid Extra Fields (#13) * forbid extra fields in config * fixed configs --- configs/coco_model.yaml | 1 - configs/resnet_model.yaml | 5 ++-- luxonis_train/utils/config.py | 44 +++++++++++++++++++---------------- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/configs/coco_model.yaml b/configs/coco_model.yaml index 491152ce..67f3b91d 100755 --- a/configs/coco_model.yaml +++ b/configs/coco_model.yaml @@ -117,7 +117,6 @@ trainer: validation_interval: 10 num_log_images: 8 skip_last_batch: True - main_head_index: 0 log_sub_losses: True save_top_k: 3 diff --git a/configs/resnet_model.yaml b/configs/resnet_model.yaml index 7e93d269..e768d259 100644 --- a/configs/resnet_model.yaml +++ b/configs/resnet_model.yaml @@ -3,8 +3,9 @@ model: name: resnet50_classification nodes: - name: ResNet - variant: "50" - download_weights: True + params: + variant: "50" + download_weights: True - name: ClassificationHead inputs: diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 48661f7d..591376f8 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -5,7 +5,7 @@ from luxonis_ml.data import BucketStorage, BucketType from luxonis_ml.utils import Environ, LuxonisConfig, LuxonisFileSystem, setup_logging -from pydantic import BaseModel, Field, field_serializer, model_validator +from pydantic import BaseModel, ConfigDict, Field, field_serializer, model_validator from luxonis_train.utils.general import is_acyclic from luxonis_train.utils.registry import MODELS @@ -13,7 +13,11 @@ logger = logging.getLogger(__name__) -class AttachedModuleConfig(BaseModel): +class CustomBaseModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + +class AttachedModuleConfig(CustomBaseModel): name: str attached_to: str alias: str | None = None @@ -28,12 +32,12 @@ class MetricModuleConfig(AttachedModuleConfig): is_main_metric: bool = False -class FreezingConfig(BaseModel): +class FreezingConfig(CustomBaseModel): active: bool = False unfreeze_after: int | float | None = None -class ModelNodeConfig(BaseModel): +class ModelNodeConfig(CustomBaseModel): name: str alias: str | None = None inputs: list[str] = [] @@ -41,7 +45,7 @@ class ModelNodeConfig(BaseModel): freezing: FreezingConfig = FreezingConfig() -class PredefinedModelConfig(BaseModel): +class PredefinedModelConfig(CustomBaseModel): name: str params: dict[str, Any] = {} include_nodes: bool = True @@ -50,7 +54,7 @@ class PredefinedModelConfig(BaseModel): include_visualizers: bool = True -class ModelConfig(BaseModel): +class ModelConfig(CustomBaseModel): name: str predefined_model: PredefinedModelConfig | None = None weights: str | None = None @@ -114,7 +118,7 @@ def check_unique_names(self): return self -class TrackerConfig(BaseModel): +class TrackerConfig(CustomBaseModel): project_name: str | None = None project_id: str | None = None run_name: str | None = None @@ -126,7 +130,7 @@ class TrackerConfig(BaseModel): is_mlflow: bool = False -class DatasetConfig(BaseModel): +class DatasetConfig(CustomBaseModel): name: str | None = None id: str | None = None team_name: str | None = None @@ -143,7 +147,7 @@ def get_enum_value(self, v: Enum, _) -> str: return str(v.value) -class NormalizeAugmentationConfig(BaseModel): +class NormalizeAugmentationConfig(CustomBaseModel): active: bool = True params: dict[str, Any] = { "mean": [0.485, 0.456, 0.406], @@ -151,12 +155,12 @@ class NormalizeAugmentationConfig(BaseModel): } -class AugmentationConfig(BaseModel): +class 
AugmentationConfig(CustomBaseModel): name: str params: dict[str, Any] = {} -class PreprocessingConfig(BaseModel): +class PreprocessingConfig(CustomBaseModel): train_image_size: Annotated[ list[int], Field(default=[256, 256], min_length=2, max_length=2) ] = [256, 256] @@ -174,23 +178,23 @@ def check_normalize(self): return self -class CallbackConfig(BaseModel): +class CallbackConfig(CustomBaseModel): name: str active: bool = True params: dict[str, Any] = {} -class OptimizerConfig(BaseModel): +class OptimizerConfig(CustomBaseModel): name: str = "Adam" params: dict[str, Any] = {} -class SchedulerConfig(BaseModel): +class SchedulerConfig(CustomBaseModel): name: str = "ConstantLR" params: dict[str, Any] = {} -class TrainerConfig(BaseModel): +class TrainerConfig(CustomBaseModel): preprocessing: PreprocessingConfig = PreprocessingConfig() accelerator: Literal["auto", "cpu", "gpu"] = "auto" @@ -229,17 +233,17 @@ def check_num_workes_platform(self): return self -class OnnxExportConfig(BaseModel): +class OnnxExportConfig(CustomBaseModel): opset_version: int = 12 dynamic_axes: dict[str, Any] | None = None -class BlobconverterExportConfig(BaseModel): +class BlobconverterExportConfig(CustomBaseModel): active: bool = False shaves: int = 6 -class ExportConfig(BaseModel): +class ExportConfig(CustomBaseModel): export_save_directory: str = "output_export" input_shape: list[int] | None = None export_model_name: str = "model" @@ -265,12 +269,12 @@ def pad_values(values: float | list[float] | None): return self -class StorageConfig(BaseModel): +class StorageConfig(CustomBaseModel): active: bool = True storage_type: Literal["local", "remote"] = "local" -class TunerConfig(BaseModel): +class TunerConfig(CustomBaseModel): study_name: str = "test-study" use_pruner: bool = True n_trials: int | None = 15 From 0b51fa0e6f7c124d922738d820fb3c5b3652972c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Sat, 24 Feb 2024 10:09:53 +0100 Subject: [PATCH 06/75] Automatic Inference of attach_index (#14) * automatic inference of attach index based on type signature * added inference for input and x names --- luxonis_train/nodes/base_node.py | 19 ++++++++++++--- luxonis_train/nodes/bisenet_head.py | 7 +++--- luxonis_train/nodes/classification_head.py | 1 - luxonis_train/nodes/contextspatial.py | 8 +++---- luxonis_train/nodes/efficientrep.py | 6 ++--- .../nodes/implicit_keypoint_bbox_head.py | 4 +--- luxonis_train/nodes/micronet.py | 24 ++++++++----------- luxonis_train/nodes/mobilenetv2.py | 6 ++--- luxonis_train/nodes/mobileone.py | 5 ++-- luxonis_train/nodes/resnet.py | 6 ++--- luxonis_train/nodes/rexnetv1.py | 11 ++++----- luxonis_train/nodes/segmentation_head.py | 1 - 12 files changed, 45 insertions(+), 53 deletions(-) diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py index 7338a802..c3124f82 100644 --- a/luxonis_train/nodes/base_node.py +++ b/luxonis_train/nodes/base_node.py @@ -1,3 +1,4 @@ +import inspect from abc import ABC, abstractmethod from typing import Generic, TypeVar @@ -80,8 +81,6 @@ class BaseNode( Provide only in case the `input_shapes` were not provided. 
""" - attach_index: AttachIndexType = "all" - def __init__( self, *, @@ -96,7 +95,21 @@ def __init__( ): super().__init__() - self.attach_index = attach_index or self.attach_index + if attach_index is None: + parameters = inspect.signature(self.forward).parameters + inputs_forward_type = parameters.get( + "inputs", parameters.get("input", parameters.get("x", None)) + ) + if ( + inputs_forward_type is not None + and inputs_forward_type.annotation == Tensor + ): + self.attach_index = -1 + else: + self.attach_index = "all" + else: + self.attach_index = attach_index + self.in_protocols = in_protocols or [FeaturesProtocol] self.task_type = task_type diff --git a/luxonis_train/nodes/bisenet_head.py b/luxonis_train/nodes/bisenet_head.py index 99845177..a3b11df6 100644 --- a/luxonis_train/nodes/bisenet_head.py +++ b/luxonis_train/nodes/bisenet_head.py @@ -15,7 +15,6 @@ class BiSeNetHead(BaseNode[Tensor, Tensor]): - attach_index: int = -1 in_height: int in_channels: int @@ -45,6 +44,6 @@ def wrap(self, output: Tensor) -> Packet[Tensor]: return {"segmentation": [output]} def forward(self, inputs: Tensor) -> Tensor: - inputs = self.conv_3x3(inputs) - inputs = self.conv_1x1(inputs) - return self.upscale(inputs) + x = self.conv_3x3(inputs) + x = self.conv_1x1(x) + return self.upscale(x) diff --git a/luxonis_train/nodes/classification_head.py b/luxonis_train/nodes/classification_head.py index 10f9b3c9..d96e6b72 100644 --- a/luxonis_train/nodes/classification_head.py +++ b/luxonis_train/nodes/classification_head.py @@ -7,7 +7,6 @@ class ClassificationHead(BaseNode[Tensor, Tensor]): in_channels: int - attach_index: int = -1 def __init__( self, diff --git a/luxonis_train/nodes/contextspatial.py b/luxonis_train/nodes/contextspatial.py index adbb84bc..1ca1460d 100644 --- a/luxonis_train/nodes/contextspatial.py +++ b/luxonis_train/nodes/contextspatial.py @@ -18,8 +18,6 @@ class ContextSpatial(BaseNode[Tensor, list[Tensor]]): - attach_index: int = -1 - def __init__(self, context_backbone: str = "MobileNetV2", **kwargs): """Context spatial backbone. TODO: Add more documentation. 
@@ -34,9 +32,9 @@ def __init__(self, context_backbone: str = "MobileNetV2", **kwargs): self.spatial_path = SpatialPath(3, 128) self.ffm = FeatureFusionBlock(256, 256) - def forward(self, x: Tensor) -> list[Tensor]: - spatial_out = self.spatial_path(x) - context16, _ = self.context_path(x) + def forward(self, inputs: Tensor) -> list[Tensor]: + spatial_out = self.spatial_path(inputs) + context16, _ = self.context_path(inputs) fm_fuse = self.ffm(spatial_out, context16) outs = [fm_fuse] return outs diff --git a/luxonis_train/nodes/efficientrep.py b/luxonis_train/nodes/efficientrep.py index e6a014af..ccff4189 100644 --- a/luxonis_train/nodes/efficientrep.py +++ b/luxonis_train/nodes/efficientrep.py @@ -19,8 +19,6 @@ class EfficientRep(BaseNode[Tensor, list[Tensor]]): - attach_index: int = -1 - def __init__( self, channels_list: list[int] | None = None, @@ -104,9 +102,9 @@ def set_export_mode(self, mode: bool = True) -> None: if isinstance(module, RepVGGBlock): module.reparametrize() - def forward(self, x: Tensor) -> list[Tensor]: + def forward(self, inputs: Tensor) -> list[Tensor]: outputs = [] - x = self.repvgg_encoder(x) + x = self.repvgg_encoder(inputs) for block in self.blocks: x = block(x) outputs.append(x) diff --git a/luxonis_train/nodes/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/implicit_keypoint_bbox_head.py index 0fdca420..aff2b5a6 100644 --- a/luxonis_train/nodes/implicit_keypoint_bbox_head.py +++ b/luxonis_train/nodes/implicit_keypoint_bbox_head.py @@ -1,6 +1,6 @@ import logging import math -from typing import Literal, cast +from typing import cast import torch from torch import Tensor, nn @@ -22,8 +22,6 @@ class ImplicitKeypointBBoxHead(BaseNode): - attach_index: Literal["all"] = "all" - def __init__( self, n_keypoints: int | None = None, diff --git a/luxonis_train/nodes/micronet.py b/luxonis_train/nodes/micronet.py index 03b43e1f..603eabde 100644 --- a/luxonis_train/nodes/micronet.py +++ b/luxonis_train/nodes/micronet.py @@ -15,8 +15,6 @@ class MicroNet(BaseNode[Tensor, list[Tensor]]): TODO: DOCS """ - attach_index: int = -1 - def __init__(self, variant: Literal["M1", "M2", "M3"] = "M1", **kwargs): """MicroNet backbone. 
@@ -236,23 +234,21 @@ def __init__( ChannelShuffle(out_channels // 2) if y3 != 0 else nn.Sequential(), ) - def forward(self, x: Tensor): - identity = x - out = self.layers(x) + def forward(self, inputs: Tensor) -> Tensor: + out = self.layers(inputs) if self.identity: - out += identity + out += inputs return out class ChannelShuffle(nn.Module): def __init__(self, groups: int): - super(ChannelShuffle, self).__init__() + super().__init__() self.groups = groups - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: b, c, h, w = x.size() channels_per_group = c // self.groups - # reshape x = x.view(b, self.groups, channels_per_group, h, w) x = torch.transpose(x, 1, 2).contiguous() out = x.view(b, -1, h, w) @@ -300,7 +296,7 @@ def __init__( indexs = torch.cat([indexs[1], indexs[0]], dim=2) self.index = indexs.view(in_channels).long() - def forward(self, x: Tensor): + def forward(self, x: Tensor) -> Tensor: B, C, _, _ = x.shape x_out = x @@ -350,7 +346,7 @@ def __init__(self, in_channels: int, out_channels: int): nn.Linear(in_channels, out_channels), nn.BatchNorm1d(out_channels), HSwish() ) - def forward(self, x: Tensor): + def forward(self, x: Tensor) -> Tensor: return self.linear(x) @@ -383,7 +379,7 @@ def __init__( ChannelShuffle(out_channels1), ) - def forward(self, x: Tensor): + def forward(self, x: Tensor) -> Tensor: return self.conv(x) @@ -394,7 +390,7 @@ def __init__(self, in_channels: int, stride: int, outs: tuple[int, int] = (4, 4) SpatialSepConvSF(in_channels, outs, 3, stride), nn.ReLU6(True) ) - def forward(self, x: Tensor): + def forward(self, x: Tensor) -> Tensor: return self.stem(x) @@ -430,7 +426,7 @@ def __init__( nn.BatchNorm2d(out_channels), ) - def forward(self, x: Tensor): + def forward(self, x: Tensor) -> Tensor: return self.conv(x) diff --git a/luxonis_train/nodes/mobilenetv2.py b/luxonis_train/nodes/mobilenetv2.py index 27fe87ec..732d0b12 100644 --- a/luxonis_train/nodes/mobilenetv2.py +++ b/luxonis_train/nodes/mobilenetv2.py @@ -15,8 +15,6 @@ class MobileNetV2(BaseNode[Tensor, list[Tensor]]): TODO: add more info """ - attach_index: int = -1 - def __init__(self, download_weights: bool = False, **kwargs): """Constructor of the MobileNetV2 backbone. 
@@ -37,8 +35,8 @@ def __init__(self, download_weights: bool = False, **kwargs): def forward(self, x: Tensor) -> list[Tensor]: outs = [] - for i, m in enumerate(self.backbone.features): - x = m(x) + for i, module in enumerate(self.backbone.features): + x = module(x) if i in self.out_indices: outs.append(x) diff --git a/luxonis_train/nodes/mobileone.py b/luxonis_train/nodes/mobileone.py index e92d3225..14e6e02b 100644 --- a/luxonis_train/nodes/mobileone.py +++ b/luxonis_train/nodes/mobileone.py @@ -52,7 +52,6 @@ class MobileOne(BaseNode[Tensor, list[Tensor]]): TODO: add more details """ - attach_index: int = -1 in_channels: int VARIANTS_SETTINGS: dict[str, dict] = { @@ -115,9 +114,9 @@ def __init__(self, variant: Literal["s0", "s1", "s2", "s3", "s4"] = "s0", **kwar num_se_blocks=self.num_blocks_per_stage[3] if self.use_se else 0, ) - def forward(self, x: Tensor) -> list[Tensor]: + def forward(self, inputs: Tensor) -> list[Tensor]: outs = [] - x = self.stage0(x) + x = self.stage0(inputs) outs.append(x) x = self.stage1(x) outs.append(x) diff --git a/luxonis_train/nodes/resnet.py b/luxonis_train/nodes/resnet.py index 14ff8066..8228d37a 100644 --- a/luxonis_train/nodes/resnet.py +++ b/luxonis_train/nodes/resnet.py @@ -12,8 +12,6 @@ class ResNet(BaseNode[Tensor, list[Tensor]]): - attach_index: int = -1 - def __init__( self, variant: Literal["18", "34", "50", "101", "152"] = "18", @@ -47,9 +45,9 @@ def __init__( ) self.channels_list = channels_list or [64, 128, 256, 512] - def forward(self, x: Tensor) -> list[Tensor]: + def forward(self, inputs: Tensor) -> list[Tensor]: outs = [] - x = self.backbone.conv1(x) + x = self.backbone.conv1(inputs) x = self.backbone.bn1(x) x = self.backbone.relu(x) x = self.backbone.maxpool(x) diff --git a/luxonis_train/nodes/rexnetv1.py b/luxonis_train/nodes/rexnetv1.py index fb4de4b1..de2c08ae 100644 --- a/luxonis_train/nodes/rexnetv1.py +++ b/luxonis_train/nodes/rexnetv1.py @@ -17,8 +17,6 @@ class ReXNetV1_lite(BaseNode[Tensor, list[Tensor]]): - attach_index: int = -1 - def __init__( self, fix_head_stem: bool = False, @@ -129,8 +127,8 @@ def __init__( def forward(self, x: Tensor) -> list[Tensor]: outs = [] - for i, m in enumerate(self.features): - x = m(x) + for i, module in enumerate(self.features): + x = module(x) if i in self.out_indices: outs.append(x) return outs @@ -186,12 +184,11 @@ def __init__( self.out = nn.Sequential(*out) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: out = self.out(x) if self.use_shortcut: - # this results in a ScatterND node which isn't supported yet in myriad - # out[:, 0:self.in_channels] += x + # NOTE: this results in a ScatterND node which isn't supported yet in myriad a = out[:, : self.in_channels] b = x a = a + b diff --git a/luxonis_train/nodes/segmentation_head.py b/luxonis_train/nodes/segmentation_head.py index bdfe814d..a3420491 100644 --- a/luxonis_train/nodes/segmentation_head.py +++ b/luxonis_train/nodes/segmentation_head.py @@ -16,7 +16,6 @@ class SegmentationHead(BaseNode[Tensor, Tensor]): - attach_index: int = -1 in_height: int in_channels: int From bd67595c88e2d43f03cf95f91cbfd619a3366067 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Tue, 27 Feb 2024 10:28:35 +0100 Subject: [PATCH 07/75] Backbone Fix (#15) * fixed link in docs * fixed repvgg backbone * fixed efficientnet --- luxonis_train/nodes/__init__.py | 2 + luxonis_train/nodes/blocks/blocks.py | 69 +++++++++++----------------- luxonis_train/nodes/efficientnet.py | 2 + luxonis_train/nodes/efficientrep.py | 7 +-- 
luxonis_train/nodes/mobileone.py | 35 +------------- luxonis_train/nodes/repvgg.py | 61 +++++++++++++----------- 6 files changed, 70 insertions(+), 106 deletions(-) diff --git a/luxonis_train/nodes/__init__.py b/luxonis_train/nodes/__init__.py index 954db2be..9a506c1f 100644 --- a/luxonis_train/nodes/__init__.py +++ b/luxonis_train/nodes/__init__.py @@ -3,6 +3,7 @@ from .classification_head import ClassificationHead from .contextspatial import ContextSpatial from .efficient_bbox_head import EfficientBBoxHead +from .efficientnet import EfficientNet from .efficientrep import EfficientRep from .implicit_keypoint_bbox_head import ImplicitKeypointBBoxHead from .micronet import MicroNet @@ -19,6 +20,7 @@ "ClassificationHead", "ContextSpatial", "EfficientBBoxHead", + "EfficientNet", "EfficientRep", "ImplicitKeypointBBoxHead", "BaseNode", diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index f4bd0172..4ab2ad2d 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -216,10 +216,7 @@ def __init__( kernel_size: int = 3, stride: int = 1, padding: int = 1, - dilation: int = 1, groups: int = 1, - padding_mode: str = "zeros", - deploy: bool = False, use_se: bool = False, ): """RepVGGBlock is a basic rep-style block, including training and deploy status @@ -249,7 +246,6 @@ def __init__( """ super().__init__() - self.deploy = deploy self.groups = groups self.in_channels = in_channels self.out_channels = out_channels @@ -262,49 +258,37 @@ def __init__( self.nonlinearity = nn.ReLU() if use_se: - # Note that RepVGG-D2se uses SE before nonlinearity. But RepVGGplus models uses SqueezeExciteBlock after nonlinearity. + # NOTE: that RepVGG-D2se uses SE before nonlinearity. + # But RepVGGplus models uses SqueezeExciteBlock after nonlinearity. 
self.se = SqueezeExciteBlock( out_channels, intermediate_channels=int(out_channels // 16) ) else: - self.se = nn.Identity() # type: ignore + self.se = nn.Identity() - if deploy: - self.rbr_reparam = nn.Conv2d( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - groups=groups, - bias=True, - padding_mode=padding_mode, - ) - else: - self.rbr_identity = ( - nn.BatchNorm2d(num_features=in_channels) - if out_channels == in_channels and stride == 1 - else None - ) - self.rbr_dense = ConvModule( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - activation=nn.Identity(), - ) - self.rbr_1x1 = ConvModule( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - stride=stride, - padding=padding_11, - groups=groups, - activation=nn.Identity(), - ) + self.rbr_identity = ( + nn.BatchNorm2d(num_features=in_channels) + if out_channels == in_channels and stride == 1 + else None + ) + self.rbr_dense = ConvModule( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=groups, + activation=nn.Identity(), + ) + self.rbr_1x1 = ConvModule( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=stride, + padding=padding_11, + groups=groups, + activation=nn.Identity(), + ) def forward(self, x: Tensor): if hasattr(self, "rbr_reparam"): @@ -320,6 +304,7 @@ def forward(self, x: Tensor): def reparametrize(self): if hasattr(self, "rbr_reparam"): return + kernel, bias = self._get_equivalent_kernel_bias() self.rbr_reparam = nn.Conv2d( in_channels=self.rbr_dense[0].in_channels, diff --git a/luxonis_train/nodes/efficientnet.py b/luxonis_train/nodes/efficientnet.py index 0b0aedde..57b52d09 100644 --- a/luxonis_train/nodes/efficientnet.py +++ b/luxonis_train/nodes/efficientnet.py @@ -11,6 +11,8 @@ class EfficientNet(BaseNode[Tensor, list[Tensor]]): + attach_index: int = -1 + def __init__(self, download_weights: bool = False, **kwargs): """EfficientNet backbone. diff --git a/luxonis_train/nodes/efficientrep.py b/luxonis_train/nodes/efficientrep.py index ccff4189..4e92222f 100644 --- a/luxonis_train/nodes/efficientrep.py +++ b/luxonis_train/nodes/efficientrep.py @@ -17,6 +17,8 @@ from .base_node import BaseNode +logger = logging.getLogger(__name__) + class EfficientRep(BaseNode[Tensor, list[Tensor]]): def __init__( @@ -89,14 +91,13 @@ def __init__( ) def set_export_mode(self, mode: bool = True) -> None: - """Reparametrizes instances of `RepVGGBlock` in the network. + """Reparametrizes instances of L{RepVGGBlock} in the network. @type mode: bool @param mode: Whether to set the export mode. Defaults to C{True}. """ super().set_export_mode(mode) - logger = logging.getLogger(__name__) - if mode: + if self.export: logger.info("Reparametrizing EfficientRep.") for module in self.modules(): if isinstance(module, RepVGGBlock): diff --git a/luxonis_train/nodes/mobileone.py b/luxonis_train/nodes/mobileone.py index 14e6e02b..b1658eb4 100644 --- a/luxonis_train/nodes/mobileone.py +++ b/luxonis_train/nodes/mobileone.py @@ -1,38 +1,7 @@ """MobileOne backbone. 
-Soure: U{https://github.com/apple/ml-mobileone} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} @license: U{Apple -} -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } -@license: U{Apple } +Source: U{} +@license: U{Apple} """ diff --git a/luxonis_train/nodes/repvgg.py b/luxonis_train/nodes/repvgg.py index 44579fa5..f488a68c 100644 --- a/luxonis_train/nodes/repvgg.py +++ b/luxonis_train/nodes/repvgg.py @@ -1,4 +1,5 @@ -from copy import deepcopy +import logging +from typing import Literal import torch.utils.checkpoint as checkpoint from torch import Tensor, nn @@ -7,6 +8,8 @@ from .base_node import BaseNode +logger = logging.getLogger(__name__) + class RepVGG(BaseNode): """Implementation of RepVGG backbone. @@ -18,53 +21,37 @@ class RepVGG(BaseNode): """ in_channels: int + attach_index: int = -1 VARIANTS_SETTINGS = { "A0": { "num_blocks": [2, 4, 14, 1], - "num_classes": 1000, "width_multiplier": [0.75, 0.75, 0.75, 2.5], }, "A1": { "num_blocks": [2, 4, 14, 1], - "num_classes": 1000, "width_multiplier": [1, 1, 1, 2.5], }, "A2": { "num_blocks": [2, 4, 14, 1], - "num_classes": 1000, "width_multiplier": [1.5, 1.5, 1.5, 2.75], }, } - def __new__(cls, **kwargs): - variant = kwargs.pop("variant", "A0") - - if variant not in RepVGG.VARIANTS_SETTINGS.keys(): - raise ValueError( - f"RepVGG model variant should be in {list(RepVGG.VARIANTS_SETTINGS.keys())}" - ) - - overrides = deepcopy(kwargs) - kwargs.clear() - kwargs.update(RepVGG.VARIANTS_SETTINGS[variant]) - kwargs.update(overrides) - return cls.__new__(cls) - def __init__( self, - deploy: bool = False, + variant: Literal["A0", "A1", "A2"] = "A0", + num_blocks: list[int] | None = None, + width_multiplier: list[float] | None = None, override_groups_map: dict[int, int] | None = None, use_se: bool = False, use_checkpoint: bool = False, - num_blocks: list[int] | None = None, - width_multiplier: list[float] | None = None, **kwargs, ): """Constructor for the RepVGG module. - @type deploy: bool - @param deploy: Whether to use the model in deploy mode. + @type variant: Literal["A0", "A1", "A2"] + @param variant: RepVGG model variant. Defaults to "A0". @type override_groups_map: dict[int, int] | None @param override_groups_map: Dictionary mapping layer index to number of groups. @type use_se: bool @@ -77,9 +64,16 @@ def __init__( @param width_multiplier: Width multiplier for each stage. """ super().__init__(**kwargs) - num_blocks = num_blocks or [2, 4, 14, 1] - width_multiplier = width_multiplier or [0.75, 0.75, 0.75, 2.5] - self.deploy = deploy + if variant not in self.VARIANTS_SETTINGS.keys(): + raise ValueError( + f"RepVGG model variant should be one of " + f"{list(self.VARIANTS_SETTINGS.keys())}." 
+ ) + + num_blocks = num_blocks or self.VARIANTS_SETTINGS[variant]["num_blocks"] + width_multiplier = ( + width_multiplier or self.VARIANTS_SETTINGS[variant]["width_multiplier"] + ) self.override_groups_map = override_groups_map or {} assert 0 not in self.override_groups_map self.use_se = use_se @@ -92,7 +86,6 @@ def __init__( kernel_size=3, stride=2, padding=1, - deploy=self.deploy, use_se=self.use_se, ) self.cur_layer_idx = 1 @@ -135,10 +128,22 @@ def _make_stage(self, planes: int, num_blocks: int, stride: int): stride=stride, padding=1, groups=cur_groups, - deploy=self.deploy, use_se=self.use_se, ) ) self.in_planes = planes self.cur_layer_idx += 1 return nn.ModuleList(blocks) + + def set_export_mode(self, mode: bool = True) -> None: + """Reparametrizes instances of L{RepVGGBlock} in the network. + + @type mode: bool + @param mode: Whether to set the export mode. Defaults to C{True}. + """ + super().set_export_mode(mode) + if self.export: + logger.info("Reparametrizing RepVGG.") + for module in self.modules(): + if isinstance(module, RepVGGBlock): + module.reparametrize() From f42192cfd679aa6ed4e6200908b089a963c5c7d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Wed, 28 Feb 2024 17:07:11 +0100 Subject: [PATCH 08/75] Uploading logs to MLFlow (#16) * upload logs to mlflow * added mlflwo instance * multithread log upload * fixed upload logs * fixed log file path * removed exceptions * logging exceptions * fixed typo * reverted exception * moved line * replaced warning with error log * Update trainer.py --- .../callbacks/export_on_train_end.py | 16 +++++++---- luxonis_train/core/core.py | 4 ++- luxonis_train/core/trainer.py | 28 +++++++++++++++++-- luxonis_train/models/luxonis_model.py | 2 ++ 4 files changed, 41 insertions(+), 9 deletions(-) diff --git a/luxonis_train/callbacks/export_on_train_end.py b/luxonis_train/callbacks/export_on_train_end.py index 923267c1..5d7bf6da 100644 --- a/luxonis_train/callbacks/export_on_train_end.py +++ b/luxonis_train/callbacks/export_on_train_end.py @@ -8,6 +8,8 @@ from luxonis_train.utils.registry import CALLBACKS from luxonis_train.utils.tracker import LuxonisTrackerPL +logger = logging.getLogger(__name__) + @CALLBACKS.register_module() class ExportOnTrainEnd(pl.Callback): @@ -41,11 +43,13 @@ def on_train_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> No # NOTE: assume that first checkpoint callback is based on val loss best_model_path = model_checkpoint_callbacks[0].best_model_path if not best_model_path: - raise RuntimeError( - "No best model path found. " - "Please make sure that ModelCheckpoint callback is present " - "and at least one validation epoch has been performed." + logger.error( + "No model checkpoint found. " + "Make sure that `ModelCheckpoint` callback is present " + "and at least one validation epoch has been performed. " + "Skipping model export." ) + return cfg: Config = pl_module.cfg cfg.model.weights = best_model_path if self.upload_to_mlflow: @@ -54,9 +58,9 @@ def on_train_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> No new_upload_url = f"mlflow://{tracker.project_id}/{tracker.run_id}" cfg.exporter.upload_url = new_upload_url else: - logging.getLogger(__name__).warning( + logger.error( "`upload_to_mlflow` is set to True, " - "but there is no MLFlow active run, skipping." + "but there is no MLFlow active run, skipping." 
) exporter = Exporter(cfg=cfg) onnx_path = str(Path(best_model_path).parent.with_suffix(".onnx")) diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index 75bd1d2a..86b63600 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -79,12 +79,14 @@ def __init__( self.run_save_dir = os.path.join( self.cfg.tracker.save_directory, self.tracker.run_name ) + self.log_file = osp.join(self.run_save_dir, "luxonis_train.log") + # NOTE: to add the file handler (we only get the save dir now, # but we want to use the logger before) reset_logging() setup_logging( use_rich=self.cfg.use_rich_text, - file=osp.join(self.run_save_dir, "luxonis_train.log"), + file=self.log_file, ) # NOTE: overriding logger in pl so it uses our logger to log device info diff --git a/luxonis_train/core/trainer.py b/luxonis_train/core/trainer.py index cb2c5a2c..2b3d6a78 100644 --- a/luxonis_train/core/trainer.py +++ b/luxonis_train/core/trainer.py @@ -3,6 +3,7 @@ from typing import Any, Literal from lightning.pytorch.utilities import rank_zero_only # type: ignore +from luxonis_ml.utils import LuxonisFileSystem from luxonis_train.models import LuxonisModel from luxonis_train.utils.config import Config @@ -39,6 +40,28 @@ def __init__( input_shape=self.loader_train.input_shape, ) + def _upload_logs(self) -> None: + if self.cfg.tracker.is_mlflow: + logger.info("Uploading logs to MLFlow.") + fs = LuxonisFileSystem( + "mlflow://", + allow_active_mlflow_run=True, + allow_local=False, + ) + fs.put_file( + local_path=self.log_file, + remote_path="luxonis_train.log", + mlflow_instance=self.tracker.experiment.get("mlflow", None), + ) + + def _trainer_fit(self, *args, **kwargs): + try: + self.pl_trainer.fit(*args, **kwargs) + except Exception: + logger.exception("Encountered exception during training.") + finally: + self._upload_logs() + def train(self, new_thread: bool = False) -> None: """Runs training. 
@@ -48,13 +71,14 @@ def train(self, new_thread: bool = False) -> None: if not new_thread: logger.info(f"Checkpoints will be saved in: {self.get_save_dir()}") logger.info("Starting training...") - self.pl_trainer.fit( + self._trainer_fit( self.lightning_module, self.pytorch_loader_train, self.pytorch_loader_val, ) logger.info("Training finished") logger.info(f"Checkpoints saved in: {self.get_save_dir()}") + else: # Every time exception happens in the Thread, this hook will activate def thread_exception_hook(args): @@ -63,7 +87,7 @@ def thread_exception_hook(args): threading.excepthook = thread_exception_hook self.thread = threading.Thread( - target=self.pl_trainer.fit, + target=self._trainer_fit, args=( self.lightning_module, self.pytorch_loader_train, diff --git a/luxonis_train/models/luxonis_model.py b/luxonis_train/models/luxonis_model.py index 88d4fa28..7cd396f9 100644 --- a/luxonis_train/models/luxonis_model.py +++ b/luxonis_train/models/luxonis_model.py @@ -681,7 +681,9 @@ def load_checkpoint(self, path: str | None) -> None: """ if path is None: return + checkpoint = torch.load(path, map_location=self.device) + if "state_dict" not in checkpoint: raise ValueError("Checkpoint does not contain state_dict.") state_dict = {} From e1ab39b7bd49e16971e49f181e9ceefd8129b3dd Mon Sep 17 00:00:00 2001 From: jkbmrz <74824974+jkbmrz@users.noreply.github.com> Date: Wed, 20 Mar 2024 09:06:32 +0100 Subject: [PATCH 09/75] Generate NN archive from training configs (#17) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add archiver CLI * add archiver callback * add max_det parameter to EfficientBBoxHead * add enum to categorize tasks for the implemented heads * add archiver tests * adjust Archiver to new nn archive format * pre-comit formatting * add LDF creation and adjust to new nn archive format * update requirements.txt * add opencv-python to requirements.txt * add support for ImplicitKeypointBBoxHead * remove support for ObjectDetectionSSD * Update requirements.txt * Added mlflow and removed opencv * [Automated] Updated coverage badge * add support for SegmentationHead and BiSeNetHead * base archiver tests on model from luxonis-train instead of torchvision * adjust head parameters to changes in NN Archive * adjust keypoint detection head parameters to changes in NN Archive * bugfix - make sure self.max_det is used in nms * add max_det parameter to ImplicitKeypointBBoxHead * adjust task categorization for ImplicitKeypointBBoxHead * fixing Windows PermissionError occuring on file deletion * fixing Windows PermissionError occuring on file deletion due to unreleased logging handlers * add method to remove file handlers keeping the log file open * add a logging statement at the end of archiving * add optuna_integration to requirements.txt * add hard-coded solution to determining is_softmax parameter * added help --------- Co-authored-by: Martin Kozlovský Co-authored-by: GitHub Actions --- luxonis_train/__main__.py | 14 + luxonis_train/callbacks/__init__.py | 2 + .../callbacks/archive_on_train_end.py | 72 ++++ luxonis_train/core/__init__.py | 3 +- luxonis_train/core/archiver.py | 371 ++++++++++++++++++ luxonis_train/core/core.py | 4 + luxonis_train/nodes/efficient_bbox_head.py | 6 + .../nodes/enums/head_categorization.py | 21 + .../nodes/implicit_keypoint_bbox_head.py | 5 + luxonis_train/utils/config.py | 7 + media/coverage_badge.svg | 4 +- requirements.txt | 5 +- tests/unittests/test_core/__init__.py | 0 tests/unittests/test_core/test_archiver.py | 158 ++++++++ 
14 files changed, 668 insertions(+), 4 deletions(-) create mode 100644 luxonis_train/callbacks/archive_on_train_end.py create mode 100644 luxonis_train/core/archiver.py create mode 100644 luxonis_train/nodes/enums/head_categorization.py create mode 100644 tests/unittests/test_core/__init__.py create mode 100644 tests/unittests/test_core/test_archiver.py diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index 24cfd69b..b1fd3971 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -200,6 +200,20 @@ def inspect( exit() +@app.command() +def archive( + executable: Annotated[ + Optional[Path], typer.Option(help="Path to the model file.", show_default=False) + ], + config: ConfigType = None, + opts: OptsType = None, +): + """Generate NN archive.""" + from luxonis_train.core import Archiver + + Archiver(str(config), opts).archive(executable) + + def version_callback(value: bool): if value: typer.echo(f"LuxonisTrain Version: {version(__package__)}") diff --git a/luxonis_train/callbacks/__init__.py b/luxonis_train/callbacks/__init__.py index cec9e000..ae1fe86e 100644 --- a/luxonis_train/callbacks/__init__.py +++ b/luxonis_train/callbacks/__init__.py @@ -8,6 +8,7 @@ from luxonis_train.utils.registry import CALLBACKS +from .archive_on_train_end import ArchiveOnTrainEnd from .export_on_train_end import ExportOnTrainEnd from .luxonis_progress_bar import LuxonisProgressBar from .metadata_logger import MetadataLogger @@ -23,6 +24,7 @@ __all__ = [ + "ArchiveOnTrainEnd", "ExportOnTrainEnd", "LuxonisProgressBar", "MetadataLogger", diff --git a/luxonis_train/callbacks/archive_on_train_end.py b/luxonis_train/callbacks/archive_on_train_end.py new file mode 100644 index 00000000..4f5b6bc2 --- /dev/null +++ b/luxonis_train/callbacks/archive_on_train_end.py @@ -0,0 +1,72 @@ +import logging +import os +from pathlib import Path +from typing import cast + +import lightning.pytorch as pl + +from luxonis_train.utils.config import Config +from luxonis_train.utils.registry import CALLBACKS +from luxonis_train.utils.tracker import LuxonisTrackerPL + + +@CALLBACKS.register_module() +class ArchiveOnTrainEnd(pl.Callback): + def __init__(self, upload_to_mlflow: bool = False): + """Callback that performs archiving of onnx or exported model at the end of + training/export. TODO: description. + + @type upload_to_mlflow: bool + @param upload_to_mlflow: If set to True, overrides the upload url in Archiver + with currently active MLFlow run (if present). + """ + super().__init__() + self.upload_to_mlflow = upload_to_mlflow + + def on_train_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> None: + """Archives the model on train end. + + @type trainer: L{pl.Trainer} + @param trainer: Pytorch Lightning trainer. + @type pl_module: L{pl.LightningModule} + @param pl_module: Pytorch Lightning module. + @raises RuntimeError: If no best model path is found. + """ + from luxonis_train.core.archiver import Archiver + + model_checkpoint_callbacks = [ + c + for c in trainer.callbacks # type: ignore + if isinstance(c, pl.callbacks.ModelCheckpoint) # type: ignore + ] + + # NOTE: assume that first checkpoint callback is based on val loss + best_model_path = model_checkpoint_callbacks[0].best_model_path + if not best_model_path: + raise RuntimeError( + "No best model path found. " + "Please make sure that ModelCheckpoint callback is present " + "and at least one validation epoch has been performed." 
+            )
+        cfg: Config = pl_module.cfg
+        cfg.model.weights = best_model_path
+        if self.upload_to_mlflow:
+            if cfg.tracker.is_mlflow:
+                tracker = cast(LuxonisTrackerPL, trainer.logger)
+                new_upload_url = f"mlflow://{tracker.project_id}/{tracker.run_id}"
+                cfg.archiver.upload_url = new_upload_url
+            else:
+                logging.getLogger(__name__).warning(
+                    "`upload_to_mlflow` is set to True, "
+                    "but there is no MLFlow active run, skipping."
+                )
+
+        onnx_path = str(Path(best_model_path).parent.with_suffix(".onnx"))
+        if not os.path.exists(onnx_path):
+            raise FileNotFoundError(
+                "Model executable not found. Make sure to run exporter callback before archiver callback"
+            )
+
+        archiver = Archiver(cfg=cfg)
+
+        archiver.archive(onnx_path)
diff --git a/luxonis_train/core/__init__.py b/luxonis_train/core/__init__.py
index 6264473b..d3e89663 100644
--- a/luxonis_train/core/__init__.py
+++ b/luxonis_train/core/__init__.py
@@ -1,6 +1,7 @@
+from .archiver import Archiver
 from .exporter import Exporter
 from .inferer import Inferer
 from .trainer import Trainer
 from .tuner import Tuner
 
-__all__ = ["Exporter", "Trainer", "Tuner", "Inferer"]
+__all__ = ["Exporter", "Trainer", "Tuner", "Inferer", "Archiver"]
diff --git a/luxonis_train/core/archiver.py b/luxonis_train/core/archiver.py
new file mode 100644
index 00000000..58fc231f
--- /dev/null
+++ b/luxonis_train/core/archiver.py
@@ -0,0 +1,371 @@
+import os
+from logging import getLogger
+from pathlib import Path
+from typing import Any
+
+import onnx
+from luxonis_ml.nn_archive.archive_generator import ArchiveGenerator
+from luxonis_ml.nn_archive.config import CONFIG_VERSION
+from luxonis_ml.nn_archive.config_building_blocks import ObjectDetectionSubtypeYOLO
+from luxonis_ml.utils import LuxonisFileSystem
+
+from luxonis_train.models import LuxonisModel
+from luxonis_train.nodes.enums.head_categorization import (
+    ImplementedHeads,
+    ImplementedHeadsIsSoxtmaxed,
+)
+from luxonis_train.utils.config import Config
+
+from .core import Core
+
+logger = getLogger(__name__)
+
+
+class Archiver(Core):
+    """Main API which is used to construct the NN archive out of a training config and
+    model executables."""
+
+    def __init__(
+        self,
+        cfg: str | dict[str, Any] | Config,
+        opts: list[str] | tuple[str, ...] | dict[str, Any] | None = None,
+    ):
+        """Constructs a new Archiver instance.
+
+        @type cfg: str | dict[str, Any] | Config
+        @param cfg: Path to config file or config dict used to setup training.
+        @type opts: list[str] | tuple[str, ...] | dict[str, Any] | None
+        @param opts: Argument dict provided through command line,
+            used for config overriding.
+        """
+
+        super().__init__(cfg, opts)
+
+        self.lightning_module = LuxonisModel(
+            cfg=self.cfg,
+            dataset_metadata=self.dataset_metadata,
+            save_dir=self.run_save_dir,
+            input_shape=self.loader_train.input_shape,
+        )
+
+        self.model_name = self.cfg.model.name
+
+        self.archive_name = self.cfg.archiver.archive_name
+        archive_save_directory = Path(self.cfg.archiver.archive_save_directory)
+        if not archive_save_directory.exists():
+            logger.info(f"Creating archive directory {archive_save_directory}")
+            archive_save_directory.mkdir(parents=True, exist_ok=True)
+        self.archive_save_directory = str(archive_save_directory)
+
+        self.inputs = []
+        self.outputs = []
+        self.heads = []
+
+    def archive(self, executable_path: str):
+        """Runs archiving.
+
+        @type executable_path: str
+        @param executable_path: Path to model executable file (e.g. ONNX model). 
+ """ + + executable_fname = os.path.split(executable_path)[1] + _, executable_suffix = os.path.splitext(executable_fname) + self.archive_name += f"_{executable_suffix[1:]}" + + preprocessing = { # TODO: keep preprocessing same for each input? + "mean": self.cfg.trainer.preprocessing.normalize.params["mean"], + "scale": self.cfg.trainer.preprocessing.normalize.params["std"], + "reverse_channels": self.cfg.trainer.preprocessing.train_rgb, + "interleaved_to_planar": False, # TODO: make it modifiable? + } + + inputs_dict = self._get_inputs(executable_path) + for input_name in inputs_dict: + self._add_input( + name=input_name, + dtype=inputs_dict[input_name]["dtype"], + shape=inputs_dict[input_name]["shape"], + preprocessing=preprocessing, + ) + + outputs_dict = self._get_outputs(executable_path) + for output_name in outputs_dict: + self._add_output(name=output_name, dtype=outputs_dict[output_name]["dtype"]) + + heads_dict = self._get_heads(executable_path) + for head_name in heads_dict: + self._add_head(heads_dict[head_name]) + + model = { + "metadata": { + "name": self.model_name, + "path": executable_fname, + }, + "inputs": self.inputs, + "outputs": self.outputs, + "heads": self.heads, + } + + cfg_dict = { + "config_version": CONFIG_VERSION.__args__[0], + "model": model, + } + + self.archive_path = ArchiveGenerator( + archive_name=self.archive_name, + save_path=self.archive_save_directory, + cfg_dict=cfg_dict, + executables_paths=[executable_path], # TODO: what if more executables? + ).make_archive() + + logger.info(f"archive saved to {self.archive_path}") + + if self.cfg.archiver.upload_url is not None: + self._upload() + + return self.archive_path + + def _get_inputs(self, executable_path: str): + """Get inputs of a model executable. + + @type executable_path: str + @param executable_path: Path to model executable file. + """ + + _, executable_suffix = os.path.splitext(executable_path) + if executable_suffix == ".onnx": + return self._get_onnx_inputs(executable_path) + else: + raise NotImplementedError( + f"Missing input reading function for {executable_suffix} models." + ) + + def _get_onnx_inputs(self, executable_path: str): + """Get inputs of an ONNX model executable. + + @type executable_path: str + @param executable_path: Path to model executable file. + """ + + inputs_dict = {} + model = onnx.load(executable_path) + for input in model.graph.input: + tensor_type = input.type.tensor_type + dtype_idx = tensor_type.elem_type + dtype = str(onnx.helper.tensor_dtype_to_np_dtype(dtype_idx)) + shape = [] + for d in tensor_type.shape.dim: + if d.HasField("dim_value"): + shape.append(d.dim_value) + else: + raise ValueError("Unsupported input dimension identifier type") + inputs_dict[input.name] = {"dtype": dtype, "shape": shape} + return inputs_dict + + def _add_input( + self, + name: str, + dtype: str, + shape: list, + preprocessing: dict, + input_type: str = "image", + ) -> None: + """Add input to self.inputs. + + @type name: str + @param name: Name of the input layer. + @type dtype: str + @param dtype: Data type of the input data (e.g., 'float32'). + @type shape: list + @param shape: Shape of the input data as a list of integers (e.g. [H,W], [H,W,C], [BS,H,W,C], ...). + @type preprocessing: dict + @param preprocessing: Preprocessing steps applied to the input data. + @type input_type: str + @param input_type: Type of input data (e.g., 'image'). 
+ """ + + self.inputs.append( + { + "name": name, + "dtype": dtype, + "input_type": input_type, + "shape": shape, + "preprocessing": preprocessing, + } + ) + + def _get_outputs(self, executable_path): + """Get outputs of a model executable. + + @type executable_path: str + @param executable_path: Path to model executable file. + """ + + _, executable_suffix = os.path.splitext(executable_path) + if executable_suffix == ".onnx": + return self._get_onnx_outputs(executable_path) + else: + raise NotImplementedError( + f"Missing input reading function for {executable_suffix} models." + ) + + def _get_onnx_outputs(self, executable_path): + """Get outputs of an ONNX model executable. + + @type executable_path: str + @param executable_path: Path to model executable file. + """ + + outputs_dict = {} + model = onnx.load(executable_path) + for output in model.graph.output: + tensor_type = output.type.tensor_type + dtype_idx = tensor_type.elem_type + dtype = str(onnx.helper.tensor_dtype_to_np_dtype(dtype_idx)) + outputs_dict[output.name] = {"dtype": dtype} + return outputs_dict + + def _add_output(self, name: str, dtype: str) -> None: + """Add output to self.outputs. + + @type name: str + @param name: Name of the output layer. + @type dtype: str + @param dtype: Data type of the output data (e.g., 'float32'). + """ + + self.outputs.append({"name": name, "dtype": dtype}) + + def _get_classes(self, head_family): + if head_family.startswith("Classification"): + return self.dataset_metadata._classes["class"] + elif head_family.startswith("Object"): + return self.dataset_metadata._classes["boxes"] + elif head_family.startswith("Segmentation"): + return self.dataset_metadata._classes["segmentation"] + elif head_family.startswith("Keypoint"): + return self.dataset_metadata._classes["keypoints"] + else: + raise ValueError( + f"No classes found for the specified head family ({head_family})" + ) + + def _get_head_specific_parameters( + self, head_name, head_alias, executable_path + ) -> dict: + """Get parameters specific to head. + + @type head_name: str + @param head_name: Name of the head (e.g. 'EfficientBBoxHead'). + @type head_alias: str + @param head_alias: Alias of the head (e.g. 'detection_head'). + @type executable_path: str + @param executable_path: Path to model executable file. + """ + + parameters = {} + if head_name == "ClassificationHead": + parameters["is_softmax"] = getattr( + ImplementedHeadsIsSoxtmaxed, head_name + ).value + elif head_name == "EfficientBBoxHead": + parameters["subtype"] = ObjectDetectionSubtypeYOLO.YOLOv6.value + head_node = self.lightning_module._modules["nodes"][head_alias] + parameters["iou_threshold"] = head_node.iou_thres + parameters["conf_threshold"] = head_node.conf_thres + parameters["max_det"] = head_node.max_det + elif head_name in ["SegmentationHead", "BiSeNetHead"]: + parameters["is_softmax"] = getattr( + ImplementedHeadsIsSoxtmaxed, head_name + ).value + elif head_name == "ImplicitKeypointBBoxHead": + parameters["subtype"] = ObjectDetectionSubtypeYOLO.YOLOv7.value + head_node = self.lightning_module._modules["nodes"][head_alias] + parameters["iou_threshold"] = head_node.iou_thres + parameters["conf_threshold"] = head_node.conf_thres + parameters["max_det"] = head_node.max_det + parameters["n_keypoints"] = head_node.n_keypoints + parameters["anchors"] = head_node.anchors.tolist() + + else: + raise ValueError("Unknown head name") + return parameters + + def _get_head_outputs(self, head_name) -> dict: + """Get model outputs in a head-specific format. 
+ + @type head_name: str + @param head_name: Name of the head (e.g. 'EfficientBBoxHead'). + """ + + head_outputs = {} + if head_name == "ClassificationHead": + head_outputs["predictions"] = self.outputs[0]["name"] + elif head_name == "EfficientBBoxHead": + head_outputs["yolo_outputs"] = [output["name"] for output in self.outputs] + elif head_name in ["SegmentationHead", "BiSeNetHead"]: + head_outputs["predictions"] = self.outputs[0]["name"] + elif head_name == "ImplicitKeypointBBoxHead": + head_outputs["predictions"] = self.outputs[0]["name"] + else: + raise ValueError("Unknown head name") + return head_outputs + + def _get_heads(self, executable_path): + """Get model heads. + + @type executable_path: str + @param executable_path: Path to model executable file. + """ + heads_dict = {} + + for node in self.cfg.model.nodes: + node_name = node.name + node_alias = node.alias + # node_inputs = node.inputs + if node_alias in self.lightning_module.outputs: + if node_name in ImplementedHeads.__members__: + head_family = getattr(ImplementedHeads, node_name).value + classes = self._get_classes(head_family) + head_outputs = self._get_head_outputs(node_name) + head_dict = { + "family": head_family, + "outputs": head_outputs, + "classes": classes, + "n_classes": len(classes), + } + head_dict.update( + self._get_head_specific_parameters( + node_name, node_alias, executable_path + ) + ) + heads_dict[node_name] = head_dict + return heads_dict + + def _add_head(self, head_metadata: dict) -> str: + """Add head to self.heads. + + @type metadata: dict + @param metadata: Parameters required by head to run postprocessing. + """ + + self.heads.append(head_metadata) + + def _upload(self): + """Uploads the archive file to specified s3 bucket. + + @raises ValueError: If upload url was not specified in config file. + """ + + if self.cfg.archiver.upload_url is None: + raise ValueError("Upload url must be specified in config file.") + + fs = LuxonisFileSystem(self.cfg.archiver.upload_url, allow_local=False) + logger.info(f"Started Archive upload to {fs.full_path}...") + + fs.put_file( + local_path=self.archive_path, + remote_path=self.archive_name, + ) + + logger.info("Files upload finished") diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index 86b63600..761bc26f 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -234,3 +234,7 @@ def get_best_metric_checkpoint_path(self) -> str: @return: Path to best checkpoint with respect to best validation metric """ return self.pl_trainer.checkpoint_callbacks[1].best_model_path # type: ignore + + def reset_logging(self) -> None: + """Close file handlers to release the log file.""" + reset_logging() diff --git a/luxonis_train/nodes/efficient_bbox_head.py b/luxonis_train/nodes/efficient_bbox_head.py index 9f500cd4..a4f3bc93 100644 --- a/luxonis_train/nodes/efficient_bbox_head.py +++ b/luxonis_train/nodes/efficient_bbox_head.py @@ -30,6 +30,7 @@ def __init__( n_heads: Literal[2, 3, 4] = 3, conf_thres: float = 0.25, iou_thres: float = 0.45, + max_det: int = 300, **kwargs, ): """Head for object detection. @@ -45,6 +46,9 @@ def __init__( @type iou_thres: float @param iou_thres: Threshold for IoU. Defaults to C{0.45}. + + @type max_det: int + @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}. 
""" super().__init__(task_type=LabelType.BOUNDINGBOX, **kwargs) @@ -52,6 +56,7 @@ def __init__( self.conf_thres = conf_thres self.iou_thres = iou_thres + self.max_det = max_det self.stride = self._fit_stride_to_num_heads() self.grid_cell_offset = 0.5 @@ -163,5 +168,6 @@ def _process_to_bbox( conf_thres=self.conf_thres, iou_thres=self.iou_thres, bbox_format="xyxy", + max_det=self.max_det, predicts_objectness=False, ) diff --git a/luxonis_train/nodes/enums/head_categorization.py b/luxonis_train/nodes/enums/head_categorization.py new file mode 100644 index 00000000..56f98ff3 --- /dev/null +++ b/luxonis_train/nodes/enums/head_categorization.py @@ -0,0 +1,21 @@ +from enum import Enum + + +class ImplementedHeads(Enum): + """Task categorization for the implemented heads.""" + + ClassificationHead = "Classification" + EfficientBBoxHead = "ObjectDetectionYOLO" + ImplicitKeypointBBoxHead = "KeypointDetectionYOLO" + SegmentationHead = "Segmentation" + BiSeNetHead = "Segmentation" + + +class ImplementedHeadsIsSoxtmaxed(Enum): + """Softmaxed output categorization for the implemented heads.""" + + ClassificationHead = False + EfficientBBoxHead = None + ImplicitKeypointBBoxHead = None + SegmentationHead = False + BiSeNetHead = False diff --git a/luxonis_train/nodes/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/implicit_keypoint_bbox_head.py index aff2b5a6..7f0c3d61 100644 --- a/luxonis_train/nodes/implicit_keypoint_bbox_head.py +++ b/luxonis_train/nodes/implicit_keypoint_bbox_head.py @@ -30,6 +30,7 @@ def __init__( init_coco_biases: bool = True, conf_thres: float = 0.25, iou_thres: float = 0.45, + max_det: int = 300, **kwargs, ): """Head for object and keypoint detection. @@ -53,6 +54,8 @@ def __init__( @param conf_thres: Threshold for confidence. Defaults to C{0.25}. @type iou_thres: float @param iou_thres: Threshold for IoU. Defaults to C{0.45}. + @type max_det: int + @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}. 
""" super().__init__(task_type=LabelType.KEYPOINT, **kwargs) @@ -63,6 +66,7 @@ def __init__( self.conf_thres = conf_thres self.iou_thres = iou_thres + self.max_det = max_det n_keypoints = n_keypoints or self.dataset_metadata._n_keypoints @@ -164,6 +168,7 @@ def wrap(self, outputs: tuple[list[Tensor], Tensor]) -> Packet[Tensor]: conf_thres=self.conf_thres, iou_thres=self.iou_thres, bbox_format="cxcywh", + max_det=self.max_det, ) return { diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 591376f8..a2d4f332 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -269,6 +269,12 @@ def pad_values(values: float | list[float] | None): return self +class ArchiveConfig(BaseModel): + archive_name: str = "nn_archive" + archive_save_directory: str = "output_archive" + upload_url: str | None = None + + class StorageConfig(CustomBaseModel): active: bool = True storage_type: Literal["local", "remote"] = "local" @@ -292,6 +298,7 @@ class Config(LuxonisConfig): tracker: TrackerConfig = TrackerConfig() trainer: TrainerConfig = TrainerConfig() exporter: ExportConfig = ExportConfig() + archiver: ArchiveConfig = ArchiveConfig() tuner: TunerConfig | None = None ENVIRON: Environ = Field(Environ(), exclude=True) diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 7a18c7f4..4033e89e 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 80% - 80% + 79% + 79% diff --git a/requirements.txt b/requirements.txt index eecf828e..3a884284 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,12 @@ blobconverter>=1.4.2 lightning>=2.0.0 -luxonis-ml[all]>=0.0.1 +#luxonis-ml[all]>=0.0.1 +luxonis-ml[all]@git+https://github.com/luxonis/luxonis-ml.git@dev onnx>=1.12.0 onnxruntime>=1.13.1 onnxsim>=0.4.10 optuna>=3.2.0 +optuna_integration>=3.6.0 psycopg2-binary>=2.9.1 pycocotools>=2.0.7 rich>=13.0.0 @@ -12,3 +14,4 @@ s3fs>=2023.0.0 tensorboard>=2.10.1 torchvision>=0.16.0 typer>=0.9.0 +mlflow>=2.10.0 diff --git a/tests/unittests/test_core/__init__.py b/tests/unittests/test_core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unittests/test_core/test_archiver.py b/tests/unittests/test_core/test_archiver.py new file mode 100644 index 00000000..bdbaa5b9 --- /dev/null +++ b/tests/unittests/test_core/test_archiver.py @@ -0,0 +1,158 @@ +import io +import json +import os +import random +import shutil +import tarfile + +import cv2 +import lightning.pytorch as pl +import numpy as np +import onnx +import yaml +from luxonis_ml.data import LuxonisDataset + +import luxonis_train +from luxonis_train.core import Archiver +from luxonis_train.core.exporter import Exporter +from luxonis_train.core.trainer import Trainer +from luxonis_train.utils.config import Config + + +class TestArchiver: + @classmethod + def setup_class(cls): + """Create and load all files required for testing.""" + + luxonis_train_parent_dir = os.path.dirname( + os.path.dirname(luxonis_train.__file__) + ) + cls.tmp_path = os.path.join( + luxonis_train_parent_dir, "tests", "unittests", "test_core", "tmp" + ) + os.mkdir(cls.tmp_path) + + # make LDF + os.mkdir(os.path.join(cls.tmp_path, "images")) + cls.ldf_name = "dummyLDF" + labels = ["label1", "label2", "label3"] + + def classification_dataset_generator(): + for i in range(10): + img = np.random.randint(0, 256, (10, 10, 3), dtype=np.uint8) + img_file_path = os.path.join(cls.tmp_path, "images", f"img{i}.png") + cv2.imwrite(img_file_path, img) + yield { + "file": 
img_file_path, + "type": "classification", + "value": True, + "class": random.choice(labels), + } + + if LuxonisDataset.exists(cls.ldf_name): + print("Deleting existing dataset") + LuxonisDataset(cls.ldf_name).delete_dataset() + dataset = LuxonisDataset(cls.ldf_name) + dataset.add(classification_dataset_generator) + dataset.set_classes(list(labels)) + dataset.make_splits() + + # make config + config_dict = { + "model": { + "name": "test_model", + "predefined_model": {"name": "ClassificationModel"}, + }, + "dataset": {"name": cls.ldf_name}, + "tracker": {"save_directory": cls.tmp_path}, + } + cls.config_path = os.path.join(cls.tmp_path, "config.yaml") + with open(cls.config_path, "w") as yaml_file: + yaml_str = yaml.dump(config_dict) + yaml_file.write(yaml_str) + cfg = Config.get_config(config_dict) + + # train model + cfg.trainer.epochs = 1 + cfg.trainer.validation_interval = 1 + cfg.trainer.batch_size = 4 + trainer = Trainer(cfg=cfg) + trainer.train() + callbacks = [ + c + for c in trainer.pl_trainer.callbacks + if isinstance(c, pl.callbacks.ModelCheckpoint) + ] + model_checkpoint_path = callbacks[0].best_model_path + model_ckpt = os.path.join(trainer.run_save_dir, model_checkpoint_path) + trainer.reset_logging() + + # export model to ONNX + cfg.model.weights = model_ckpt + exporter = Exporter(cfg=cfg) + cls.onnx_model_path = os.path.join(cls.tmp_path, "model.onnx") + exporter.export(onnx_path=cls.onnx_model_path) + exporter.reset_logging() + + # make archive + cfg.archiver.archive_save_directory = cls.tmp_path + archiver = Archiver(cls.config_path) + cls.archive_path = archiver.archive(cls.onnx_model_path) + archiver.reset_logging() + + # load archive files into memory + with tarfile.open(cls.archive_path, mode="r") as tar: + cls.archive_fnames = tar.getnames() + for fname in cls.archive_fnames: + f = tar.extractfile(fname) + if fname.endswith(".json"): + cls.json_dict = json.load(f) + elif fname.endswith(".onnx"): + model_bytes = f.read() + model_io = io.BytesIO(model_bytes) + cls.onnx_model = onnx.load(model_io) + + @classmethod + def teardown_class(cls): + """Remove all created files.""" + LuxonisDataset(cls.ldf_name).delete_dataset() + shutil.rmtree(cls.tmp_path) + + def test_archive_creation(self): + """Test if nn_archive was created.""" + assert os.path.exists(self.archive_path) + + def test_archive_suffix(self): + """Test if nn_archive is compressed using xz option (should be the default + option).""" + assert self.archive_path.endswith("tar.xz") + + def test_archive_contents(self): + """Test if nn_archive consists of config.json and model.onnx.""" + assert ( + len(self.archive_fnames) == 2 + and any([fname == "config.json" for fname in self.archive_fnames]) + and any([fname == "model.onnx" for fname in self.archive_fnames]) + ) + + def test_onnx(self): + """Test if archived ONNX model is valid.""" + assert onnx.checker.check_model(self.onnx_model, full_check=True) is None + + def test_config_inputs(self): + """Test if archived config inputs are valid.""" + config_input_names = [] + for input in self.json_dict["model"]["inputs"]: + config_input_names.append(input["name"]) + assert set([input.name for input in self.onnx_model.graph.input]) == set( + config_input_names + ) + + def test_config_outputs(self): + """Test if archived config outputs are valid.""" + config_output_names = [] + for input in self.json_dict["model"]["outputs"]: + config_output_names.append(input["name"]) + assert set([output.name for output in self.onnx_model.graph.output]) == set( + config_output_names + ) 
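
The patch above exposes archiving both through the new `archive` CLI command and through the Python API. As a minimal usage sketch (not part of the patch), assuming a training config and an already-exported ONNX file are available — the paths below are only illustrative:

    from luxonis_train.core import Archiver

    # Build the archiver from a training config (a path, a dict, or a Config instance).
    archiver = Archiver(cfg="configs/resnet_model.yaml")

    # Bundle the ONNX executable together with the generated config.json into an
    # NN archive; the path of the created archive file is returned.
    archive_path = archiver.archive("output/model.onnx")
    print(f"NN archive written to {archive_path}")

The same archive can also be produced automatically at the end of a run by listing the `ArchiveOnTrainEnd` callback in the config's `trainer.callbacks` section, together with `ExportOnTrainEnd`, which must produce the ONNX file first.
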
From b3b4e32969d4c7f3c2f337048b5a50f0d33bf900 Mon Sep 17 00:00:00 2001 From: jkbmrz <74824974+jkbmrz@users.noreply.github.com> Date: Mon, 25 Mar 2024 10:11:42 +0100 Subject: [PATCH 10/75] Extend NN Archive Generation Test Coverage (#18) * extend NN Archive generation test coverage to cover all implemented heads * [Automated] Updated coverage badge --------- Co-authored-by: GitHub Actions --- media/coverage_badge.svg | 4 +- requirements.txt | 1 + tests/unittests/test_core/test_archiver.py | 407 +++++++++++++++------ 3 files changed, 307 insertions(+), 105 deletions(-) diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 4033e89e..7a18c7f4 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 79% - 79% + 80% + 80% diff --git a/requirements.txt b/requirements.txt index 3a884284..5e436e44 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ onnxruntime>=1.13.1 onnxsim>=0.4.10 optuna>=3.2.0 optuna_integration>=3.6.0 +parameterized>=0.9.0 psycopg2-binary>=2.9.1 pycocotools>=2.0.7 rich>=13.0.0 diff --git a/tests/unittests/test_core/test_archiver.py b/tests/unittests/test_core/test_archiver.py index bdbaa5b9..a044be52 100644 --- a/tests/unittests/test_core/test_archiver.py +++ b/tests/unittests/test_core/test_archiver.py @@ -9,21 +9,26 @@ import lightning.pytorch as pl import numpy as np import onnx -import yaml from luxonis_ml.data import LuxonisDataset +from luxonis_ml.nn_archive.config_building_blocks.base_models import head_outputs +from parameterized import parameterized import luxonis_train from luxonis_train.core import Archiver from luxonis_train.core.exporter import Exporter from luxonis_train.core.trainer import Trainer +from luxonis_train.nodes.enums.head_categorization import ImplementedHeads from luxonis_train.utils.config import Config +HEAD_NAMES = [head_name for head_name in ImplementedHeads.__members__] + class TestArchiver: @classmethod def setup_class(cls): - """Create and load all files required for testing.""" + """Creates all files required for testing.""" + # make tmp dir luxonis_train_parent_dir = os.path.dirname( os.path.dirname(luxonis_train.__file__) ) @@ -32,127 +37,323 @@ def setup_class(cls): ) os.mkdir(cls.tmp_path) - # make LDF - os.mkdir(os.path.join(cls.tmp_path, "images")) - cls.ldf_name = "dummyLDF" - labels = ["label1", "label2", "label3"] + # make LDFs + unilabelLDF = "dummyLDF_unilabel" + cls._make_dummy_ldf( + ldf_name=unilabelLDF, + save_path=cls.tmp_path, + bbx_anno=True, + kpt_anno=True, + ) + multilabelLDF = "dummyLDF_multilabel" + cls._make_dummy_ldf( + ldf_name=multilabelLDF, + save_path=cls.tmp_path, + cls_anno=True, + bbx_anno=True, + sgm_anno=True, + multilabel=True, + ) + cls.ldf_names = [unilabelLDF, multilabelLDF] + + for head_name in HEAD_NAMES: + if head_name == "ImplicitKeypointBBoxHead": + ldf_name = unilabelLDF # multiclass keypoint detection not yet supported in luxonis-train + else: + ldf_name = multilabelLDF + + # make config + cfg_dict = cls._make_dummy_cfg_dict( + head_name=head_name, + save_path=cls.tmp_path, + ldf_name=ldf_name, + ) + cfg = Config.get_config(cfg_dict) + + # train model + cfg.trainer.epochs = 1 + cfg.trainer.validation_interval = 1 + cfg.trainer.batch_size = 1 + trainer = Trainer(cfg=cfg) + trainer.train() + callbacks = [ + c + for c in trainer.pl_trainer.callbacks + if isinstance(c, pl.callbacks.ModelCheckpoint) + ] + model_checkpoint_path = callbacks[0].best_model_path + model_ckpt = os.path.join(trainer.run_save_dir, 
model_checkpoint_path) + trainer.reset_logging() + + # export model to ONNX + cfg.model.weights = model_ckpt + exporter = Exporter(cfg=cfg) + cls.onnx_model_path = os.path.join(cls.tmp_path, "model.onnx") + exporter.export(onnx_path=cls.onnx_model_path) + exporter.reset_logging() + + # make archive + cfg.archiver.archive_save_directory = cls.tmp_path + cfg.archiver.archive_name = f"nnarchive_{head_name}" + archiver = Archiver(cfg=cfg) + cls.archive_path = archiver.archive(cls.onnx_model_path) + archiver.reset_logging() + + # clear the loaded config instance + Config.clear_instance() - def classification_dataset_generator(): - for i in range(10): - img = np.random.randint(0, 256, (10, 10, 3), dtype=np.uint8) - img_file_path = os.path.join(cls.tmp_path, "images", f"img{i}.png") + def _make_dummy_ldf( + ldf_name: str, + save_path: str, + number: int = 3, + dim: tuple = (10, 10, 3), + cls_anno: bool = False, + bbx_anno: bool = False, + sgm_anno: bool = False, + kpt_anno: bool = False, + multilabel: bool = False, + split_ratios: list = None, + ): + """Creates random-pixel images with fictional annotations and parses them to + L{LuxonisDataset} format. + + @type ldf_name: str + @param ldf_name: Name of the created L{LuxonisDataset} format dataset. + @type save_path: str + @param save_path: Path to where the created images are saved. + @type number: int + @param number: Number of images to create. + @type dim: Tuple[int, int, int] + @param dim: Dimensions of the created images in HWC order. + @type cls_anno: bool + @param cls_anno: True if created dataset should contain classification annotations. + type bbx_anno: bool + @param bbx_anno: True if created dataset should contain bounding box annotations. + type sgm_anno: bool + @param sgm_anno: True if created dataset should contain segmentation annotations. + type kpt_anno: bool + @param kpt_anno: True if created dataset should contain keypoint annotations. + type multilabel: bool + @param multilabel: True if created dataset should contain multilabel annotations. + type split_ratios: List[float, float, float] + @param split_ratios: List of ratios defining the train, val, and test splits. 
+ """ + + if split_ratios is None: + split_ratios = [0.333, 0.333, 0.333] + + os.makedirs(os.path.join(save_path, "images"), exist_ok=True) + + if multilabel: + labels = ["label_x", "label_y", "label_z"] + else: + labels = ["label_x"] + + def dataset_generator(): + for i in range(number): + label = random.choice(labels) + img = np.random.randint(0, 256, dim, dtype=np.uint8) + img_file_path = os.path.join(save_path, "images", f"img{i}.png") cv2.imwrite(img_file_path, img) - yield { - "file": img_file_path, - "type": "classification", - "value": True, - "class": random.choice(labels), - } - if LuxonisDataset.exists(cls.ldf_name): + if cls_anno: + yield { + "file": img_file_path, + "type": "classification", + "value": True, + "class": label, + } + + if bbx_anno: + box = (0.25, 0.25, 0.5, 0.5) + yield { + "file": img_file_path, + "type": "box", + "value": box, + "class": label, + } + + if kpt_anno: + keypoints = [ + (0.25, 0.25, 2), + (0.75, 0.25, 2), + (0.75, 0.75, 2), + (0.25, 0.75, 2), + ] + yield { + "file": img_file_path, + "type": "keypoints", + "value": keypoints, + "class": label, + } + + if sgm_anno: + polyline = [ + (0.25, 0.75), + (0.75, 0.25), + (0.75, 0.75), + (0.25, 0.75), + (0.25, 0.25), + ] + yield { + "file": img_file_path, + "type": "polyline", + "value": polyline, + "class": label, + } + + if LuxonisDataset.exists(ldf_name): print("Deleting existing dataset") - LuxonisDataset(cls.ldf_name).delete_dataset() - dataset = LuxonisDataset(cls.ldf_name) - dataset.add(classification_dataset_generator) + LuxonisDataset(ldf_name).delete_dataset() + dataset = LuxonisDataset(ldf_name) dataset.set_classes(list(labels)) - dataset.make_splits() - - # make config - config_dict = { - "model": { - "name": "test_model", - "predefined_model": {"name": "ClassificationModel"}, - }, - "dataset": {"name": cls.ldf_name}, - "tracker": {"save_directory": cls.tmp_path}, - } - cls.config_path = os.path.join(cls.tmp_path, "config.yaml") - with open(cls.config_path, "w") as yaml_file: - yaml_str = yaml.dump(config_dict) - yaml_file.write(yaml_str) - cfg = Config.get_config(config_dict) - - # train model - cfg.trainer.epochs = 1 - cfg.trainer.validation_interval = 1 - cfg.trainer.batch_size = 4 - trainer = Trainer(cfg=cfg) - trainer.train() - callbacks = [ - c - for c in trainer.pl_trainer.callbacks - if isinstance(c, pl.callbacks.ModelCheckpoint) - ] - model_checkpoint_path = callbacks[0].best_model_path - model_ckpt = os.path.join(trainer.run_save_dir, model_checkpoint_path) - trainer.reset_logging() - - # export model to ONNX - cfg.model.weights = model_ckpt - exporter = Exporter(cfg=cfg) - cls.onnx_model_path = os.path.join(cls.tmp_path, "model.onnx") - exporter.export(onnx_path=cls.onnx_model_path) - exporter.reset_logging() - - # make archive - cfg.archiver.archive_save_directory = cls.tmp_path - archiver = Archiver(cls.config_path) - cls.archive_path = archiver.archive(cls.onnx_model_path) - archiver.reset_logging() - - # load archive files into memory - with tarfile.open(cls.archive_path, mode="r") as tar: - cls.archive_fnames = tar.getnames() - for fname in cls.archive_fnames: - f = tar.extractfile(fname) - if fname.endswith(".json"): - cls.json_dict = json.load(f) - elif fname.endswith(".onnx"): - model_bytes = f.read() - model_io = io.BytesIO(model_bytes) - cls.onnx_model = onnx.load(model_io) + if kpt_anno: + keypoint_labels = [ + "kp1", + "kp2", + "kp3", + "kp4", + ] + keypoint_edges = [ + [0, 1], + [1, 2], + [2, 3], + [3, 0], + ] + dataset.set_skeletons( + { + label: {"labels": 
keypoint_labels, "edges": keypoint_edges} + for label in labels + } + ) + dataset.add(dataset_generator) + dataset.make_splits(ratios=split_ratios) - @classmethod - def teardown_class(cls): - """Remove all created files.""" - LuxonisDataset(cls.ldf_name).delete_dataset() - shutil.rmtree(cls.tmp_path) + def _make_dummy_cfg_dict(head_name: str, ldf_name: str, save_path: str) -> dict: + """Creates a configuration dict based on the type of the provided model head. + + @type head_name: str + @param head_name: Name of the specified head. + @type ldf_name: str + @param ldf_name: Name of the L{LuxonisDataset} format dataset on which the + training will be performed. + @type save_path: str + @param save_path: Path to LuxonisTrackerPL save directory. + @rtype: dict + @return: Created config dict. + """ + + cfg_dict = {"model": {"name": f"model_w_{head_name}"}} + cfg_dict["dataset"] = {"name": ldf_name} + cfg_dict["tracker"] = {"save_directory": save_path} - def test_archive_creation(self): - """Test if nn_archive was created.""" - assert os.path.exists(self.archive_path) + if head_name == "ClassificationHead": + cfg_dict["model"]["predefined_model"] = {"name": "ClassificationModel"} + elif head_name == "EfficientBBoxHead": + cfg_dict["model"]["predefined_model"] = {"name": "DetectionModel"} + elif head_name == "ImplicitKeypointBBoxHead": + cfg_dict["model"]["predefined_model"] = {"name": "KeypointDetectionModel"} + elif head_name == "SegmentationHead": + cfg_dict["model"]["predefined_model"] = {"name": "SegmentationModel"} + elif head_name == "BiSeNetHead": + cfg_dict["model"]["nodes"] = [ + {"name": "MicroNet", "alias": "segmentation_backbone"}, + { + "name": "BiSeNetHead", + "alias": "segmentation_head", + "inputs": ["segmentation_backbone"], + }, + ] + cfg_dict["model"]["losses"] = [ + {"name": "BCEWithLogitsLoss", "attached_to": "segmentation_head"} + ] + else: + raise NotImplementedError(f"No implementation for {head_name}") - def test_archive_suffix(self): - """Test if nn_archive is compressed using xz option (should be the default + return cfg_dict + + @parameterized.expand(HEAD_NAMES) + def test_archive_creation(self, head_name): + """Tests if NN archive was created using xz compression (should be the default option).""" - assert self.archive_path.endswith("tar.xz") + archive_path = os.path.join(self.tmp_path, f"nnarchive_{head_name}_onnx.tar.xz") + assert archive_path.endswith("tar.xz") - def test_archive_contents(self): - """Test if nn_archive consists of config.json and model.onnx.""" + @parameterized.expand(HEAD_NAMES) + def test_archive_contents(self, head_name): + """Tests if NN archive consists of config.json and model.onnx.""" + archive_path = os.path.join(self.tmp_path, f"nnarchive_{head_name}_onnx.tar.xz") + with tarfile.open(archive_path, mode="r") as tar: + archive_fnames = tar.getnames() assert ( - len(self.archive_fnames) == 2 - and any([fname == "config.json" for fname in self.archive_fnames]) - and any([fname == "model.onnx" for fname in self.archive_fnames]) + len(archive_fnames) == 2 + and any([fname == "config.json" for fname in archive_fnames]) + and any([fname == "model.onnx" for fname in archive_fnames]) ) - def test_onnx(self): - """Test if archived ONNX model is valid.""" - assert onnx.checker.check_model(self.onnx_model, full_check=True) is None + @parameterized.expand(HEAD_NAMES) + def test_onnx(self, head_name): + """Tests if archive ONNX model is valid.""" + archive_path = os.path.join(self.tmp_path, f"nnarchive_{head_name}_onnx.tar.xz") + with 
tarfile.open(archive_path, mode="r") as tar: + f = tar.extractfile("model.onnx") + model_bytes = f.read() + model_io = io.BytesIO(model_bytes) + onnx_model = onnx.load(model_io) + assert onnx.checker.check_model(onnx_model, full_check=True) is None + + @parameterized.expand(HEAD_NAMES) + def test_config_io(self, head_name): + """Tests if archived config inputs and outputs are valid.""" + archive_path = os.path.join(self.tmp_path, f"nnarchive_{head_name}_onnx.tar.xz") + with tarfile.open(archive_path, mode="r") as tar: + f = tar.extractfile("config.json") + json_dict = json.load(f) + f = tar.extractfile("model.onnx") + model_bytes = f.read() + model_io = io.BytesIO(model_bytes) + onnx_model = onnx.load(model_io) - def test_config_inputs(self): - """Test if archived config inputs are valid.""" config_input_names = [] - for input in self.json_dict["model"]["inputs"]: + for input in json_dict["model"]["inputs"]: config_input_names.append(input["name"]) - assert set([input.name for input in self.onnx_model.graph.input]) == set( + valid_inputs = set([input.name for input in onnx_model.graph.input]) == set( config_input_names ) - def test_config_outputs(self): - """Test if archived config outputs are valid.""" config_output_names = [] - for input in self.json_dict["model"]["outputs"]: + for input in json_dict["model"]["outputs"]: config_output_names.append(input["name"]) - assert set([output.name for output in self.onnx_model.graph.output]) == set( + valid_outputs = set([output.name for output in onnx_model.graph.output]) == set( config_output_names ) + + assert valid_inputs and valid_outputs + + @parameterized.expand(HEAD_NAMES) + def test_head_outputs(self, head_name): + """Tests if archived config head outputs are valid.""" + archive_path = os.path.join(self.tmp_path, f"nnarchive_{head_name}_onnx.tar.xz") + with tarfile.open(archive_path, mode="r") as tar: + f = tar.extractfile("config.json") + json_dict = json.load(f) + head_output = json_dict["model"]["heads"][0]["outputs"] + if head_name == "ClassificationHead": + assert head_outputs.OutputsClassification.parse_obj(head_output) + elif head_name == "EfficientBBoxHead": + assert head_outputs.OutputsYOLO.parse_obj(head_output) + elif head_name == "ImplicitKeypointBBoxHead": + assert head_outputs.OutputsKeypointDetectionYOLO.parse_obj(head_output) + elif head_name == "SegmentationHead": + assert head_outputs.OutputsSegmentation.parse_obj(head_output) + elif head_name == "BiSeNetHead": + assert head_outputs.OutputsSegmentation.parse_obj(head_output) + else: + raise NotImplementedError(f"Missing tests for {head_name} head") + + @classmethod + def teardown_class(cls): + """Removes all files created during setup.""" + for ldf_name in cls.ldf_names: + LuxonisDataset(ldf_name).delete_dataset() + shutil.rmtree(cls.tmp_path) From 351e0c58ff281987a2c9642e09c3ca2d3851dfa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Thu, 11 Apr 2024 16:11:58 +0200 Subject: [PATCH 11/75] Upload All Checkpoints (#19) * uploading all checkpoints * fix names * removed comment --- luxonis_train/callbacks/upload_checkpoint.py | 47 +++++++++++--------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/luxonis_train/callbacks/upload_checkpoint.py b/luxonis_train/callbacks/upload_checkpoint.py index a0fa137a..efd7fe02 100644 --- a/luxonis_train/callbacks/upload_checkpoint.py +++ b/luxonis_train/callbacks/upload_checkpoint.py @@ -1,5 +1,6 @@ import logging import os +from pathlib import Path from typing import Any import lightning.pytorch 
as pl @@ -25,37 +26,41 @@ def __init__(self, upload_directory: str): ) self.logger = logging.getLogger(__name__) self.last_logged_epoch = None - self.last_best_checkpoint = None + self.last_best_checkpoints = set() def on_save_checkpoint( self, trainer: pl.Trainer, - pl_module: pl.LightningModule, + _: pl.LightningModule, checkpoint: dict[str, Any], ) -> None: # Log only once per epoch in case there are multiple ModelCheckpoint callbacks if not self.last_logged_epoch == trainer.current_epoch: - model_checkpoint_callbacks = [ - c + checkpoint_paths = [ + c.best_model_path for c in trainer.callbacks # type: ignore if isinstance(c, pl.callbacks.ModelCheckpoint) # type: ignore + and c.best_model_path ] - # NOTE: assume that first checkpoint callback is based on val loss - curr_best_checkpoint = model_checkpoint_callbacks[0].best_model_path - - if self.last_best_checkpoint != curr_best_checkpoint: - self.logger.info(f"Started checkpoint upload to {self.fs.full_path}...") - temp_filename = "curr_best_val_loss.ckpt" - torch.save(checkpoint, temp_filename) - self.fs.put_file( - local_path=temp_filename, - remote_path=temp_filename, - mlflow_instance=trainer.logger.experiment.get( # type: ignore - "mlflow", None - ), - ) - os.remove(temp_filename) - self.logger.info("Checkpoint upload finished") - self.last_best_checkpoint = curr_best_checkpoint + for curr_best_checkpoint in checkpoint_paths: + if curr_best_checkpoint not in self.last_best_checkpoints: + self.logger.info( + f"Started checkpoint upload to {self.fs.full_path}..." + ) + temp_filename = ( + Path(curr_best_checkpoint).parent.with_suffix(".ckpt").name + ) + torch.save(checkpoint, temp_filename) + + self.fs.put_file( + local_path=temp_filename, + remote_path=temp_filename, + mlflow_instance=trainer.logger.experiment.get( # type: ignore + "mlflow", None + ), + ) + os.remove(temp_filename) + self.logger.info("Checkpoint upload finished") + self.last_best_checkpoints.add(curr_best_checkpoint) self.last_logged_epoch = trainer.current_epoch From 9c4cadb932254e7ad559350a2eb55ebc72f20266 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Thu, 11 Apr 2024 16:20:48 +0200 Subject: [PATCH 12/75] LuxonisML v0.1.0 (#20) --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 5e436e44..03081b48 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,6 @@ blobconverter>=1.4.2 lightning>=2.0.0 -#luxonis-ml[all]>=0.0.1 -luxonis-ml[all]@git+https://github.com/luxonis/luxonis-ml.git@dev +luxonis-ml[all]>=0.1.0 onnx>=1.12.0 onnxruntime>=1.13.1 onnxsim>=0.4.10 From f425fdb39ae11ead1ff09385ce802729ab96e4dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Mon, 15 Apr 2024 20:22:14 +0200 Subject: [PATCH 13/75] SIGTERM Handling (#21) * handling SIGTERM signal * resume argument takes path --- luxonis_train/__main__.py | 10 ++++-- .../callbacks/luxonis_progress_bar.py | 2 +- luxonis_train/core/trainer.py | 36 ++++++++++++++++++- 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index b1fd3971..94276b60 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -45,11 +45,17 @@ def __str__(self): @app.command() -def train(config: ConfigType = None, opts: OptsType = None): +def train( + config: ConfigType = None, + resume: Annotated[ + Optional[str], typer.Option(help="Resume training from this checkpoint.") + ] = None, + opts: OptsType = None, +): """Start 
training.""" from luxonis_train.core import Trainer - Trainer(str(config), opts).train() + Trainer(str(config), opts, resume=resume).train() @app.command() diff --git a/luxonis_train/callbacks/luxonis_progress_bar.py b/luxonis_train/callbacks/luxonis_progress_bar.py index fcc130cd..16d173e7 100644 --- a/luxonis_train/callbacks/luxonis_progress_bar.py +++ b/luxonis_train/callbacks/luxonis_progress_bar.py @@ -28,7 +28,7 @@ def get_metrics( ) -> dict[str, int | str | float | dict[str, float]]: # NOTE: there might be a cleaner way of doing this items = super().get_metrics(trainer, pl_module) - if trainer.training: + if trainer.training and pl_module.training_step_outputs: items["Loss"] = pl_module.training_step_outputs[-1]["loss"].item() return items diff --git a/luxonis_train/core/trainer.py b/luxonis_train/core/trainer.py index 2b3d6a78..8326ce48 100644 --- a/luxonis_train/core/trainer.py +++ b/luxonis_train/core/trainer.py @@ -1,3 +1,5 @@ +import os.path as osp +import signal import threading from logging import getLogger from typing import Any, Literal @@ -21,6 +23,7 @@ def __init__( self, cfg: str | dict[str, Any] | Config, opts: list[str] | tuple[str, ...] | dict[str, Any] | None = None, + resume: str | None = None, ): """Constructs a new Trainer instance. @@ -30,9 +33,17 @@ def __init__( @type opts: list[str] | tuple[str, ...] | dict[str, Any] | None @param opts: Argument dict provided through command line, used for config overriding. + + @type resume: str | None + @param resume: Training will resume from this checkpoint. """ super().__init__(cfg, opts) + if resume is not None: + self.resume = str(LuxonisFileSystem.download(resume, self.run_save_dir)) + else: + self.resume = None + self.lightning_module = LuxonisModel( cfg=self.cfg, dataset_metadata=self.dataset_metadata, @@ -40,6 +51,29 @@ def __init__( input_shape=self.loader_train.input_shape, ) + def graceful_exit(signum, frame): + logger.info("SIGTERM received, stopping training...") + ckpt_path = osp.join(self.run_save_dir, "resume.ckpt") + self.pl_trainer.save_checkpoint(ckpt_path) + self._upload_logs() + + if self.cfg.tracker.is_mlflow: + logger.info("Uploading checkpoint to MLFlow.") + fs = LuxonisFileSystem( + "mlflow://", + allow_active_mlflow_run=True, + allow_local=False, + ) + fs.put_file( + local_path=ckpt_path, + remote_path="resume.ckpt", + mlflow_instance=self.tracker.experiment.get("mlflow", None), + ) + + exit(0) + + signal.signal(signal.SIGTERM, graceful_exit) + def _upload_logs(self) -> None: if self.cfg.tracker.is_mlflow: logger.info("Uploading logs to MLFlow.") @@ -56,7 +90,7 @@ def _upload_logs(self) -> None: def _trainer_fit(self, *args, **kwargs): try: - self.pl_trainer.fit(*args, **kwargs) + self.pl_trainer.fit(*args, ckpt_path=self.resume, **kwargs) except Exception: logger.exception("Encountered exception during training.") finally: From ca570637eefae0912dae338cf4b25871b3bba52f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Wed, 24 Apr 2024 02:06:57 +0200 Subject: [PATCH 14/75] Task Label Groups Support (#22) * handling SIGTERM signal * resume argument takes path * basic task group labels support * updated requirements * fixed tests * fixed loader test * Update luxonis_train/models/luxonis_model.py Co-authored-by: conorsim <60359299+conorsim@users.noreply.github.com> --------- Co-authored-by: conorsim <60359299+conorsim@users.noreply.github.com> --- luxonis_train/models/luxonis_model.py | 12 ++- luxonis_train/utils/boxutils.py | 4 +- luxonis_train/utils/config.py | 1 + 
luxonis_train/utils/loaders/base_loader.py | 81 ++++++++++--------- .../utils/loaders/luxonis_loader_torch.py | 10 ++- luxonis_train/utils/types.py | 1 + requirements.txt | 3 +- tests/integration/conftest.py | 4 +- tests/unittests/test_core/test_archiver.py | 2 +- .../test_loaders/test_base_loader.py | 6 +- 10 files changed, 71 insertions(+), 53 deletions(-) diff --git a/luxonis_train/models/luxonis_model.py b/luxonis_train/models/luxonis_model.py index 7cd396f9..58aeccd1 100644 --- a/luxonis_train/models/luxonis_model.py +++ b/luxonis_train/models/luxonis_model.py @@ -35,7 +35,7 @@ ) from luxonis_train.utils.registry import CALLBACKS, OPTIMIZERS, SCHEDULERS, Registry from luxonis_train.utils.tracker import LuxonisTrackerPL -from luxonis_train.utils.types import Kwargs, Labels, Packet +from luxonis_train.utils.types import Kwargs, Labels, Packet, TaskLabels from .luxonis_output import LuxonisOutput @@ -139,10 +139,13 @@ def __init__( frozen_nodes: list[tuple[str, int]] = [] nodes: dict[str, tuple[type[BaseNode], Kwargs]] = {} + self.node_tasks: dict[str, str] = {} + for node_cfg in self.cfg.model.nodes: node_name = node_cfg.name Node = BaseNode.REGISTRY.get(node_name) node_name = node_cfg.alias or node_name + self.node_tasks[node_name] = node_cfg.task_group if node_cfg.freezing.active: epochs = self.cfg.trainer.epochs if node_cfg.freezing.unfreeze_after is None: @@ -244,7 +247,7 @@ def _initiate_nodes( def forward( self, inputs: Tensor, - labels: Labels | None = None, + task_labels: TaskLabels | None = None, images: Tensor | None = None, *, compute_loss: bool = True, @@ -259,8 +262,8 @@ def forward( @type inputs: L{Tensor} @param inputs: Input tensor. - @type labels: L{Labels} | None - @param labels: Labels dictionary. Defaults to C{None}. + @type task_labels: L{TaskLabels} | None + @param task_labels: Labels dictionary. Defaults to C{None}. @type images: L{Tensor} | None @param images: Canvas tensor for visualizers. Defaults to C{None}. @type compute_loss: bool @@ -296,6 +299,7 @@ def forward( node_inputs = [computed[pred] for pred in input_names] outputs = node.run(node_inputs) computed[node_name] = outputs + labels = task_labels[self.node_tasks[node_name]] if task_labels else None if compute_loss and node_name in self.losses and labels is not None: for loss_name, loss in self.losses[node_name].items(): diff --git a/luxonis_train/utils/boxutils.py b/luxonis_train/utils/boxutils.py index 0d708f79..a59f4cd0 100644 --- a/luxonis_train/utils/boxutils.py +++ b/luxonis_train/utils/boxutils.py @@ -404,6 +404,7 @@ def anchors_from_dataset( n_anchors: int = 9, n_generations: int = 1000, ratio_threshold: float = 4.0, + task_group: str = "default", ) -> tuple[Tensor, float]: """Generates anchors based on bounding box annotations present in provided data loader. 
It uses K-Means for initial proposals which are then refined with genetic @@ -425,7 +426,8 @@ def anchors_from_dataset( widths = [] inputs = None - for inp, labels in loader: + for inp, task_labels in loader: + labels = next(iter(task_labels.values())) # TODO: handle multiple tasks boxes = labels[LabelType.BOUNDINGBOX] curr_wh = boxes[:, 4:] widths.append(curr_wh) diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index a2d4f332..45dde192 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -43,6 +43,7 @@ class ModelNodeConfig(CustomBaseModel): inputs: list[str] = [] params: dict[str, Any] = {} freezing: FreezingConfig = FreezingConfig() + task_group: str = "default" class PredefinedModelConfig(CustomBaseModel): diff --git a/luxonis_train/utils/loaders/base_loader.py b/luxonis_train/utils/loaders/base_loader.py index 93f3fd0c..be12b439 100644 --- a/luxonis_train/utils/loaders/base_loader.py +++ b/luxonis_train/utils/loaders/base_loader.py @@ -8,7 +8,7 @@ from luxonis_train.utils.registry import LOADERS from luxonis_train.utils.types import Labels, LabelType -LuxonisLoaderTorchOutput = tuple[Tensor, Labels] +LuxonisLoaderTorchOutput = tuple[Tensor, dict[str, Labels]] """LuxonisLoaderTorchOutput is a tuple of images and corresponding labels.""" @@ -46,7 +46,7 @@ def __getitem__(self, idx: int) -> LuxonisLoaderTorchOutput: def collate_fn( batch: list[LuxonisLoaderTorchOutput], -) -> tuple[Tensor, dict[LabelType, Tensor]]: +) -> tuple[Tensor, dict[str, dict[LabelType, Tensor]]]: """Default collate function used for training. @type batch: list[LuxonisLoaderTorchOutput] @@ -55,41 +55,46 @@ def collate_fn( @rtype: tuple[Tensor, dict[LabelType, Tensor]] @return: Tuple of images and annotations in the format expected by the model. 
""" - zipped = zip(*batch) - imgs, anno_dicts = zipped + imgs, group_dicts = zip(*batch) + out_group_dicts = {task: {} for task in group_dicts[0].keys()} imgs = torch.stack(imgs, 0) - present_annotations = anno_dicts[0].keys() - out_annotations: dict[LabelType, Tensor] = { - anno: torch.empty(0) for anno in present_annotations - } - - if LabelType.CLASSIFICATION in present_annotations: - class_annos = [anno[LabelType.CLASSIFICATION] for anno in anno_dicts] - out_annotations[LabelType.CLASSIFICATION] = torch.stack(class_annos, 0) - - if LabelType.SEGMENTATION in present_annotations: - seg_annos = [anno[LabelType.SEGMENTATION] for anno in anno_dicts] - out_annotations[LabelType.SEGMENTATION] = torch.stack(seg_annos, 0) - - if LabelType.BOUNDINGBOX in present_annotations: - bbox_annos = [anno[LabelType.BOUNDINGBOX] for anno in anno_dicts] - label_box: list[Tensor] = [] - for i, box in enumerate(bbox_annos): - l_box = torch.zeros((box.shape[0], 6)) - l_box[:, 0] = i # add target image index for build_targets() - l_box[:, 1:] = box - label_box.append(l_box) - out_annotations[LabelType.BOUNDINGBOX] = torch.cat(label_box, 0) - - if LabelType.KEYPOINT in present_annotations: - keypoint_annos = [anno[LabelType.KEYPOINT] for anno in anno_dicts] - label_keypoints: list[Tensor] = [] - for i, points in enumerate(keypoint_annos): - l_kps = torch.zeros((points.shape[0], points.shape[1] + 1)) - l_kps[:, 0] = i # add target image index for build_targets() - l_kps[:, 1:] = points - label_keypoints.append(l_kps) - out_annotations[LabelType.KEYPOINT] = torch.cat(label_keypoints, 0) - - return imgs, out_annotations + for task in list(group_dicts[0].keys()): + anno_dicts = [group[task] for group in group_dicts] + + present_annotations = anno_dicts[0].keys() + out_annotations: dict[LabelType, Tensor] = { + anno: torch.empty(0) for anno in present_annotations + } + + if LabelType.CLASSIFICATION in present_annotations: + class_annos = [anno[LabelType.CLASSIFICATION] for anno in anno_dicts] + out_annotations[LabelType.CLASSIFICATION] = torch.stack(class_annos, 0) + + if LabelType.SEGMENTATION in present_annotations: + seg_annos = [anno[LabelType.SEGMENTATION] for anno in anno_dicts] + out_annotations[LabelType.SEGMENTATION] = torch.stack(seg_annos, 0) + + if LabelType.BOUNDINGBOX in present_annotations: + bbox_annos = [anno[LabelType.BOUNDINGBOX] for anno in anno_dicts] + label_box: list[Tensor] = [] + for i, box in enumerate(bbox_annos): + l_box = torch.zeros((box.shape[0], 6)) + l_box[:, 0] = i # add target image index for build_targets() + l_box[:, 1:] = box + label_box.append(l_box) + out_annotations[LabelType.BOUNDINGBOX] = torch.cat(label_box, 0) + + if LabelType.KEYPOINT in present_annotations: + keypoint_annos = [anno[LabelType.KEYPOINT] for anno in anno_dicts] + label_keypoints: list[Tensor] = [] + for i, points in enumerate(keypoint_annos): + l_kps = torch.zeros((points.shape[0], points.shape[1] + 1)) + l_kps[:, 0] = i # add target image index for build_targets() + l_kps[:, 1:] = points + label_keypoints.append(l_kps) + out_annotations[LabelType.KEYPOINT] = torch.cat(label_keypoints, 0) + + out_group_dicts[task] = out_annotations + + return imgs, out_group_dicts diff --git a/luxonis_train/utils/loaders/luxonis_loader_torch.py b/luxonis_train/utils/loaders/luxonis_loader_torch.py index a0e1f324..dfd4091a 100644 --- a/luxonis_train/utils/loaders/luxonis_loader_torch.py +++ b/luxonis_train/utils/loaders/luxonis_loader_torch.py @@ -29,11 +29,13 @@ def input_shape(self) -> Size: return Size([1, *img.shape]) 
def __getitem__(self, idx: int) -> LuxonisLoaderTorchOutput: - img, annotations = self.base_loader[idx] + img, group_annotations = self.base_loader[idx] img = np.transpose(img, (2, 0, 1)) # HWC to CHW tensor_img = Tensor(img) - for key in annotations: - annotations[key] = Tensor(annotations[key]) # type: ignore + for task in group_annotations: + annotations = group_annotations[task] + for key in annotations: + annotations[key] = Tensor(annotations[key]) # type: ignore - return tensor_img, annotations + return tensor_img, group_annotations diff --git a/luxonis_train/utils/types.py b/luxonis_train/utils/types.py index dbbf471e..3fb724c3 100644 --- a/luxonis_train/utils/types.py +++ b/luxonis_train/utils/types.py @@ -7,6 +7,7 @@ Kwargs = dict[str, Any] OutputTypes = Literal["boxes", "class", "keypoints", "segmentation", "features"] Labels = dict[LabelType, Tensor] +TaskLabels = dict[str, Labels] AttachIndexType = Literal["all"] | int | tuple[int, int] | tuple[int, int, int] """AttachIndexType is used to specify to which output of the prevoius node does the diff --git a/requirements.txt b/requirements.txt index 03081b48..7f7e996a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ blobconverter>=1.4.2 lightning>=2.0.0 -luxonis-ml[all]>=0.1.0 +#luxonis-ml[all]>=0.1.0 +luxonis-ml[all]@git+https://github.com/luxonis/luxonis-ml.git@dev onnx>=1.12.0 onnxruntime>=1.13.1 onnxsim>=0.4.10 diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 35c893d4..815a4bd5 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -120,7 +120,7 @@ def COCO_people_subset_generator(): } } ) - dataset.add(COCO_people_subset_generator) # type: ignore + dataset.add(COCO_people_subset_generator()) dataset.make_splits() @@ -161,5 +161,5 @@ def CIFAR10_subset_generator(): dataset.set_classes(classes) - dataset.add(CIFAR10_subset_generator) # type: ignore + dataset.add(CIFAR10_subset_generator()) dataset.make_splits() diff --git a/tests/unittests/test_core/test_archiver.py b/tests/unittests/test_core/test_archiver.py index a044be52..fe10a46e 100644 --- a/tests/unittests/test_core/test_archiver.py +++ b/tests/unittests/test_core/test_archiver.py @@ -226,7 +226,7 @@ def dataset_generator(): for label in labels } ) - dataset.add(dataset_generator) + dataset.add(dataset_generator()) dataset.make_splits(ratios=split_ratios) def _make_dummy_cfg_dict(head_name: str, ldf_name: str, save_path: str) -> dict: diff --git a/tests/unittests/test_utils/test_loaders/test_base_loader.py b/tests/unittests/test_utils/test_loaders/test_base_loader.py index e48f81ad..b5c8b299 100644 --- a/tests/unittests/test_utils/test_loaders/test_base_loader.py +++ b/tests/unittests/test_utils/test_loaders/test_base_loader.py @@ -12,11 +12,11 @@ def test_collate_fn(): batch = [ ( torch.rand(3, 224, 224, dtype=torch.float32), - {LabelType.CLASSIFICATION: torch.tensor([1, 0])}, + {"default": {LabelType.CLASSIFICATION: torch.tensor([1, 0])}}, ), ( torch.rand(3, 224, 224, dtype=torch.float32), - {LabelType.CLASSIFICATION: torch.tensor([0, 1])}, + {"default": {LabelType.CLASSIFICATION: torch.tensor([0, 1])}}, ), ] @@ -28,6 +28,8 @@ def test_collate_fn(): assert imgs.dtype == torch.float32 # Check annotations + assert "default" in annotations + annotations = annotations["default"] assert LabelType.CLASSIFICATION in annotations assert annotations[LabelType.CLASSIFICATION].shape == (2, 2) assert annotations[LabelType.CLASSIFICATION].dtype == torch.int64 From d1d71f059d6ee3f7bdbad22a3978b05b6fa79518 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Wed, 8 May 2024 02:06:42 +0200 Subject: [PATCH 15/75] Tensor Core Float16 Precision (#24) * option to set torch matmul precision for tensor cores * updated readme --- configs/README.md | 35 ++++++++++++++++++----------------- luxonis_train/core/trainer.py | 4 ++++ luxonis_train/utils/config.py | 1 + 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/configs/README.md b/configs/README.md index 27e2fb6e..c1f4889b 100644 --- a/configs/README.md +++ b/configs/README.md @@ -142,23 +142,24 @@ To store and load the data we use LuxonisDataset and LuxonisLoader. For specific Here you can change everything related to actual training of the model. -| Key | Type | Default value | Description | -| ----------------------- | --------------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | -| batch_size | int | 32 | batch size used for training | -| accumulate_grad_batches | int | 1 | number of batches for gradient accumulation | -| use_weighted_sampler | bool | False | bool if use WeightedRandomSampler for training, only works with classification tasks | -| epochs | int | 100 | number of training epochs | -| num_workers | int | 2 | number of workers for data loading | -| train_metrics_interval | int | -1 | frequency of computing metrics on train data, -1 if don't perform | -| validation_interval | int | 1 | frequency of computing metrics on validation data | -| num_log_images | int | 4 | maximum number of images to visualize and log | -| skip_last_batch | bool | True | whether to skip last batch while training | -| accelerator | Literal\["auto", "cpu", "gpu"\] | "auto" | What accelerator to use for training. | -| devices | int \| list\[int\] \| str | "auto" | Either specify how many devices to use (int), list specific devices, or use "auto" for automatic configuration based on the selected accelerator | -| strategy | Literal\["auto", "ddp"\] | "auto" | What strategy to use for training. | -| num_sanity_val_steps | int | 2 | Number of sanity validation steps performed before training. | -| profiler | Literal\["simple", "advanced"\] \| None | None | PL profiler for GPU/CPU/RAM utilization analysis | -| verbose | bool | True | Print all intermediate results to console. | +| Key | Type | Default value | Description | +| ----------------------- | ---------------------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | +| batch_size | int | 32 | batch size used for training | +| accumulate_grad_batches | int | 1 | number of batches for gradient accumulation | +| use_weighted_sampler | bool | False | bool if use WeightedRandomSampler for training, only works with classification tasks | +| epochs | int | 100 | number of training epochs | +| num_workers | int | 2 | number of workers for data loading | +| train_metrics_interval | int | -1 | frequency of computing metrics on train data, -1 if don't perform | +| validation_interval | int | 1 | frequency of computing metrics on validation data | +| num_log_images | int | 4 | maximum number of images to visualize and log | +| skip_last_batch | bool | True | whether to skip last batch while training | +| accelerator | Literal\["auto", "cpu", "gpu"\] | "auto" | What accelerator to use for training. 
| +| devices | int \| list\[int\] \| str | "auto" | Either specify how many devices to use (int), list specific devices, or use "auto" for automatic configuration based on the selected accelerator | +| matmul_precision | Literal\["medium", "high", "highest"\] \| None | None | Sets the internal precision of float32 matrix multiplications. | +| strategy | Literal\["auto", "ddp"\] | "auto" | What strategy to use for training. | +| num_sanity_val_steps | int | 2 | Number of sanity validation steps performed before training. | +| profiler | Literal\["simple", "advanced"\] \| None | None | PL profiler for GPU/CPU/RAM utilization analysis | +| verbose | bool | True | Print all intermediate results to console. | ### Preprocessing diff --git a/luxonis_train/core/trainer.py b/luxonis_train/core/trainer.py index 8326ce48..fc634544 100644 --- a/luxonis_train/core/trainer.py +++ b/luxonis_train/core/trainer.py @@ -4,6 +4,7 @@ from logging import getLogger from typing import Any, Literal +import torch from lightning.pytorch.utilities import rank_zero_only # type: ignore from luxonis_ml.utils import LuxonisFileSystem @@ -39,6 +40,9 @@ def __init__( """ super().__init__(cfg, opts) + if self.cfg.trainer.matmul_precision is not None: + torch.set_float32_matmul_precision(self.cfg.trainer.matmul_precision) + if resume is not None: self.resume = str(LuxonisFileSystem.download(resume, self.run_save_dir)) else: diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 45dde192..e94c591e 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -203,6 +203,7 @@ class TrainerConfig(CustomBaseModel): strategy: Literal["auto", "ddp"] = "auto" num_sanity_val_steps: int = 2 profiler: Literal["simple", "advanced"] | None = None + matmul_precision: Literal["medium", "high", "highest"] | None = None verbose: bool = True batch_size: int = 32 From 08300436944448f22644577c0a96ef77ba5a51fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Tue, 14 May 2024 18:55:31 +0200 Subject: [PATCH 16/75] Metrics - Fixed Missing Reset (#25) * fixed reset not being called * added metric resets * removed inheritance * proper oks reset * removed unnecessary resets * added annotations --- luxonis_train/attached_modules/metrics/common.py | 8 ++++++-- .../attached_modules/metrics/mean_average_precision.py | 5 ++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/luxonis_train/attached_modules/metrics/common.py b/luxonis_train/attached_modules/metrics/common.py index 27d1069a..6d16a4b4 100644 --- a/luxonis_train/attached_modules/metrics/common.py +++ b/luxonis_train/attached_modules/metrics/common.py @@ -1,6 +1,7 @@ import logging import torchmetrics +from torch import Tensor from .base_metric import BaseMetric @@ -47,14 +48,17 @@ def __init__(self, **kwargs): self.metric = self.Metric(**kwargs) - def update(self, preds, target, *args, **kwargs): + def update(self, preds, target, *args, **kwargs) -> None: if self.task in ["multiclass"]: target = target.argmax(dim=1) self.metric.update(preds, target, *args, **kwargs) - def compute(self): + def compute(self) -> Tensor: return self.metric.compute() + def reset(self) -> None: + self.metric.reset() + class Accuracy(TorchMetricWrapper): Metric = torchmetrics.Accuracy diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision.py b/luxonis_train/attached_modules/metrics/mean_average_precision.py index 34adbcd9..0a58d061 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision.py +++ 
b/luxonis_train/attached_modules/metrics/mean_average_precision.py @@ -12,7 +12,7 @@ from .base_metric import BaseMetric -class MeanAveragePrecision(BaseMetric, detection.MeanAveragePrecision): +class MeanAveragePrecision(BaseMetric): """Compute the Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR) for object detection predictions. @@ -62,6 +62,9 @@ def prepare( return output_list, label_list + def reset(self) -> None: + self.metric.reset() + def compute(self) -> tuple[Tensor, dict[str, Tensor]]: metric_dict = self.metric.compute() From 5a31f72976875ca9471a97827ff70410ef10b4e7 Mon Sep 17 00:00:00 2001 From: KlemenSkrlj <47853619+klemen1999@users.noreply.github.com> Date: Wed, 15 May 2024 20:55:50 +0200 Subject: [PATCH 17/75] Deterministic Training Support (#23) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Added seed to config for reproducibility * fixed seg drawing when using torch deterministic backend * added deterministic order of creating nodes * removed seed from example config * added reproducability to inspect * formatting --------- Co-authored-by: DrejcPesjak Co-authored-by: Martin Kozlovský --- configs/README.md | 1 + luxonis_train/__main__.py | 3 +++ .../visualizers/segmentation_visualizer.py | 6 ++---- luxonis_train/core/core.py | 6 ++++++ luxonis_train/core/tuner.py | 7 +++++++ luxonis_train/utils/config.py | 1 + luxonis_train/utils/general.py | 8 +++++--- 7 files changed, 25 insertions(+), 7 deletions(-) diff --git a/configs/README.md b/configs/README.md index c1f4889b..01d1ebd3 100644 --- a/configs/README.md +++ b/configs/README.md @@ -144,6 +144,7 @@ Here you can change everything related to actual training of the model. | Key | Type | Default value | Description | | ----------------------- | ---------------------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | +| seed | int | None | seed for reproducibility | | batch_size | int | 32 | batch size used for training | | accumulate_grad_batches | int | 1 | number of batches for gradient accumulation | | use_weighted_sampler | bool | False | bool if use WeightedRandomSampler for training, only works with classification tasks | diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index 94276b60..e3b9c7d5 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -103,6 +103,7 @@ def inspect( opts: OptsType = None, ): """Inspect dataset.""" + from lightning.pytorch import seed_everything from luxonis_ml.data import ( LuxonisDataset, TrainAugmentations, @@ -128,6 +129,8 @@ def inspect( overrides[opts[i]] = opts[i + 1] cfg = Config.get_config(str(config), overrides) + if cfg.trainer.seed is not None: + seed_everything(cfg.trainer.seed, workers=True) image_size = cfg.trainer.preprocessing.train_image_size diff --git a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py index 6d8f3c79..2b2dc7a3 100644 --- a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py @@ -9,7 +9,6 @@ from .utils import ( Color, draw_segmentation_labels, - draw_segmentation_masks, get_color, seg_output_to_bool, ) @@ -63,10 +62,9 @@ def draw_predictions( for i in range(len(canvas)): prediction = predictions[i] mask = seg_output_to_bool(prediction) - mask = 
mask.to(canvas.device) - viz[i] = draw_segmentation_masks( + viz[i] = draw_segmentation_labels( canvas[i].clone(), mask, colors=colors, **kwargs - ) + ).to(canvas.device) return viz @staticmethod diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index 761bc26f..555e464a 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -92,6 +92,11 @@ def __init__( # NOTE: overriding logger in pl so it uses our logger to log device info rank_zero_module.log = logger + deterministic = False + if self.cfg.trainer.seed is not None: + pl.seed_everything(self.cfg.trainer.seed, workers=True) + deterministic = True + self.train_augmentations = TrainAugmentations( image_size=self.cfg.trainer.preprocessing.train_image_size, augmentations=[ @@ -122,6 +127,7 @@ def __init__( # NOTE: this is likely PL bug, # should be configurable inside configure_callbacks(), callbacks=LuxonisProgressBar() if self.cfg.use_rich_text else None, + deterministic=deterministic, ) self.dataset = LuxonisDataset( dataset_name=self.cfg.dataset.name, diff --git a/luxonis_train/core/tuner.py b/luxonis_train/core/tuner.py index c9f8e151..d8e5fa51 100644 --- a/luxonis_train/core/tuner.py +++ b/luxonis_train/core/tuner.py @@ -101,6 +101,12 @@ def _objective(self, trial: optuna.trial.Trial) -> float: [LuxonisProgressBar()] if self.cfg.use_rich_text else [] ) callbacks.append(pruner_callback) + + deterministic = False + if self.cfg.trainer.seed: + pl.seed_everything(cfg.trainer.seed, workers=True) + deterministic = True + pl_trainer = pl.Trainer( accelerator=cfg.trainer.accelerator, devices=cfg.trainer.devices, @@ -112,6 +118,7 @@ def _objective(self, trial: optuna.trial.Trial) -> float: num_sanity_val_steps=cfg.trainer.num_sanity_val_steps, profiler=cfg.trainer.profiler, callbacks=callbacks, + deterministic=deterministic, ) pl_trainer.fit( diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index e94c591e..685c296f 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -206,6 +206,7 @@ class TrainerConfig(CustomBaseModel): matmul_precision: Literal["medium", "high", "highest"] | None = None verbose: bool = True + seed: int | None = None batch_size: int = 32 accumulate_grad_batches: int = 1 use_weighted_sampler: bool = False diff --git a/luxonis_train/utils/general.py b/luxonis_train/utils/general.py index 9ea5884d..ebe75ebd 100644 --- a/luxonis_train/utils/general.py +++ b/luxonis_train/utils/general.py @@ -265,7 +265,7 @@ def validate_packet(data: Packet[Tensor], protocol: type[BaseModel]) -> Packet[T # TEST: def traverse_graph( graph: dict[str, list[str]], nodes: dict[str, T] -) -> Generator[tuple[str, T, list[str], set[str]], None, None]: +) -> Generator[tuple[str, T, list[str], list[str]], None, None]: """Traverses the graph in topological order. @type graph: dict[str, list[str]] @@ -273,12 +273,14 @@ def traverse_graph( names, values are inputs to the node (list of node names). @type nodes: dict[str, T] @param nodes: Dictionary mapping node names to node objects. - @rtype: Generator[tuple[str, T, list[str], set[str]], None, None] + @rtype: Generator[tuple[str, T, list[str], list[str]], None, None] @return: Generator of tuples containing node name, node object, node dependencies and unprocessed nodes. @raises RuntimeError: If the graph is malformed. 
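Putting the two new trainer options from these patches side by side, an illustrative config snippet (the values are arbitrary examples, not recommendations):

trainer:
  seed: 42                   # forwarded to seed_everything(); also switches the PL Trainer to deterministic=True
  matmul_precision: medium   # forwarded to torch.set_float32_matmul_precision()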
""" - unprocessed_nodes = set(nodes.keys()) + unprocessed_nodes = sorted( + set(nodes.keys()) + ) # sort the set to allow reproducibility processed: set[str] = set() while unprocessed_nodes: From 99b18575784ea9a86125884cfb4203d60cff9b86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Tue, 21 May 2024 05:30:34 +0200 Subject: [PATCH 18/75] Custom Loaders Support (#27) * support for custom loaders and datasets * updated configs * custom loaders in inspect command * updated inspect for multi-task labels * removed custom loader from test config * deleted comment * deleted comment * removed custom dataset * removed comment * skipping archiver test untill fixed in luxonis-ml * [Automated] Updated coverage badge --------- Co-authored-by: GitHub Actions --- configs/classification_model.yaml | 5 +- configs/coco_model.yaml | 6 +- configs/detection_model.yaml | 5 +- configs/example_export.yaml | 5 +- configs/example_tuning.yaml | 5 +- configs/keypoint_bbox_model.yaml | 5 +- configs/resnet_model.yaml | 5 +- configs/segmentation_model.yaml | 5 +- luxonis_train/__init__.py | 1 + luxonis_train/__main__.py | 113 ++++++++---------- luxonis_train/callbacks/test_on_train_end.py | 39 +----- luxonis_train/core/__init__.py | 3 +- luxonis_train/core/archiver.py | 2 +- luxonis_train/core/core.py | 84 ++++++------- luxonis_train/core/exporter.py | 2 +- luxonis_train/core/inferer.py | 6 +- luxonis_train/core/trainer.py | 21 ++-- luxonis_train/core/tuner.py | 7 +- luxonis_train/models/luxonis_model.py | 4 +- luxonis_train/utils/config.py | 21 +--- luxonis_train/utils/general.py | 27 +++-- luxonis_train/utils/loaders/__init__.py | 13 +- luxonis_train/utils/loaders/base_loader.py | 32 ++++- .../utils/loaders/luxonis_loader_torch.py | 38 ++++-- luxonis_train/utils/registry.py | 3 + media/coverage_badge.svg | 4 +- tests/unittests/test_core/test_archiver.py | 5 +- 27 files changed, 239 insertions(+), 227 deletions(-) diff --git a/configs/classification_model.yaml b/configs/classification_model.yaml index 62c1014e..5d2eb1f2 100755 --- a/configs/classification_model.yaml +++ b/configs/classification_model.yaml @@ -15,8 +15,9 @@ model: thickness: 2 include_plot: True -dataset: - name: cifar10_test +loader: + params: + dataset_name: cifar10_test trainer: preprocessing: diff --git a/configs/coco_model.yaml b/configs/coco_model.yaml index 67f3b91d..c8ffff69 100755 --- a/configs/coco_model.yaml +++ b/configs/coco_model.yaml @@ -95,12 +95,14 @@ tracker: wandb_entity: luxonis is_mlflow: False -dataset: - name: coco_test +loader: train_view: train val_view: val test_view: test + params: + dataset_name: coco_test + trainer: accelerator: auto devices: auto diff --git a/configs/detection_model.yaml b/configs/detection_model.yaml index 8d7f9c25..899e317d 100755 --- a/configs/detection_model.yaml +++ b/configs/detection_model.yaml @@ -10,8 +10,9 @@ model: params: use_neck: True -dataset: - name: coco_test +loader: + params: + dataset_name: coco_test trainer: preprocessing: diff --git a/configs/example_export.yaml b/configs/example_export.yaml index a999a2bd..7aadc30c 100755 --- a/configs/example_export.yaml +++ b/configs/example_export.yaml @@ -12,8 +12,9 @@ model: backbone: MicroNet task: binary -dataset: - name: coco_test +loader: + params: + dataset_name: coco_test trainer: preprocessing: diff --git a/configs/example_tuning.yaml b/configs/example_tuning.yaml index 980036ae..41c4d8a8 100755 --- a/configs/example_tuning.yaml +++ b/configs/example_tuning.yaml @@ -11,8 +11,9 @@ model: backbone: MicroNet task: 
binary -dataset: - name: coco_test +loader: + params: + dataset_name: coco_test trainer: preprocessing: diff --git a/configs/keypoint_bbox_model.yaml b/configs/keypoint_bbox_model.yaml index dc4fe3d7..8cdd3149 100755 --- a/configs/keypoint_bbox_model.yaml +++ b/configs/keypoint_bbox_model.yaml @@ -8,8 +8,9 @@ model: predefined_model: name: KeypointDetectionModel -dataset: - name: coco_test +loader: + params: + dataset_name: coco_test trainer: preprocessing: diff --git a/configs/resnet_model.yaml b/configs/resnet_model.yaml index e768d259..e8353870 100644 --- a/configs/resnet_model.yaml +++ b/configs/resnet_model.yaml @@ -29,8 +29,9 @@ model: thickness: 2 include_plot: True -dataset: - name: cifar10_test +loader: + params: + dataset_name: cifar10_test trainer: batch_size: 4 diff --git a/configs/segmentation_model.yaml b/configs/segmentation_model.yaml index c26fb0cc..b7becbfa 100755 --- a/configs/segmentation_model.yaml +++ b/configs/segmentation_model.yaml @@ -11,8 +11,9 @@ model: backbone: MicroNet task: binary -dataset: - name: coco_test +loader: + params: + dataset_name: coco_test trainer: preprocessing: diff --git a/luxonis_train/__init__.py b/luxonis_train/__init__.py index 59ec7367..066e1110 100644 --- a/luxonis_train/__init__.py +++ b/luxonis_train/__init__.py @@ -1,4 +1,5 @@ from .attached_modules import * +from .core import * from .models import * from .utils import * diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index e3b9c7d5..f749439f 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -5,8 +5,10 @@ from typing import Annotated, Optional import cv2 -import torch import typer +from torch.utils.data import DataLoader + +from luxonis_train.utils.registry import LOADERS app = typer.Typer(help="Luxonis Train CLI", add_completion=False) @@ -105,7 +107,6 @@ def inspect( """Inspect dataset.""" from lightning.pytorch import seed_everything from luxonis_ml.data import ( - LuxonisDataset, TrainAugmentations, ValAugmentations, ) @@ -117,7 +118,7 @@ def inspect( get_unnormalized_images, ) from luxonis_train.utils.config import Config - from luxonis_train.utils.loaders import LuxonisLoaderTorch, collate_fn + from luxonis_train.utils.loaders import collate_fn from luxonis_train.utils.types import LabelType overrides = {} @@ -134,43 +135,21 @@ def inspect( image_size = cfg.trainer.preprocessing.train_image_size - dataset = LuxonisDataset( - dataset_name=cfg.dataset.name, - team_id=cfg.dataset.team_id, - dataset_id=cfg.dataset.id, - bucket_type=cfg.dataset.bucket_type, - bucket_storage=cfg.dataset.bucket_storage, - ) - augmentations = ( - TrainAugmentations( - image_size=image_size, - augmentations=[ - i.model_dump() for i in cfg.trainer.preprocessing.augmentations - ], - train_rgb=cfg.trainer.preprocessing.train_rgb, - keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio, - ) - if view == "train" - else ValAugmentations( - image_size=image_size, - augmentations=[ - i.model_dump() for i in cfg.trainer.preprocessing.augmentations - ], - train_rgb=cfg.trainer.preprocessing.train_rgb, - keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio, - ) + augmentations = (TrainAugmentations if view == "train" else ValAugmentations)( + image_size=image_size, + augmentations=[i.model_dump() for i in cfg.trainer.preprocessing.augmentations], + train_rgb=cfg.trainer.preprocessing.train_rgb, + keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio, ) - loader_train = LuxonisLoaderTorch( - dataset, - view=view, - augmentations=augmentations, + 
loader = LOADERS.get(cfg.loader.name)( + view=view, augmentations=augmentations, **cfg.loader.params ) - pytorch_loader_train = torch.utils.data.DataLoader( - loader_train, - batch_size=4, - num_workers=1, + pytorch_loader = DataLoader( + loader, + batch_size=1, + num_workers=0, collate_fn=collate_fn, ) @@ -178,35 +157,41 @@ def inspect( os.makedirs(save_dir, exist_ok=True) counter = 0 - for data in pytorch_loader_train: - imgs, label_dict = data - images = get_unnormalized_images(cfg, imgs) - for i, img in enumerate(images): - for label_type, labels in label_dict.items(): - if label_type == LabelType.CLASSIFICATION: - continue - elif label_type == LabelType.BOUNDINGBOX: - img = draw_bounding_box_labels( - img, labels[labels[:, 0] == i][:, 2:], colors="yellow", width=1 - ) - elif label_type == LabelType.KEYPOINT: - img = draw_keypoint_labels( - img, labels[labels[:, 0] == i][:, 1:], colors="red" + for data in pytorch_loader: + imgs, task_dict = data + for task, label_dict in task_dict.items(): + images = get_unnormalized_images(cfg, imgs) + for i, img in enumerate(images): + for label_type, labels in label_dict.items(): + if label_type == LabelType.CLASSIFICATION: + continue + elif label_type == LabelType.BOUNDINGBOX: + img = draw_bounding_box_labels( + img, + labels[labels[:, 0] == i][:, 2:], + colors="yellow", + width=1, + ) + elif label_type == LabelType.KEYPOINT: + img = draw_keypoint_labels( + img, labels[labels[:, 0] == i][:, 1:], colors="red" + ) + elif label_type == LabelType.SEGMENTATION: + img = draw_segmentation_labels( + img, labels[i], alpha=0.8, colors="#5050FF" + ) + + img_arr = img.permute(1, 2, 0).numpy() + img_arr = cv2.cvtColor(img_arr, cv2.COLOR_RGB2BGR) + if save_dir is not None: + counter += 1 + cv2.imwrite( + os.path.join(save_dir, f"{counter}_{task}.png"), img_arr ) - elif label_type == LabelType.SEGMENTATION: - img = draw_segmentation_labels( - img, labels[i], alpha=0.8, colors="#5050FF" - ) - - img_arr = img.permute(1, 2, 0).numpy() - img_arr = cv2.cvtColor(img_arr, cv2.COLOR_RGB2BGR) - if save_dir is not None: - counter += 1 - cv2.imwrite(os.path.join(save_dir, f"{counter}.png"), img_arr) - else: - cv2.imshow("img", img_arr) - if cv2.waitKey() == ord("q"): - exit() + else: + cv2.imshow(task, img_arr) + if save_dir is None and cv2.waitKey() == ord("q"): + exit() @app.command() diff --git a/luxonis_train/callbacks/test_on_train_end.py b/luxonis_train/callbacks/test_on_train_end.py index 8cf23e3c..3f8da1db 100644 --- a/luxonis_train/callbacks/test_on_train_end.py +++ b/luxonis_train/callbacks/test_on_train_end.py @@ -1,9 +1,6 @@ import lightning.pytorch as pl -from luxonis_ml.data import LuxonisDataset, ValAugmentations -from torch.utils.data import DataLoader -from luxonis_train.utils.config import Config -from luxonis_train.utils.loaders import LuxonisLoaderTorch, collate_fn +import luxonis_train from luxonis_train.utils.registry import CALLBACKS @@ -11,33 +8,7 @@ class TestOnTrainEnd(pl.Callback): """Callback to perform a test run at the end of the training.""" - def on_train_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> None: - cfg: Config = pl_module.cfg - - dataset = LuxonisDataset( - dataset_name=cfg.dataset.name, - team_id=cfg.dataset.team_id, - dataset_id=cfg.dataset.id, - bucket_type=cfg.dataset.bucket_type, - bucket_storage=cfg.dataset.bucket_storage, - ) - - loader_test = LuxonisLoaderTorch( - dataset, - view=cfg.dataset.test_view, - augmentations=ValAugmentations( - image_size=cfg.trainer.preprocessing.train_image_size, - 
augmentations=[ - i.model_dump() for i in cfg.trainer.preprocessing.augmentations - ], - train_rgb=cfg.trainer.preprocessing.train_rgb, - keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio, - ), - ) - pytorch_loader_test = DataLoader( - loader_test, - batch_size=cfg.trainer.batch_size, - num_workers=cfg.trainer.num_workers, - collate_fn=collate_fn, - ) - trainer.test(pl_module, pytorch_loader_test) + def on_train_end( + self, trainer: pl.Trainer, pl_module: "luxonis_train.models.LuxonisModel" + ) -> None: + trainer.test(pl_module, pl_module._core.pytorch_loaders["test"]) diff --git a/luxonis_train/core/__init__.py b/luxonis_train/core/__init__.py index d3e89663..7e60f321 100644 --- a/luxonis_train/core/__init__.py +++ b/luxonis_train/core/__init__.py @@ -1,7 +1,8 @@ from .archiver import Archiver +from .core import Core from .exporter import Exporter from .inferer import Inferer from .trainer import Trainer from .tuner import Tuner -__all__ = ["Exporter", "Trainer", "Tuner", "Inferer", "Archiver"] +__all__ = ["Exporter", "Trainer", "Tuner", "Inferer", "Archiver", "Core"] diff --git a/luxonis_train/core/archiver.py b/luxonis_train/core/archiver.py index 58fc231f..a0706846 100644 --- a/luxonis_train/core/archiver.py +++ b/luxonis_train/core/archiver.py @@ -45,7 +45,7 @@ def __init__( cfg=self.cfg, dataset_metadata=self.dataset_metadata, save_dir=self.run_save_dir, - input_shape=self.loader_train.input_shape, + input_shape=self.loaders["train"].input_shape, ) self.model_name = self.cfg.model.name diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index 555e464a..60beb624 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -7,14 +7,16 @@ import lightning_utilities.core.rank_zero as rank_zero_module import rich.traceback import torch +import torch.utils.data as torch_data from lightning.pytorch.utilities import rank_zero_only # type: ignore -from luxonis_ml.data import LuxonisDataset, TrainAugmentations, ValAugmentations +from luxonis_ml.data import TrainAugmentations, ValAugmentations from luxonis_ml.utils import reset_logging, setup_logging from luxonis_train.callbacks import LuxonisProgressBar from luxonis_train.utils.config import Config from luxonis_train.utils.general import DatasetMetadata -from luxonis_train.utils.loaders import LuxonisLoaderTorch, collate_fn +from luxonis_train.utils.loaders import collate_fn +from luxonis_train.utils.registry import LOADERS from luxonis_train.utils.tracker import LuxonisTrackerPL logger = getLogger(__name__) @@ -129,42 +131,19 @@ def __init__( callbacks=LuxonisProgressBar() if self.cfg.use_rich_text else None, deterministic=deterministic, ) - self.dataset = LuxonisDataset( - dataset_name=self.cfg.dataset.name, - team_id=self.cfg.dataset.team_id, - dataset_id=self.cfg.dataset.id, - bucket_type=self.cfg.dataset.bucket_type, - bucket_storage=self.cfg.dataset.bucket_storage, - ) - - self.loader_train = LuxonisLoaderTorch( - self.dataset, - view=self.cfg.dataset.train_view, - augmentations=self.train_augmentations, - ) - self.loader_val = LuxonisLoaderTorch( - self.dataset, - view=self.cfg.dataset.val_view, - augmentations=self.val_augmentations, - ) - self.loader_test = LuxonisLoaderTorch( - self.dataset, - view=self.cfg.dataset.test_view, - augmentations=self.val_augmentations, - ) - self.pytorch_loader_val = torch.utils.data.DataLoader( - self.loader_val, - batch_size=self.cfg.trainer.batch_size, - num_workers=self.cfg.trainer.num_workers, - collate_fn=collate_fn, - ) - self.pytorch_loader_test = 
torch.utils.data.DataLoader( - self.loader_test, - batch_size=self.cfg.trainer.batch_size, - num_workers=self.cfg.trainer.num_workers, - collate_fn=collate_fn, - ) + self.loaders = { + view: LOADERS.get(self.cfg.loader.name)( + augmentations=self.train_augmentations + if view == "train" + else self.val_augmentations, + view=self.cfg.loader.train_view + if view == "train" + else self.cfg.loader.val_view, + **self.cfg.loader.params, + ) + for view in ["train", "val", "test"] + } sampler = None if self.cfg.trainer.use_weighted_sampler: classes_count = self.dataset.get_classes()[1] @@ -175,21 +154,26 @@ def __init__( else: weights = [1 / i for i in classes_count.values()] num_samples = sum(classes_count.values()) - sampler = torch.utils.data.WeightedRandomSampler(weights, num_samples) - - self.pytorch_loader_train = torch.utils.data.DataLoader( - self.loader_train, - shuffle=True, - batch_size=self.cfg.trainer.batch_size, - num_workers=self.cfg.trainer.num_workers, - collate_fn=collate_fn, - drop_last=self.cfg.trainer.skip_last_batch, - sampler=sampler, - ) + sampler = torch_data.WeightedRandomSampler(weights, num_samples) + + self.pytorch_loaders = { + view: torch_data.DataLoader( + self.loaders[view], + batch_size=self.cfg.trainer.batch_size, + num_workers=self.cfg.trainer.num_workers, + collate_fn=collate_fn, + shuffle=view == "train", + drop_last=self.cfg.trainer.skip_last_batch + if view == "train" + else False, + sampler=sampler if view == "train" else None, + ) + for view in ["train", "val", "test"] + } self.error_message = None - self.dataset_metadata = DatasetMetadata.from_dataset(self.dataset) - self.dataset_metadata.set_loader(self.pytorch_loader_train) + self.dataset_metadata = DatasetMetadata.from_loader(self.loaders["train"]) + self.dataset_metadata.set_loader(self.pytorch_loaders["train"]) self.cfg.save_data(os.path.join(self.run_save_dir, "config.yaml")) diff --git a/luxonis_train/core/exporter.py b/luxonis_train/core/exporter.py index 0efd6d56..5318931f 100644 --- a/luxonis_train/core/exporter.py +++ b/luxonis_train/core/exporter.py @@ -42,7 +42,7 @@ def __init__( ) self.local_path = self.cfg.model.weights if input_shape is None: - self.input_shape = self.loader_val.input_shape + self.input_shape = self.loaders["val"].input_shape else: self.input_shape = Size(input_shape) diff --git a/luxonis_train/core/inferer.py b/luxonis_train/core/inferer.py index b4d13b77..710c4bb2 100644 --- a/luxonis_train/core/inferer.py +++ b/luxonis_train/core/inferer.py @@ -22,11 +22,11 @@ def __init__( opts += ["trainer.batch_size", "1"] super().__init__(cfg, opts) if view == "train": - self.loader = self.pytorch_loader_train + self.loader = self.pytorch_loaders["train"] elif view == "test": - self.loader = self.pytorch_loader_test + self.loader = self.pytorch_loaders["test"] else: - self.loader = self.pytorch_loader_val + self.loader = self.pytorch_loaders["val"] self.save_dir = save_dir if self.save_dir is not None: self.save_dir.mkdir(exist_ok=True, parents=True) diff --git a/luxonis_train/core/trainer.py b/luxonis_train/core/trainer.py index fc634544..ef20dc9e 100644 --- a/luxonis_train/core/trainer.py +++ b/luxonis_train/core/trainer.py @@ -52,11 +52,12 @@ def __init__( cfg=self.cfg, dataset_metadata=self.dataset_metadata, save_dir=self.run_save_dir, - input_shape=self.loader_train.input_shape, + input_shape=self.loaders["train"].input_shape, ) + self.lightning_module._core = self - def graceful_exit(signum, frame): - logger.info("SIGTERM received, stopping training...") + def 
graceful_exit(signum: int, _): + logger.info(f"{signal.Signals(signum).name} received, stopping training...") ckpt_path = osp.join(self.run_save_dir, "resume.ckpt") self.pl_trainer.save_checkpoint(ckpt_path) self._upload_logs() @@ -111,8 +112,8 @@ def train(self, new_thread: bool = False) -> None: logger.info("Starting training...") self._trainer_fit( self.lightning_module, - self.pytorch_loader_train, - self.pytorch_loader_val, + self.pytorch_loaders["train"], + self.pytorch_loaders["val"], ) logger.info("Training finished") logger.info(f"Checkpoints saved in: {self.get_save_dir()}") @@ -128,8 +129,8 @@ def thread_exception_hook(args): target=self._trainer_fit, args=( self.lightning_module, - self.pytorch_loader_train, - self.pytorch_loader_val, + self.pytorch_loaders["train"], + self.pytorch_loaders["val"], ), daemon=True, ) @@ -145,11 +146,11 @@ def test( """ if view == "test": - loader = self.pytorch_loader_test + loader = self.pytorch_loaders["test"] elif view == "val": - loader = self.pytorch_loader_val + loader = self.pytorch_loaders["val"] elif view == "train": - loader = self.pytorch_loader_train + loader = self.pytorch_loaders["train"] if not new_thread: self.pl_trainer.test(self.lightning_module, loader) diff --git a/luxonis_train/core/tuner.py b/luxonis_train/core/tuner.py index d8e5fa51..4635789c 100644 --- a/luxonis_train/core/tuner.py +++ b/luxonis_train/core/tuner.py @@ -92,8 +92,9 @@ def _objective(self, trial: optuna.trial.Trial) -> float: cfg=cfg, dataset_metadata=self.dataset_metadata, save_dir=run_save_dir, - input_shape=self.loader_train.input_shape, + input_shape=self.loaders["train"].input_shape, ) + lightning_module._core = self pruner_callback = PyTorchLightningPruningCallback( trial, monitor="val_loss/loss" ) @@ -123,8 +124,8 @@ def _objective(self, trial: optuna.trial.Trial) -> float: pl_trainer.fit( lightning_module, # type: ignore - self.pytorch_loader_train, - self.pytorch_loader_val, + self.pytorch_loaders["train"], + self.pytorch_loaders["val"], ) pruner_callback.check_pruned() diff --git a/luxonis_train/models/luxonis_model.py b/luxonis_train/models/luxonis_model.py index 58aeccd1..e1dec644 100644 --- a/luxonis_train/models/luxonis_model.py +++ b/luxonis_train/models/luxonis_model.py @@ -12,6 +12,7 @@ from lightning.pytorch.utilities import rank_zero_only # type: ignore from torch import Size, Tensor, nn +import luxonis_train from luxonis_train.attached_modules import ( BaseAttachedModule, BaseLoss, @@ -90,6 +91,7 @@ class LuxonisModel(pl.LightningModule): """ _trainer: pl.Trainer + _core: "luxonis_train.core.Core" logger: LuxonisTrackerPL def __init__( @@ -496,7 +498,7 @@ def process_losses( training_step_output["loss"] = final_loss.detach().cpu() return final_loss, training_step_output - def training_step(self, train_batch: tuple[Tensor, Labels]) -> Tensor: + def training_step(self, train_batch: tuple[Tensor, TaskLabels]) -> Tensor: """Performs one step of training with provided batch.""" outputs = self.forward(*train_batch) assert outputs.losses, "Losses are empty, check if you have defined any loss" diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 685c296f..40638103 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -1,11 +1,9 @@ import logging import sys -from enum import Enum from typing import Annotated, Any, Literal -from luxonis_ml.data import BucketStorage, BucketType from luxonis_ml.utils import Environ, LuxonisConfig, LuxonisFileSystem, setup_logging -from pydantic import BaseModel, 
ConfigDict, Field, field_serializer, model_validator +from pydantic import BaseModel, ConfigDict, Field, model_validator from luxonis_train.utils.general import is_acyclic from luxonis_train.utils.registry import MODELS @@ -131,21 +129,12 @@ class TrackerConfig(CustomBaseModel): is_mlflow: bool = False -class DatasetConfig(CustomBaseModel): - name: str | None = None - id: str | None = None - team_name: str | None = None - team_id: str | None = None - bucket_type: BucketType = BucketType.INTERNAL - bucket_storage: BucketStorage = BucketStorage.LOCAL - json_mode: bool = False +class LoaderConfig(CustomBaseModel): + name: str = "LuxonisLoaderTorch" train_view: str = "train" val_view: str = "val" test_view: str = "test" - - @field_serializer("bucket_storage", "bucket_type") - def get_enum_value(self, v: Enum, _) -> str: - return str(v.value) + params: dict[str, Any] = {} class NormalizeAugmentationConfig(CustomBaseModel): @@ -297,7 +286,7 @@ class TunerConfig(CustomBaseModel): class Config(LuxonisConfig): use_rich_text: bool = True model: ModelConfig - dataset: DatasetConfig = DatasetConfig() + loader: LoaderConfig = LoaderConfig() tracker: TrackerConfig = TrackerConfig() trainer: TrainerConfig = TrainerConfig() exporter: ExportConfig = ExportConfig() diff --git a/luxonis_train/utils/general.py b/luxonis_train/utils/general.py index ebe75ebd..bf3d0e8f 100644 --- a/luxonis_train/utils/general.py +++ b/luxonis_train/utils/general.py @@ -2,12 +2,12 @@ import math from typing import Generator, TypeVar -from luxonis_ml.data import LuxonisDataset from pydantic import BaseModel from torch import Size, Tensor from torch.utils.data import DataLoader from luxonis_train.utils.boxutils import anchors_from_dataset +from luxonis_train.utils.loaders import BaseLoaderTorch from luxonis_train.utils.types import LabelType, Packet @@ -154,7 +154,7 @@ def set_loader(self, loader: DataLoader) -> None: self.loader = loader @classmethod - def from_dataset(cls, dataset: LuxonisDataset) -> "DatasetMetadata": + def from_loader(cls, loader: BaseLoaderTorch) -> "DatasetMetadata": """Creates a L{DatasetMetadata} object from a L{LuxonisDataset}. @type dataset: LuxonisDataset @@ -162,22 +162,23 @@ def from_dataset(cls, dataset: LuxonisDataset) -> "DatasetMetadata": @rtype: DatasetMetadata @return: Instance of L{DatasetMetadata} created from the provided dataset. """ - _, classes = dataset.get_classes() - skeletons = dataset.get_skeletons() + classes = loader.get_classes() + skeletons = loader.get_skeletons() keypoint_names = None connectivity = None - if len(skeletons) == 1: - name = list(skeletons.keys())[0] - keypoint_names = skeletons[name]["labels"] - connectivity = skeletons[name]["edges"] + if skeletons is not None: + if len(skeletons) == 1: + name = list(skeletons.keys())[0] + keypoint_names = skeletons[name]["labels"] + connectivity = skeletons[name]["edges"] - elif len(skeletons) > 1: - raise NotImplementedError( - "The dataset defines multiclass keypoint detection. " - "This is not yet supported." - ) + elif len(skeletons) > 1: + raise NotImplementedError( + "The dataset defines multiclass keypoint detection. " + "This is not yet supported." 
+ ) return cls( classes=classes, diff --git a/luxonis_train/utils/loaders/__init__.py b/luxonis_train/utils/loaders/__init__.py index fe5cc4e8..d25e3856 100644 --- a/luxonis_train/utils/loaders/__init__.py +++ b/luxonis_train/utils/loaders/__init__.py @@ -1,4 +1,13 @@ -from .base_loader import collate_fn +from .base_loader import ( + BaseLoaderTorch, + LuxonisLoaderTorchOutput, + collate_fn, +) from .luxonis_loader_torch import LuxonisLoaderTorch -__all__ = ["LuxonisLoaderTorch", "collate_fn"] +__all__ = [ + "LuxonisLoaderTorch", + "collate_fn", + "BaseLoaderTorch", + "LuxonisLoaderTorchOutput", +] diff --git a/luxonis_train/utils/loaders/base_loader.py b/luxonis_train/utils/loaders/base_loader.py index be12b439..f96f65e1 100644 --- a/luxonis_train/utils/loaders/base_loader.py +++ b/luxonis_train/utils/loaders/base_loader.py @@ -1,6 +1,7 @@ -from abc import ABC, abstractmethod, abstractproperty +from abc import ABC, abstractmethod import torch +from luxonis_ml.data import Augmentations from luxonis_ml.utils.registry import AutoRegisterMeta from torch import Size, Tensor from torch.utils.data import Dataset @@ -22,7 +23,16 @@ class BaseLoaderTorch( """Base abstract loader class that enforces LuxonisLoaderTorchOutput output label structure.""" - @abstractproperty + def __init__( + self, + view: str, + augmentations: Augmentations | None = None, + ): + self.view = view + self.augmentations = augmentations + + @property + @abstractmethod def input_shape(self) -> Size: """Input shape in [N,C,H,W] format.""" ... @@ -43,6 +53,24 @@ def __getitem__(self, idx: int) -> LuxonisLoaderTorchOutput: """ ... + @abstractmethod + def get_classes(self) -> dict[LabelType, list[str]]: + """Gets classes according to computer vision task. + + @rtype: dict[LabelType, list[str]] + @return: A dictionary mapping tasks to their classes. + """ + pass + + def get_skeletons(self) -> dict[str, dict] | None: + """Returns the dictionary defining the semantic skeleton for each class using + keypoints. + + @rtype: Dict[str, Dict] + @return: A dictionary mapping classes to their skeleton definitions. 
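For example (class and keypoint names are made up for illustration), a dataset with a single "person" class might return the structure that DatasetMetadata.from_loader above reads the "labels" and "edges" entries from:

{
    "person": {
        "labels": ["nose", "left_eye", "right_eye"],
        "edges": [[0, 1], [0, 2]],
    }
}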
+ """ + return None + def collate_fn( batch: list[LuxonisLoaderTorchOutput], diff --git a/luxonis_train/utils/loaders/luxonis_loader_torch.py b/luxonis_train/utils/loaders/luxonis_loader_torch.py index dfd4091a..6a375436 100644 --- a/luxonis_train/utils/loaders/luxonis_loader_torch.py +++ b/luxonis_train/utils/loaders/luxonis_loader_torch.py @@ -1,5 +1,11 @@ import numpy as np -from luxonis_ml.data import Augmentations, LuxonisDataset, LuxonisLoader +from luxonis_ml.data import ( + BucketStorage, + BucketType, + LabelType, + LuxonisDataset, + LuxonisLoader, +) from torch import Size, Tensor from .base_loader import BaseLoaderTorch, LuxonisLoaderTorchOutput @@ -8,16 +14,27 @@ class LuxonisLoaderTorch(BaseLoaderTorch): def __init__( self, - dataset: LuxonisDataset, - view: str = "train", + dataset_name: str | None = None, + team_id: str | None = None, + dataset_id: str | None = None, + bucket_type: BucketType = BucketType.INTERNAL, + bucket_storage: BucketStorage = BucketStorage.LOCAL, stream: bool = False, - augmentations: Augmentations | None = None, + **kwargs, ): + super().__init__(**kwargs) + self.dataset = LuxonisDataset( + dataset_name=dataset_name, + team_id=team_id, + dataset_id=dataset_id, + bucket_type=bucket_type, + bucket_storage=bucket_storage, + ) self.base_loader = LuxonisLoader( - dataset=dataset, - view=view, + dataset=self.dataset, + view=self.view, stream=stream, - augmentations=augmentations, + augmentations=self.augmentations, ) def __len__(self) -> int: @@ -39,3 +56,10 @@ def __getitem__(self, idx: int) -> LuxonisLoaderTorchOutput: annotations[key] = Tensor(annotations[key]) # type: ignore return tensor_img, group_annotations + + def get_classes(self) -> dict[LabelType, list[str]]: + _, classes = self.dataset.get_classes() + return {LabelType(task): classes[task] for task in classes} + + def get_skeletons(self) -> dict[str, dict] | None: + return self.dataset.get_skeletons() diff --git a/luxonis_train/utils/registry.py b/luxonis_train/utils/registry.py index 7f76df7c..6da8893a 100644 --- a/luxonis_train/utils/registry.py +++ b/luxonis_train/utils/registry.py @@ -3,6 +3,9 @@ from luxonis_ml.utils.registry import Registry +LOADERS = Registry(name="loaders") +"""Registry for all loaders.""" + CALLBACKS = Registry(name="callbacks") """Registry for all callbacks.""" diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 7a18c7f4..b750dd9c 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 80% - 80% + 77% + 77% diff --git a/tests/unittests/test_core/test_archiver.py b/tests/unittests/test_core/test_archiver.py index fe10a46e..52449e6a 100644 --- a/tests/unittests/test_core/test_archiver.py +++ b/tests/unittests/test_core/test_archiver.py @@ -4,11 +4,13 @@ import random import shutil import tarfile +import unittest import cv2 import lightning.pytorch as pl import numpy as np import onnx +import pytest from luxonis_ml.data import LuxonisDataset from luxonis_ml.nn_archive.config_building_blocks.base_models import head_outputs from parameterized import parameterized @@ -23,7 +25,8 @@ HEAD_NAMES = [head_name for head_name in ImplementedHeads.__members__] -class TestArchiver: +@pytest.mark.skip() +class TestArchiver(unittest.TestCase): @classmethod def setup_class(cls): """Creates all files required for testing.""" From b6b46889002da00fd7f832c4a1c1b3d957175a1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Tue, 21 May 2024 15:46:31 +0200 Subject: [PATCH 19/75] enums handling (#31) --- 
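With loaders now resolved through the LOADERS registry, a dataset that does not live in a LuxonisDataset can be plugged in by subclassing BaseLoaderTorch. A minimal sketch (class name, sizes and labels are made up for illustration):

import torch
from torch import Size, Tensor

from luxonis_train.utils.loaders import BaseLoaderTorch, LuxonisLoaderTorchOutput
from luxonis_train.utils.types import LabelType


class RandomClassificationLoader(BaseLoaderTorch):
    """Toy loader serving random images with two-class classification labels.

    Subclassing BaseLoaderTorch should be enough for the LOADERS registry to
    pick the class up via AutoRegisterMeta; otherwise LOADERS.register_module()
    can be applied explicitly.
    """

    def __len__(self) -> int:
        return 16

    @property
    def input_shape(self) -> Size:
        return Size([1, 3, 224, 224])  # [N, C, H, W]

    def __getitem__(self, idx: int) -> LuxonisLoaderTorchOutput:
        img: Tensor = torch.rand(3, 224, 224)
        labels = {LabelType.CLASSIFICATION: torch.tensor([0.0, 1.0])}
        return img, {"default": labels}

    def get_classes(self) -> dict[LabelType, list[str]]:
        return {LabelType.CLASSIFICATION: ["cat", "dog"]}

Such a loader would then be selected with loader.name: RandomClassificationLoader in the config, with any constructor arguments passed under loader.params.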
luxonis_train/utils/loaders/luxonis_loader_torch.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/luxonis_train/utils/loaders/luxonis_loader_torch.py b/luxonis_train/utils/loaders/luxonis_loader_torch.py index 6a375436..b2eeb168 100644 --- a/luxonis_train/utils/loaders/luxonis_loader_torch.py +++ b/luxonis_train/utils/loaders/luxonis_loader_torch.py @@ -1,3 +1,5 @@ +from typing import Literal + import numpy as np from luxonis_ml.data import ( BucketStorage, @@ -17,8 +19,8 @@ def __init__( dataset_name: str | None = None, team_id: str | None = None, dataset_id: str | None = None, - bucket_type: BucketType = BucketType.INTERNAL, - bucket_storage: BucketStorage = BucketStorage.LOCAL, + bucket_type: Literal["internal", "external"] = "internal", + bucket_storage: Literal["local", "s3", "gcs", "azure"] = "local", stream: bool = False, **kwargs, ): @@ -27,8 +29,8 @@ def __init__( dataset_name=dataset_name, team_id=team_id, dataset_id=dataset_id, - bucket_type=bucket_type, - bucket_storage=bucket_storage, + bucket_type=BucketType(bucket_type), + bucket_storage=BucketStorage(bucket_storage), ) self.base_loader = LuxonisLoader( dataset=self.dataset, From 72afb721ac093b269947dd5168a92016820beeca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Fri, 24 May 2024 18:28:46 +0200 Subject: [PATCH 20/75] GPUStatsMonitor (#29) Co-authored-by: GitHub Actions --- configs/coco_model.yaml | 1 - luxonis_train/__main__.py | 6 +- luxonis_train/callbacks/README.md | 9 +- luxonis_train/callbacks/__init__.py | 2 + luxonis_train/callbacks/gpu_stats_monitor.py | 293 +++++++++++++++++++ luxonis_train/core/core.py | 2 +- luxonis_train/models/luxonis_model.py | 21 +- media/coverage_badge.svg | 4 +- requirements.txt | 1 + 9 files changed, 326 insertions(+), 13 deletions(-) create mode 100644 luxonis_train/callbacks/gpu_stats_monitor.py diff --git a/configs/coco_model.yaml b/configs/coco_model.yaml index c8ffff69..cad138a5 100755 --- a/configs/coco_model.yaml +++ b/configs/coco_model.yaml @@ -155,7 +155,6 @@ trainer: monitor: val/loss mode: min verbose: true - - name: DeviceStatsMonitor - name: ExportOnTrainEnd - name: TestOnTrainEnd diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index f749439f..7b8e0251 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -10,7 +10,11 @@ from luxonis_train.utils.registry import LOADERS -app = typer.Typer(help="Luxonis Train CLI", add_completion=False) +app = typer.Typer( + help="Luxonis Train CLI", + add_completion=False, + pretty_exceptions_show_locals=False, +) class View(str, Enum): diff --git a/luxonis_train/callbacks/README.md b/luxonis_train/callbacks/README.md index be441017..6c4d635b 100644 --- a/luxonis_train/callbacks/README.md +++ b/luxonis_train/callbacks/README.md @@ -15,11 +15,12 @@ List of all supported callbacks. List of supported callbacks from `lightning.pytorch`. 
+- [GPUStatsMonitor](https://pytorch-lightning.readthedocs.io/en/1.5.10/api/pytorch_lightning.callbacks.gpu_stats_monitor.html) - [DeviceStatsMonitor](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.DeviceStatsMonitor.html#lightning.pytorch.callbacks.DeviceStatsMonitor) -- [ EarlyStopping ](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.EarlyStopping.html#lightning.pytorch.callbacks.EarlyStopping) -- [ LearningRateMonitor ](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.LearningRateMonitor.html#lightning.pytorch.callbacks.LearningRateMonitor) -- [ ModelCheckpoint ](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelCheckpoint.html#lightning.pytorch.callbacks.ModelCheckpoint) -- [ RichModelSummary ](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.RichModelSummary.html#lightning.pytorch.callbacks.RichModelSummary) +- [EarlyStopping](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.EarlyStopping.html#lightning.pytorch.callbacks.EarlyStopping) +- [LearningRateMonitor](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.LearningRateMonitor.html#lightning.pytorch.callbacks.LearningRateMonitor) +- [ModelCheckpoint](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelCheckpoint.html#lightning.pytorch.callbacks.ModelCheckpoint) +- [RichModelSummary](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.RichModelSummary.html#lightning.pytorch.callbacks.RichModelSummary) - Added automatically if `use_rich_text` is set to `True` in [config](../../configs/README.md#topleveloptions). ## ExportOnTrainEnd diff --git a/luxonis_train/callbacks/__init__.py b/luxonis_train/callbacks/__init__.py index ae1fe86e..84d2d1cf 100644 --- a/luxonis_train/callbacks/__init__.py +++ b/luxonis_train/callbacks/__init__.py @@ -10,6 +10,7 @@ from .archive_on_train_end import ArchiveOnTrainEnd from .export_on_train_end import ExportOnTrainEnd +from .gpu_stats_monitor import GPUStatsMonitor from .luxonis_progress_bar import LuxonisProgressBar from .metadata_logger import MetadataLogger from .module_freezer import ModuleFreezer @@ -31,4 +32,5 @@ "ModuleFreezer", "TestOnTrainEnd", "UploadCheckpoint", + "GPUStatsMonitor", ] diff --git a/luxonis_train/callbacks/gpu_stats_monitor.py b/luxonis_train/callbacks/gpu_stats_monitor.py new file mode 100644 index 00000000..9479d4d2 --- /dev/null +++ b/luxonis_train/callbacks/gpu_stats_monitor.py @@ -0,0 +1,293 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +GPU Stats Monitor +================= + +Monitor and logs GPU stats during training. 
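Like the other callbacks, it can be enabled and tuned from the trainer section of the config; an illustrative snippet (parameter names follow the constructor defined below):

trainer:
  callbacks:
    - name: GPUStatsMonitor
      params:
        intra_step_time: true   # also log how long each training step takes
        temperature: true       # also log GPU and memory temperatures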
+ +""" + +import os +import shutil +import subprocess +import time +from typing import Any, Dict, List, Optional, Tuple + +import pytorch_lightning as pl +import torch +from lightning.pytorch.accelerators import CUDAAccelerator # type: ignore +from pytorch_lightning.utilities import rank_zero_only +from pytorch_lightning.utilities.exceptions import ( + MisconfigurationException, # type: ignore +) +from pytorch_lightning.utilities.parsing import AttributeDict +from pytorch_lightning.utilities.types import STEP_OUTPUT + +from luxonis_train.utils.registry import CALLBACKS + + +@CALLBACKS.register_module() +class GPUStatsMonitor(pl.Callback): + """Automatically monitors and logs GPU stats during training stage. + C{GPUStatsMonitor} is a callback and in order to use it you need to assign a logger + in the C{Trainer}. + + Args: + memory_utilization: Set to C{True} to monitor used, free and percentage of memory + utilization at the start and end of each step. Default: C{True}. + gpu_utilization: Set to C{True} to monitor percentage of GPU utilization + at the start and end of each step. Default: C{True}. + intra_step_time: Set to C{True} to monitor the time of each step. Default: {False}. + inter_step_time: Set to C{True} to monitor the time between the end of one step + and the start of the next step. Default: C{False}. + fan_speed: Set to C{True} to monitor percentage of fan speed. Default: C{False}. + temperature: Set to C{True} to monitor the memory and gpu temperature in degree Celsius. + Default: C{False}. + + Raises: + MisconfigurationException: + If NVIDIA driver is not installed, not running on GPUs, or C{Trainer} has no logger. + + Example:: + + >>> from pytorch_lightning import Trainer + >>> from pytorch_lightning.callbacks import GPUStatsMonitor + >>> gpu_stats = GPUStatsMonitor() # doctest: +SKIP + >>> trainer = Trainer(callbacks=[gpu_stats]) # doctest: +SKIP + + GPU stats are mainly based on C{nvidia-smi --query-gpu} command. The description of the queries is as follows: + + - **fan.speed** – The fan speed value is the percent of maximum speed that the device's fan is currently + intended to run at. It ranges from 0 to 100 %. Note: The reported speed is the intended fan speed. + If the fan is physically blocked and unable to spin, this output will not match the actual fan speed. + Many parts do not report fan speeds because they rely on cooling via fans in the surrounding enclosure. + - **memory.used** – Total memory allocated by active contexts. + - **memory.free** – Total free memory. + - **utilization.gpu** – Percent of time over the past sample period during which one or more kernels was + executing on the GPU. The sample period may be between 1 second and 1/6 second depending on the product. + - **utilization.memory** – Percent of time over the past sample period during which global (device) memory was + being read or written. The sample period may be between 1 second and 1/6 second depending on the product. + - **temperature.gpu** – Core GPU temperature, in degrees C. + - **temperature.memory** – HBM memory temperature, in degrees C. + """ + + def __init__( + self, + memory_utilization: bool = True, + gpu_utilization: bool = True, + intra_step_time: bool = False, + inter_step_time: bool = False, + fan_speed: bool = False, + temperature: bool = False, + ): + super().__init__() + + if shutil.which("nvidia-smi") is None: + raise MisconfigurationException( + "Cannot use GPUStatsMonitor callback because NVIDIA driver is not installed." 
+            )
+
+        self._log_stats = AttributeDict(
+            {
+                "memory_utilization": memory_utilization,
+                "gpu_utilization": gpu_utilization,
+                "intra_step_time": intra_step_time,
+                "inter_step_time": inter_step_time,
+                "fan_speed": fan_speed,
+                "temperature": temperature,
+            }
+        )
+
+        # The logical device IDs for selected devices
+        self._device_ids: List[int] = []  # will be assigned later in setup()
+
+        # The unmasked real GPU IDs
+        self._gpu_ids: List[str] = []  # will be assigned later in setup()
+
+    @staticmethod
+    def is_available() -> bool:
+        if shutil.which("nvidia-smi") is None:
+            return False
+        return CUDAAccelerator.is_available()
+
+    def setup(
+        self,
+        trainer: "pl.Trainer",
+        pl_module: "pl.LightningModule",
+        stage: Optional[str] = None,
+    ) -> None:
+        if not trainer.logger:
+            raise MisconfigurationException(
+                "Cannot use GPUStatsMonitor callback with Trainer that has no logger."
+            )
+
+        if not CUDAAccelerator.is_available():
+            raise MisconfigurationException(
+                "You are using GPUStatsMonitor but the CUDA accelerator is not available."
+            )
+
+        # The logical device IDs for selected devices
+        self._device_ids = sorted(set(trainer.device_ids))
+
+        # The unmasked real GPU IDs
+        self._gpu_ids = self._get_gpu_ids(self._device_ids)
+
+    def on_train_epoch_start(
+        self, trainer: "pl.Trainer", pl_module: "pl.LightningModule"
+    ) -> None:
+        self._snap_intra_step_time: Optional[float] = None
+        self._snap_inter_step_time: Optional[float] = None
+
+    @rank_zero_only
+    def on_train_batch_start(
+        self,
+        trainer: "pl.Trainer",
+        pl_module: "pl.LightningModule",
+        batch: Any,
+        batch_idx: int,
+    ) -> None:
+        if self._log_stats.intra_step_time:
+            self._snap_intra_step_time = time.time()
+
+        if not trainer._logger_connector.should_update_logs:
+            return
+
+        gpu_stat_keys = self._get_gpu_stat_keys()
+        gpu_stats = self._get_gpu_stats([k for k, _ in gpu_stat_keys])
+        logs = self._parse_gpu_stats(self._device_ids, gpu_stats, gpu_stat_keys)
+
+        if self._log_stats.inter_step_time and self._snap_inter_step_time:
+            # First log at beginning of second step
+            logs["batch_time/inter_step (ms)"] = (
+                time.time() - self._snap_inter_step_time
+            ) * 1000
+
+        assert trainer.logger is not None
+        trainer.logger.log_metrics(logs, step=trainer.global_step)
+
+    @rank_zero_only
+    def on_train_batch_end(
+        self,
+        trainer: "pl.Trainer",
+        pl_module: "pl.LightningModule",
+        outputs: STEP_OUTPUT,
+        batch: Any,
+        batch_idx: int,
+    ) -> None:
+        if self._log_stats.inter_step_time:
+            self._snap_inter_step_time = time.time()
+
+        if not trainer._logger_connector.should_update_logs:
+            return
+
+        gpu_stat_keys = self._get_gpu_stat_keys() + self._get_gpu_device_stat_keys()
+        gpu_stats = self._get_gpu_stats([k for k, _ in gpu_stat_keys])
+        logs = self._parse_gpu_stats(self._device_ids, gpu_stats, gpu_stat_keys)
+
+        if self._log_stats.intra_step_time and self._snap_intra_step_time:
+            logs["batch_time/intra_step (ms)"] = (
+                time.time() - self._snap_intra_step_time
+            ) * 1000
+
+        assert trainer.logger is not None
+        trainer.logger.log_metrics(logs, step=trainer.global_step)
+
+    @staticmethod
+    def _get_gpu_ids(device_ids: List[int]) -> List[str]:
+        """Get the unmasked real GPU IDs."""
+        # All devices if `CUDA_VISIBLE_DEVICES` unset
+        default = ",".join(str(i) for i in range(torch.cuda.device_count()))
+        cuda_visible_devices: List[str] = os.getenv(
+            "CUDA_VISIBLE_DEVICES", default=default
+        ).split(",")
+        return [cuda_visible_devices[device_id].strip() for
device_id in device_ids] + + def _get_gpu_stats(self, queries: List[str]) -> List[List[float]]: + if not queries: + return [] + + """Run nvidia-smi to get the gpu stats""" + gpu_query = ",".join(queries) + format = "csv,nounits,noheader" + gpu_ids = ",".join(self._gpu_ids) + result = subprocess.run( + [ + # it's ok to supress the warning here since we ensure nvidia-smi exists during init + shutil.which("nvidia-smi"), # type: ignore + f"--query-gpu={gpu_query}", + f"--format={format}", + f"--id={gpu_ids}", + ], + encoding="utf-8", + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, # for backward compatibility with python version 3.6 + check=True, + ) + + def _to_float(x: str) -> float: + try: + return float(x) + except ValueError: + return 0.0 + + stats = [ + [_to_float(x) for x in s.split(", ")] + for s in result.stdout.strip().split(os.linesep) + ] + return stats + + @staticmethod + def _parse_gpu_stats( + device_ids: List[int], stats: List[List[float]], keys: List[Tuple[str, str]] + ) -> Dict[str, float]: + """Parse the gpu stats into a loggable dict.""" + logs = {} + for i, device_id in enumerate(device_ids): + for j, (x, unit) in enumerate(keys): + if unit == "%": + unit = "percent" + logs[f"GPU_{device_id}/{x} - {unit}"] = stats[i][j] + return logs + + def _get_gpu_stat_keys(self) -> List[Tuple[str, str]]: + """Get the GPU stats keys.""" + stat_keys = [] + + if self._log_stats.gpu_utilization: + stat_keys.append(("utilization.gpu", "%")) + + if self._log_stats.memory_utilization: + stat_keys.extend( + [ + ("memory.used", "MB"), + ("memory.free", "MB"), + ("utilization.memory", "%"), + ] + ) + + return stat_keys + + def _get_gpu_device_stat_keys(self) -> List[Tuple[str, str]]: + """Get the device stats keys.""" + stat_keys = [] + + if self._log_stats.fan_speed: + stat_keys.append(("fan.speed", "%")) + + if self._log_stats.temperature: + stat_keys.extend([("temperature.gpu", "°C"), ("temperature.memory", "°C")]) + + return stat_keys diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index 60beb624..6b02242f 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -68,7 +68,7 @@ def __init__( opts = opts or [] if self.cfg.use_rich_text: - rich.traceback.install(suppress=[pl, torch]) + rich.traceback.install(suppress=[pl, torch], show_locals=False) self.rank = rank_zero_only.rank diff --git a/luxonis_train/models/luxonis_model.py b/luxonis_train/models/luxonis_model.py index e1dec644..d3ed26a2 100644 --- a/luxonis_train/models/luxonis_model.py +++ b/luxonis_train/models/luxonis_model.py @@ -24,6 +24,8 @@ get_unnormalized_images, ) from luxonis_train.callbacks import ( + DeviceStatsMonitor, + GPUStatsMonitor, LuxonisProgressBar, ModuleFreezer, ) @@ -620,9 +622,9 @@ def configure_callbacks(self) -> list[pl.Callback]: self.best_val_metric_checkpoints_path = f"{self.save_dir}/best_val_metric" model_name = self.cfg.model.name - callbacks: list[pl.Callback] = [] + user_callbacks = [c.name for c in self.cfg.trainer.callbacks] - callbacks.append( + callbacks: list[pl.Callback] = [ ModelCheckpoint( monitor="val/loss", dirpath=self.min_val_loss_checkpoints_path, @@ -630,8 +632,19 @@ def configure_callbacks(self) -> list[pl.Callback]: auto_insert_metric_name=False, save_top_k=self.cfg.trainer.save_top_k, mode="min", - ) - ) + ), + ] + if "DeviceStatsMonitor" not in user_callbacks: + callbacks.append(DeviceStatsMonitor(cpu_stats=True)) + + if "GPUStatsMonitor" not in user_callbacks: + if GPUStatsMonitor.is_available(): + callbacks.append(GPUStatsMonitor()) + 
else:
+                logger.warning(
+                    "GPUStatsMonitor is not available for this machine. "
+                    "Verify that `nvidia-smi` is installed."
+                )
 
         if self.main_metric is not None:
             main_metric = self.main_metric.replace("/", "_")
diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg
index b750dd9c..90299371 100644
--- a/media/coverage_badge.svg
+++ b/media/coverage_badge.svg
@@ -15,7 +15,7 @@
     coverage
     coverage
-    77%
-    77%
+    76%
+    76%
 
 
diff --git a/requirements.txt b/requirements.txt
index 7f7e996a..6dc87275 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,3 +16,4 @@ tensorboard>=2.10.1
 torchvision>=0.16.0
 typer>=0.9.0
 mlflow>=2.10.0
+psutil>=5.0.0

From 5893c3ef48c908d8e0d1446cdb7fd219559d56c6 Mon Sep 17 00:00:00 2001
From: Jernej Sabadin <116955183+JSabadin@users.noreply.github.com>
Date: Fri, 24 May 2024 18:29:24 +0200
Subject: [PATCH 21/75] More Efficient Keypoint Export (#28)

---
 luxonis_train/nodes/implicit_keypoint_bbox_head.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/luxonis_train/nodes/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/implicit_keypoint_bbox_head.py
index 7f0c3d61..76a66eb6 100644
--- a/luxonis_train/nodes/implicit_keypoint_bbox_head.py
+++ b/luxonis_train/nodes/implicit_keypoint_bbox_head.py
@@ -197,10 +197,9 @@ def _build_predictions(
         kpt_x, kpt_y, kpt_vis = process_keypoints_predictions(x_keypoints)
         kpt_x = (kpt_x + grid_x) * stride
         kpt_y = (kpt_y + grid_y) * stride
-        out_kpt = torch.stack([kpt_x, kpt_y, kpt_vis.sigmoid()], dim=-1).reshape(
-            *kpt_x.shape[:-1], -1
-        )
-
+        kpt_vis_sig = kpt_vis.sigmoid()
+        out_kpt = torch.cat((kpt_x, kpt_y, kpt_vis_sig), dim=-1)
+        out_kpt = out_kpt.reshape(*kpt_x.shape[:-1], -1)
         out = torch.cat((out_bbox, out_kpt), dim=-1)
         return out.reshape(batch_size, -1, self.n_out)
 

From 4110f78fe50a3ba5cadc0954f0b651712d3b3bf2 Mon Sep 17 00:00:00 2001
From: KlemenSkrlj <47853619+klemen1999@users.noreply.github.com>
Date: Fri, 24 May 2024 18:34:47 +0200
Subject: [PATCH 22/75] Added active param to augmentations (#32)

---
 luxonis_train/__main__.py     |  4 +++-
 luxonis_train/core/core.py    | 28 +++++++++++++++++-----------
 luxonis_train/utils/config.py |  8 ++++++++
 3 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py
index 7b8e0251..c76f28c1 100644
--- a/luxonis_train/__main__.py
+++ b/luxonis_train/__main__.py
@@ -141,7 +141,9 @@ def inspect(
 
     augmentations = (TrainAugmentations if view == "train" else ValAugmentations)(
         image_size=image_size,
-        augmentations=[i.model_dump() for i in cfg.trainer.preprocessing.augmentations],
+        augmentations=[
+            i.model_dump() for i in cfg.trainer.preprocessing.get_active_augmentations()
+        ],
         train_rgb=cfg.trainer.preprocessing.train_rgb,
         keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio,
     )
diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py
index 6b02242f..d23787fc 100644
--- a/luxonis_train/core/core.py
+++ b/luxonis_train/core/core.py
@@ -102,7 +102,8 @@ def __init__(
         self.train_augmentations = TrainAugmentations(
             image_size=self.cfg.trainer.preprocessing.train_image_size,
             augmentations=[
-                i.model_dump() for i in self.cfg.trainer.preprocessing.augmentations
+                i.model_dump()
+                for i in self.cfg.trainer.preprocessing.get_active_augmentations()
             ],
             train_rgb=self.cfg.trainer.preprocessing.train_rgb,
             keep_aspect_ratio=self.cfg.trainer.preprocessing.keep_aspect_ratio,
@@ -110,7 +111,8 @@ def __init__(
         self.val_augmentations = ValAugmentations(
image_size=self.cfg.trainer.preprocessing.train_image_size, augmentations=[ - i.model_dump() for i in self.cfg.trainer.preprocessing.augmentations + i.model_dump() + for i in self.cfg.trainer.preprocessing.get_active_augmentations() ], train_rgb=self.cfg.trainer.preprocessing.train_rgb, keep_aspect_ratio=self.cfg.trainer.preprocessing.keep_aspect_ratio, @@ -134,12 +136,16 @@ def __init__( self.loaders = { view: LOADERS.get(self.cfg.loader.name)( - augmentations=self.train_augmentations - if view == "train" - else self.val_augmentations, - view=self.cfg.loader.train_view - if view == "train" - else self.cfg.loader.val_view, + augmentations=( + self.train_augmentations + if view == "train" + else self.val_augmentations + ), + view=( + self.cfg.loader.train_view + if view == "train" + else self.cfg.loader.val_view + ), **self.cfg.loader.params, ) for view in ["train", "val", "test"] @@ -163,9 +169,9 @@ def __init__( num_workers=self.cfg.trainer.num_workers, collate_fn=collate_fn, shuffle=view == "train", - drop_last=self.cfg.trainer.skip_last_batch - if view == "train" - else False, + drop_last=( + self.cfg.trainer.skip_last_batch if view == "train" else False + ), sampler=sampler if view == "train" else None, ) for view in ["train", "val", "test"] diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 40638103..dc2f737d 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -147,6 +147,7 @@ class NormalizeAugmentationConfig(CustomBaseModel): class AugmentationConfig(CustomBaseModel): name: str + active: bool = True params: dict[str, Any] = {} @@ -167,6 +168,13 @@ def check_normalize(self): ) return self + def get_active_augmentations(self) -> list[AugmentationConfig]: + """Returns list of augmentations that are active + @rtype: list[AugmentationConfig] + @return: Filtered list of active augmentation configs + """ + return [aug for aug in self.augmentations if aug.active] + class CallbackConfig(CustomBaseModel): name: str From 36a92a665b71c336ddd5648bd4d350ee3376ea7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Thu, 30 May 2024 18:54:06 +0200 Subject: [PATCH 23/75] Fix Archiver Pre-Processing (#34) --- .github/workflows/tests.yaml | 2 ++ luxonis_train/callbacks/test_on_train_end.py | 14 ++++++++++++++ luxonis_train/core/archiver.py | 7 +++++-- luxonis_train/utils/config.py | 3 ++- tests/integration/test_sanity.py | 4 ++++ 5 files changed, 27 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index b5c0e44f..0b4f51da 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -50,6 +50,8 @@ jobs: run: pytest tests --cov=luxonis_train --cov-report xml --junit-xml pytest.xml - name: Run tests [Windows, macOS] + env: + PYTORCH_MPS_HIGH_WATERMARK_RATIO: 0.0 if: matrix.os != 'ubuntu-latest' || matrix.version != '3.10' run: pytest tests --junit-xml pytest.xml diff --git a/luxonis_train/callbacks/test_on_train_end.py b/luxonis_train/callbacks/test_on_train_end.py index 3f8da1db..bf7db341 100644 --- a/luxonis_train/callbacks/test_on_train_end.py +++ b/luxonis_train/callbacks/test_on_train_end.py @@ -1,4 +1,5 @@ import lightning.pytorch as pl +from lightning.pytorch.callbacks import ModelCheckpoint import luxonis_train from luxonis_train.utils.registry import CALLBACKS @@ -11,4 +12,17 @@ class TestOnTrainEnd(pl.Callback): def on_train_end( self, trainer: pl.Trainer, pl_module: "luxonis_train.models.LuxonisModel" ) -> None: + # `trainer.test` would 
delete the paths so we need to save them + best_paths = { + hash(callback.monitor): callback.best_model_path + for callback in trainer.callbacks # type: ignore + if isinstance(callback, ModelCheckpoint) + } + trainer.test(pl_module, pl_module._core.pytorch_loaders["test"]) + + # Restore the paths + for callback in trainer.callbacks: # type: ignore + if isinstance(callback, ModelCheckpoint): + if hash(callback.monitor) in best_paths: + callback.best_model_path = best_paths[hash(callback.monitor)] diff --git a/luxonis_train/core/archiver.py b/luxonis_train/core/archiver.py index a0706846..1473df1c 100644 --- a/luxonis_train/core/archiver.py +++ b/luxonis_train/core/archiver.py @@ -72,9 +72,12 @@ def archive(self, executable_path: str): _, executable_suffix = os.path.splitext(executable_fname) self.archive_name += f"_{executable_suffix[1:]}" + def _mult(lst: list[float | int]) -> list[float]: + return [round(x * 255.0, 5) for x in lst] + preprocessing = { # TODO: keep preprocessing same for each input? - "mean": self.cfg.trainer.preprocessing.normalize.params["mean"], - "scale": self.cfg.trainer.preprocessing.normalize.params["std"], + "mean": _mult(self.cfg.trainer.preprocessing.normalize.params["mean"]), + "scale": _mult(self.cfg.trainer.preprocessing.normalize.params["std"]), "reverse_channels": self.cfg.trainer.preprocessing.train_rgb, "interleaved_to_planar": False, # TODO: make it modifiable? } diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index dc2f737d..875819e2 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -169,7 +169,8 @@ def check_normalize(self): return self def get_active_augmentations(self) -> list[AugmentationConfig]: - """Returns list of augmentations that are active + """Returns list of augmentations that are active. + @rtype: list[AugmentationConfig] @return: Filtered list of active augmentation configs """ diff --git a/tests/integration/test_sanity.py b/tests/integration/test_sanity.py index 8b6f872b..efb3ded7 100644 --- a/tests/integration/test_sanity.py +++ b/tests/integration/test_sanity.py @@ -22,6 +22,8 @@ def test_sanity(config_file): "1", "trainer.callbacks", "[]", + "trainer.batch_size", + "1", ] result = subprocess.run( ["luxonis_train", "train", "--config", f"configs/{config_file}", *opts], @@ -80,6 +82,8 @@ def test_tuner(): "[]", "tuner.n_trials", "4", + "trainer.batch_size", + "1", ], ) assert result.returncode == 0 From 1d9998b1416b08e7b0a1d6423606a8467441393c Mon Sep 17 00:00:00 2001 From: Jernej Sabadin <116955183+JSabadin@users.noreply.github.com> Date: Fri, 31 May 2024 12:57:06 +0200 Subject: [PATCH 24/75] EfficientRep Variants (#33) --- luxonis_train/nodes/efficientrep.py | 34 ++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/luxonis_train/nodes/efficientrep.py b/luxonis_train/nodes/efficientrep.py index 4e92222f..24e43397 100644 --- a/luxonis_train/nodes/efficientrep.py +++ b/luxonis_train/nodes/efficientrep.py @@ -5,6 +5,7 @@ """ import logging +from typing import Literal from torch import Tensor, nn @@ -23,6 +24,7 @@ class EfficientRep(BaseNode[Tensor, list[Tensor]]): def __init__( self, + variant: Literal["s", "n", "m", "l"] = "n", channels_list: list[int] | None = None, num_repeats: list[int] | None = None, depth_mul: float = 0.33, @@ -31,21 +33,33 @@ def __init__( ): """EfficientRep backbone. + @type variant: Literal["s", "n", "m", "l"] + @param variant: EfficientRep variant. Defaults to "n". 
@type channels_list: list[int] | None - @param channels_list: List of number of channels for each block. Defaults to - C{[64, 128, 256, 512, 1024]}. + @param channels_list: List of number of channels for each block. If unspecified, + defaults to [64, 128, 256, 512, 1024]. @type num_repeats: list[int] | None - @param num_repeats: List of number of repeats of RepVGGBlock. Defaults to C{[1, - 6, 12, 18, 6]}. + @param num_repeats: List of number of repeats of RepVGGBlock. If unspecified, + defaults to [1, 6, 12, 18, 6]. @type depth_mul: float - @param depth_mul: Depth multiplier. Defaults to 0.33. + @param depth_mul: Depth multiplier. Depending on the variant, defaults to 0.33. @type width_mul: float - @param width_mul: Width multiplier. Defaults to 0.25. + @param width_mul: Width multiplier. Depending on the variant, defaults to 0.25. @type kwargs: Any @param kwargs: Additional arguments to pass to L{BaseNode}. """ super().__init__(**kwargs) + if variant not in EFFICIENTREP_VARIANTS: + raise ValueError( + f"EfficientRep model variant should be in {list(EFFICIENTREP_VARIANTS.keys())}" + ) + + ( + depth_mul, + width_mul, + ) = EFFICIENTREP_VARIANTS[variant] + channels_list = channels_list or [64, 128, 256, 512, 1024] num_repeats = num_repeats or [1, 6, 12, 18, 6] channels_list = [make_divisible(i * width_mul, 8) for i in channels_list] @@ -110,3 +124,11 @@ def forward(self, inputs: Tensor) -> list[Tensor]: x = block(x) outputs.append(x) return outputs + + +EFFICIENTREP_VARIANTS = { + "n": (0.33, 0.25), + "s": (0.33, 0.50), + "m": (0.60, 0.75), + "l": (1.0, 1.0), +} From c2e98b713331ed48ad4f8855d93c6dea80b6ccd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Thu, 6 Jun 2024 21:35:02 +0200 Subject: [PATCH 25/75] Support for LuxonisML - Annotation Refactor (#37) Co-authored-by: GitHub Actions --- configs/resnet_multitask_model.yaml | 110 +++++++++++++++ luxonis_train/__main__.py | 10 +- .../attached_modules/base_attached_module.py | 68 +++++++--- .../losses/adaptive_detection_loss.py | 2 +- .../losses/implicit_keypoint_bbox_loss.py | 6 +- .../attached_modules/losses/keypoint_loss.py | 4 +- .../attached_modules/metrics/common.py | 8 +- .../metrics/mean_average_precision.py | 4 +- .../mean_average_precision_keypoints.py | 8 +- .../metrics/object_keypoint_similarity.py | 6 +- .../visualizers/keypoint_visualizer.py | 9 +- .../visualizers/segmentation_visualizer.py | 2 +- luxonis_train/core/archiver.py | 2 +- luxonis_train/core/core.py | 15 ++- luxonis_train/models/luxonis_model.py | 12 +- luxonis_train/nodes/base_node.py | 21 ++- luxonis_train/nodes/bisenet_head.py | 2 +- luxonis_train/nodes/classification_head.py | 6 +- luxonis_train/nodes/efficient_bbox_head.py | 6 +- .../nodes/implicit_keypoint_bbox_head.py | 4 +- luxonis_train/nodes/segmentation_head.py | 2 +- luxonis_train/utils/boxutils.py | 14 +- luxonis_train/utils/config.py | 2 +- luxonis_train/utils/general.py | 43 +++--- luxonis_train/utils/loaders/base_loader.py | 54 +++----- .../utils/loaders/luxonis_loader_torch.py | 15 +-- luxonis_train/utils/types.py | 26 ++-- media/coverage_badge.svg | 4 +- tests/integration/conftest.py | 127 ++++-------------- .../test_loaders/test_base_loader.py | 14 +- 30 files changed, 324 insertions(+), 282 deletions(-) create mode 100644 configs/resnet_multitask_model.yaml diff --git a/configs/resnet_multitask_model.yaml b/configs/resnet_multitask_model.yaml new file mode 100644 index 00000000..844c83d4 --- /dev/null +++ b/configs/resnet_multitask_model.yaml @@ -0,0 +1,110 @@ + +model: + 
name: resnet50_classification + nodes: + - name: ResNet + params: + variant: "50" + download_weights: True + + - name: ClassificationHead + alias: ClassificationHead_1 + task: classification_1 + inputs: + - ResNet + + - name: ClassificationHead + alias: ClassificationHead_2 + task: classification_2 + inputs: + - ResNet + + - name: ClassificationHead + alias: ClassificationHead_3 + task: classification_3 + inputs: + - ResNet + + losses: + - name: CrossEntropyLoss + alias: CrossEntropyLoss_1 + attached_to: ClassificationHead_1 + + - name: CrossEntropyLoss + alias: CrossEntropyLoss_2 + attached_to: ClassificationHead_2 + + - name: CrossEntropyLoss + alias: CrossEntropyLoss_3 + attached_to: ClassificationHead_3 + + metrics: + - name: Accuracy + is_main_metric: true + alias: Accuracy_1 + attached_to: ClassificationHead_1 + + - name: Accuracy + alias: Accuracy_2 + attached_to: ClassificationHead_2 + + - name: Accuracy + alias: Accuracy_3 + attached_to: ClassificationHead_3 + + visualizers: + - name: ClassificationVisualizer + alias: ClassificationVisualizer_1 + attached_to: ClassificationHead_1 + params: + font_scale: 0.5 + color: [255, 0, 0] + thickness: 2 + include_plot: True + + - name: ClassificationVisualizer + alias: ClassificationVisualizer_2 + attached_to: ClassificationHead_2 + params: + font_scale: 0.5 + color: [255, 0, 0] + thickness: 2 + include_plot: True + + - name: ClassificationVisualizer + alias: ClassificationVisualizer_3 + attached_to: ClassificationHead_3 + params: + font_scale: 0.5 + color: [255, 0, 0] + thickness: 2 + include_plot: True + +loader: + params: + dataset_name: cifar10_task_test + +trainer: + batch_size: 4 + epochs: &epochs 200 + num_workers: 4 + validation_interval: 10 + num_log_images: 8 + + preprocessing: + train_image_size: [&height 224, &width 224] + keep_aspect_ratio: False + normalize: + active: True + + callbacks: + - name: ExportOnTrainEnd + - name: TestOnTrainEnd + + optimizer: + name: SGD + params: + lr: 0.02 + + scheduler: + name: ConstantLR diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index c76f28c1..759bc87c 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -110,10 +110,7 @@ def inspect( ): """Inspect dataset.""" from lightning.pytorch import seed_everything - from luxonis_ml.data import ( - TrainAugmentations, - ValAugmentations, - ) + from luxonis_ml.data import Augmentations from luxonis_train.attached_modules.visualizers.utils import ( draw_bounding_box_labels, @@ -139,13 +136,14 @@ def inspect( image_size = cfg.trainer.preprocessing.train_image_size - augmentations = (TrainAugmentations if view == "train" else ValAugmentations)( + augmentations = Augmentations( image_size=image_size, augmentations=[ i.model_dump() for i in cfg.trainer.preprocessing.get_active_augmentations() ], train_rgb=cfg.trainer.preprocessing.train_rgb, keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio, + only_normalize=view != "train", ) loader = LOADERS.get(cfg.loader.name)( @@ -178,7 +176,7 @@ def inspect( colors="yellow", width=1, ) - elif label_type == LabelType.KEYPOINT: + elif label_type == LabelType.KEYPOINTS: img = draw_keypoint_labels( img, labels[labels[:, 0] == i][:, 1:], colors="red" ) diff --git a/luxonis_train/attached_modules/base_attached_module.py b/luxonis_train/attached_modules/base_attached_module.py index a015e09f..1e446fbb 100644 --- a/luxonis_train/attached_modules/base_attached_module.py +++ b/luxonis_train/attached_modules/base_attached_module.py @@ -74,6 +74,44 @@ def node(self) -> 
BaseNode: ) return self._node + def get_label(self, labels: Labels) -> tuple[Tensor, LabelType]: + if len(self.required_labels) != 1: + if self.task in labels: + return labels[self.task] + raise NotImplementedError( + f"{self.__class__.__name__} requires multiple labels, " + "the default `prepare` implementation does not support this." + ) + for label, label_type in labels.values(): + if label_type == self.required_labels[0]: + return label, label_type + raise IncompatibleException.from_missing_task( + self.required_labels[0].value, list(labels.keys()), self.__class__.__name__ + ) + + def get_input_tensors(self, inputs: Packet[Tensor]) -> list[Tensor]: + if self.protocol is not None: + return inputs[self.protocol.get_task()] + if self.node._task_type is not None: + return inputs[self.node._task_type.value] + return inputs[self.node.task] + + @property + def task(self) -> str: + """Task of the node that this module is attached to. + + @rtype: str + """ + task = self.node._task + if task is None: + if self.required_labels and len(self.required_labels) == 1: + return self.required_labels[0].value + raise RuntimeError( + "Attempt to access `task` reference, but the node does not have a task. ", + f"You have to specify the task in the configuration for node {self.node.__class__.__name__}.", + ) + return task + def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]: """Prepares node outputs for the forward pass of the module. @@ -102,20 +140,13 @@ def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]: "This module requires multiple labels, the default `prepare` " "implementation does not support this." ) - if not self.required_labels: - if "boxes" in inputs and LabelType.BOUNDINGBOX in labels: - return inputs["boxes"], labels[LabelType.BOUNDINGBOX] # type: ignore - if "classes" in inputs and LabelType.CLASSIFICATION in labels: - return inputs["classes"][0], labels[LabelType.CLASSIFICATION] # type: ignore - if "keypoints" in inputs and LabelType.KEYPOINT in labels: - return inputs["keypoints"], labels[LabelType.KEYPOINT] # type: ignore - if "segmentation" in inputs and LabelType.SEGMENTATION in labels: - return inputs["segmentation"][0], labels[LabelType.SEGMENTATION] # type: ignore - raise IncompatibleException( - f"No matching labels and outputs found for {self.__class__.__name__}" - ) - label_type = self.required_labels[0] - return inputs[label_type.value], labels[label_type] # type: ignore + x = self.get_input_tensors(inputs) + label, label_type = self.get_label(labels) + if label_type in [LabelType.CLASSIFICATION, LabelType.SEGMENTATION]: + if isinstance(x, list) and len(x) == 1: + x = x[0] + + return x, label # type: ignore def validate(self, inputs: Packet[Tensor], labels: Labels) -> None: """Validates that the inputs and labels are compatible with the module. @@ -126,11 +157,10 @@ def validate(self, inputs: Packet[Tensor], labels: Labels) -> None: @param labels: Labels from the dataset. @raises L{IncompatibleException}: If the inputs are not compatible with the module. 
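 
         For example, with the refactored label format a detection sample may arrive
         as C{labels = {"boundingbox": (bbox_tensor, LabelType.BOUNDINGBOX)}}, i.e. a
         dictionary mapping task names to C{(tensor, LabelType)} pairs (the task name
         here is illustrative).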
""" - for label in self.required_labels: - if label not in labels: - raise IncompatibleException.from_missing_label( - label, list(labels.keys()), self.__class__.__name__ - ) + if self.node.task is not None and self.node.task not in labels: + raise IncompatibleException.from_missing_task( + self.node.task, list(labels.keys()), self.__class__.__name__ + ) if self.protocol is not None: try: diff --git a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py index af1a7e6a..521b6d8e 100644 --- a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py +++ b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py @@ -104,7 +104,7 @@ def prepare( batch_size = pred_scores.shape[0] device = pred_scores.device - target = labels[LabelType.BOUNDINGBOX].to(device) + target = labels[self.task][0].to(device) gt_bboxes_scale = torch.tensor( [ self.original_img_size[1], diff --git a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py index 7169d2a4..555d0d30 100644 --- a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py +++ b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py @@ -89,7 +89,7 @@ def __init__( """ super().__init__( - required_labels=[LabelType.BOUNDINGBOX, LabelType.KEYPOINT], + required_labels=[LabelType.BOUNDINGBOX, LabelType.KEYPOINTS], **kwargs, ) @@ -165,8 +165,8 @@ def prepare( """ predictions = outputs["features"] - kpts = labels[LabelType.KEYPOINT] - boxes = labels[LabelType.BOUNDINGBOX] + kpts = labels["keypoints"][0] + boxes = labels["boundingbox"][0] nkpts = (kpts.shape[1] - 2) // 3 targets = torch.zeros((len(boxes), nkpts * 2 + self.box_offset + 1)) diff --git a/luxonis_train/attached_modules/losses/keypoint_loss.py b/luxonis_train/attached_modules/losses/keypoint_loss.py index 4728b045..b1ddd8ba 100644 --- a/luxonis_train/attached_modules/losses/keypoint_loss.py +++ b/luxonis_train/attached_modules/losses/keypoint_loss.py @@ -29,7 +29,7 @@ def __init__( **kwargs, ): super().__init__( - protocol=Protocol, required_labels=[LabelType.KEYPOINT], **kwargs + protocol=Protocol, required_labels=[LabelType.KEYPOINTS], **kwargs ) self.b_cross_entropy = BCEWithLogitsLoss( pos_weight=torch.tensor([bce_power]), **kwargs @@ -38,7 +38,7 @@ def __init__( self.visibility_weight = visibility_weight def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Tensor, Tensor]: - return torch.cat(inputs["keypoints"], dim=0), labels[LabelType.KEYPOINT] + return torch.cat(inputs["keypoints"], dim=0), labels[LabelType.KEYPOINTS] def forward( self, prediction: Tensor, target: Tensor diff --git a/luxonis_train/attached_modules/metrics/common.py b/luxonis_train/attached_modules/metrics/common.py index 6d16a4b4..8d181840 100644 --- a/luxonis_train/attached_modules/metrics/common.py +++ b/luxonis_train/attached_modules/metrics/common.py @@ -27,9 +27,9 @@ def __init__(self, **kwargs): f"assuming {task}." ) kwargs["task"] = task - self.task = task + self._task = task - if self.task == "multiclass": + if self._task == "multiclass": if "num_classes" not in kwargs: if self.node is None: raise ValueError( @@ -37,7 +37,7 @@ def __init__(self, **kwargs): "multiclass torchmetrics." 
) kwargs["num_classes"] = self.node.n_classes - elif self.task == "multilabel": + elif self._task == "multilabel": if "num_labels" not in kwargs: if self.node is None: raise ValueError( @@ -49,7 +49,7 @@ def __init__(self, **kwargs): self.metric = self.Metric(**kwargs) def update(self, preds, target, *args, **kwargs) -> None: - if self.task in ["multiclass"]: + if self._task in ["multiclass"]: target = target.argmax(dim=1) self.metric.update(preds, target, *args, **kwargs) diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision.py b/luxonis_train/attached_modules/metrics/mean_average_precision.py index 0a58d061..680b0e5a 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision.py @@ -38,8 +38,8 @@ def update( def prepare( self, outputs: Packet[Tensor], labels: Labels ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]: - label = labels[LabelType.BOUNDINGBOX] - output_nms = outputs["boxes"] + label = labels[self.task][0] + output_nms = self.get_input_tensors(outputs) image_size = self.node.original_in_shape[2:] diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py index 3740f58e..42b1395d 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py @@ -68,7 +68,7 @@ def __init__( """ super().__init__( protocol=Protocol, - required_labels=[LabelType.BOUNDINGBOX, LabelType.KEYPOINT], + required_labels=[LabelType.BOUNDINGBOX, LabelType.KEYPOINTS], **kwargs, ) @@ -97,8 +97,8 @@ def __init__( self.add_state("groundtruth_keypoints", default=[], dist_reduce_fx=None) def prepare(self, outputs: Packet[Tensor], labels: Labels): - kpts = labels[LabelType.KEYPOINT] - boxes = labels[LabelType.BOUNDINGBOX] + kpts = labels["keypoints"][0] + boxes = labels["boundingbox"][0] nkpts = (kpts.shape[1] - 2) // 3 label = torch.zeros((len(boxes), nkpts * 3 + 6)) label[:, :2] = boxes[:, :2] @@ -112,7 +112,7 @@ def prepare(self, outputs: Packet[Tensor], labels: Labels): image_size = self.node.original_in_shape[2:] output_kpts: list[Tensor] = outputs["keypoints"] - output_bboxes: list[Tensor] = outputs["boxes"] + output_bboxes: list[Tensor] = outputs["boundingbox"] for i in range(len(output_kpts)): output_list_kpt_map.append( { diff --git a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py index c5e4a19b..959108c4 100644 --- a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py +++ b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py @@ -46,7 +46,7 @@ def __init__( **kwargs, ) -> None: super().__init__( - required_labels=[LabelType.KEYPOINT], protocol=KeypointProtocol, **kwargs + required_labels=[LabelType.KEYPOINTS], protocol=KeypointProtocol, **kwargs ) if n_keypoints is None and self.node is None: @@ -67,8 +67,8 @@ def __init__( def prepare( self, outputs: Packet[Tensor], labels: Labels ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]: - kpts_labels = labels[LabelType.KEYPOINT] - bbox_labels = labels[LabelType.BOUNDINGBOX] + kpts_labels = labels["keypoints"][0] + bbox_labels = labels["boundingbox"][0] num_keypoints = (kpts_labels.shape[1] - 2) // 3 label = torch.zeros((len(bbox_labels), num_keypoints * 3 + 6)) label[:, :2] = bbox_labels[:, :2] diff 
--git a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py index beebaf3f..6594912f 100644 --- a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py @@ -4,9 +4,7 @@ from torch import Tensor from luxonis_train.utils.types import ( - Labels, LabelType, - Packet, ) from .base_visualizer import BaseVisualizer @@ -42,17 +40,12 @@ def __init__( @param nonvisible_color: Color of nonvisible keypoints. If C{None}, nonvisible keypoints are not drawn. Defaults to C{None}. """ - super().__init__(required_labels=[LabelType.KEYPOINT], **kwargs) + super().__init__(required_labels=[LabelType.KEYPOINTS], **kwargs) self.visibility_threshold = visibility_threshold self.connectivity = connectivity self.visible_color = visible_color self.nonvisible_color = nonvisible_color - def prepare( - self, output: Packet[Tensor], label: Labels - ) -> tuple[list[Tensor], Tensor]: - return output["keypoints"], label[LabelType.KEYPOINT] - @staticmethod def draw_predictions( canvas: Tensor, diff --git a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py index 2b2dc7a3..f5348873 100644 --- a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py @@ -45,7 +45,7 @@ def __init__( self.alpha = alpha def prepare(self, output: Packet[Tensor], label: Labels) -> tuple[Tensor, Tensor]: - return output["segmentation"][0], label[LabelType.SEGMENTATION] + return output[self.node.task][0], label[self.task][0] @staticmethod def draw_predictions( diff --git a/luxonis_train/core/archiver.py b/luxonis_train/core/archiver.py index 1473df1c..a42d2ec7 100644 --- a/luxonis_train/core/archiver.py +++ b/luxonis_train/core/archiver.py @@ -243,7 +243,7 @@ def _get_classes(self, head_family): if head_family.startswith("Classification"): return self.dataset_metadata._classes["class"] elif head_family.startswith("Object"): - return self.dataset_metadata._classes["boxes"] + return self.dataset_metadata._classes["boundingbox"] elif head_family.startswith("Segmentation"): return self.dataset_metadata._classes["segmentation"] elif head_family.startswith("Keypoint"): diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index d23787fc..1ac3fce0 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -9,7 +9,7 @@ import torch import torch.utils.data as torch_data from lightning.pytorch.utilities import rank_zero_only # type: ignore -from luxonis_ml.data import TrainAugmentations, ValAugmentations +from luxonis_ml.data import Augmentations from luxonis_ml.utils import reset_logging, setup_logging from luxonis_train.callbacks import LuxonisProgressBar @@ -99,7 +99,7 @@ def __init__( pl.seed_everything(self.cfg.trainer.seed, workers=True) deterministic = True - self.train_augmentations = TrainAugmentations( + self.train_augmentations = Augmentations( image_size=self.cfg.trainer.preprocessing.train_image_size, augmentations=[ i.model_dump() @@ -108,7 +108,7 @@ def __init__( train_rgb=self.cfg.trainer.preprocessing.train_rgb, keep_aspect_ratio=self.cfg.trainer.preprocessing.keep_aspect_ratio, ) - self.val_augmentations = ValAugmentations( + self.val_augmentations = Augmentations( image_size=self.cfg.trainer.preprocessing.train_image_size, augmentations=[ i.model_dump() @@ -116,6 +116,7 
@@ def __init__( ], train_rgb=self.cfg.trainer.preprocessing.train_rgb, keep_aspect_ratio=self.cfg.trainer.preprocessing.keep_aspect_ratio, + only_normalize=True, ) self.pl_trainer = pl.Trainer( @@ -152,7 +153,7 @@ def __init__( } sampler = None if self.cfg.trainer.use_weighted_sampler: - classes_count = self.dataset.get_classes()[1] + classes_count = self.loaders["train"].get_classes()[1] if len(classes_count) == 0: logger.warning( "WeightedRandomSampler only available for classification tasks. Using default sampler instead." @@ -183,15 +184,15 @@ def __init__( self.cfg.save_data(os.path.join(self.run_save_dir, "config.yaml")) - def set_train_augmentations(self, aug: TrainAugmentations) -> None: + def set_train_augmentations(self, aug: Augmentations) -> None: """Sets augmentations used for training dataset.""" self.train_augmentations = aug - def set_val_augmentations(self, aug: ValAugmentations) -> None: + def set_val_augmentations(self, aug: Augmentations) -> None: """Sets augmentations used for validation dataset.""" self.val_augmentations = aug - def set_test_augmentations(self, aug: ValAugmentations) -> None: + def set_test_augmentations(self, aug: Augmentations) -> None: """Sets augmentations used for test dataset.""" self.test_augmentations = aug diff --git a/luxonis_train/models/luxonis_model.py b/luxonis_train/models/luxonis_model.py index d3ed26a2..e2568ec0 100644 --- a/luxonis_train/models/luxonis_model.py +++ b/luxonis_train/models/luxonis_model.py @@ -38,7 +38,7 @@ ) from luxonis_train.utils.registry import CALLBACKS, OPTIMIZERS, SCHEDULERS, Registry from luxonis_train.utils.tracker import LuxonisTrackerPL -from luxonis_train.utils.types import Kwargs, Labels, Packet, TaskLabels +from luxonis_train.utils.types import Kwargs, Labels, Packet from .luxonis_output import LuxonisOutput @@ -143,13 +143,10 @@ def __init__( frozen_nodes: list[tuple[str, int]] = [] nodes: dict[str, tuple[type[BaseNode], Kwargs]] = {} - self.node_tasks: dict[str, str] = {} - for node_cfg in self.cfg.model.nodes: node_name = node_cfg.name Node = BaseNode.REGISTRY.get(node_name) node_name = node_cfg.alias or node_name - self.node_tasks[node_name] = node_cfg.task_group if node_cfg.freezing.active: epochs = self.cfg.trainer.epochs if node_cfg.freezing.unfreeze_after is None: @@ -159,7 +156,7 @@ def __init__( else: unfreeze_after = int(node_cfg.freezing.unfreeze_after * epochs) frozen_nodes.append((node_name, unfreeze_after)) - nodes[node_name] = (Node, node_cfg.params) + nodes[node_name] = (Node, {**node_cfg.params, "task": node_cfg.task}) if not node_cfg.inputs: self.input_shapes[node_name] = [Size(input_shape)] self.graph[node_name] = node_cfg.inputs @@ -251,7 +248,7 @@ def _initiate_nodes( def forward( self, inputs: Tensor, - task_labels: TaskLabels | None = None, + labels: Labels | None = None, images: Tensor | None = None, *, compute_loss: bool = True, @@ -303,7 +300,6 @@ def forward( node_inputs = [computed[pred] for pred in input_names] outputs = node.run(node_inputs) computed[node_name] = outputs - labels = task_labels[self.node_tasks[node_name]] if task_labels else None if compute_loss and node_name in self.losses and labels is not None: for loss_name, loss in self.losses[node_name].items(): @@ -500,7 +496,7 @@ def process_losses( training_step_output["loss"] = final_loss.detach().cpu() return final_loss, training_step_output - def training_step(self, train_batch: tuple[Tensor, TaskLabels]) -> Tensor: + def training_step(self, train_batch: tuple[Tensor, Labels]) -> Tensor: """Performs one step of 
training with provided batch.""" outputs = self.forward(*train_batch) assert outputs.losses, "Losses are empty, check if you have defined any loss" diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py index c3124f82..327c8d8f 100644 --- a/luxonis_train/nodes/base_node.py +++ b/luxonis_train/nodes/base_node.py @@ -91,7 +91,8 @@ def __init__( in_protocols: list[type[BaseModel]] | None = None, n_classes: int | None = None, in_sizes: Size | list[Size] | None = None, - task_type: LabelType | None = None, + task: str | None = None, + _task_type: LabelType | None = None, ): super().__init__() @@ -111,7 +112,10 @@ def __init__( self.attach_index = attach_index self.in_protocols = in_protocols or [FeaturesProtocol] - self.task_type = task_type + self._task_type = _task_type + if task is None and self._task_type is not None: + task = self._task_type.value + self._task = task self._input_shapes = input_shapes self._original_in_shape = original_in_shape @@ -130,15 +134,22 @@ def _non_set_error(self, name: str) -> ValueError: "but it was not set during initialization. " ) + @property + def task(self) -> str: + """Getter for the task.""" + if self._task is None: + raise self._non_set_error("task") + return self._task + @property def n_classes(self) -> int: """Getter for the number of classes.""" - return self.dataset_metadata.n_classes(self.task_type) + return self.dataset_metadata.n_classes(self.task) @property def class_names(self) -> list[str]: """Getter for the class names.""" - return self.dataset_metadata.class_names(self.task_type) + return self.dataset_metadata.class_names(self.task) @property def input_shapes(self) -> list[Packet[Size]]: @@ -312,7 +323,7 @@ def wrap(self, output: ForwardOutputT) -> Packet[Tensor]: raise IncompatibleException( "Default `wrap` expects a single tensor or a list of tensors." ) - return {"features": outputs} + return {self._task or "features": outputs} def run(self, inputs: list[Packet[Tensor]]) -> Packet[Tensor]: """Combines the forward pass with the wrapping and unwrapping of the inputs. diff --git a/luxonis_train/nodes/bisenet_head.py b/luxonis_train/nodes/bisenet_head.py index a3b11df6..9185d823 100644 --- a/luxonis_train/nodes/bisenet_head.py +++ b/luxonis_train/nodes/bisenet_head.py @@ -30,7 +30,7 @@ def __init__( @param intermediate_channels: How many intermediate channels to use. Defaults to C{64}. """ - super().__init__(task_type=LabelType.SEGMENTATION, **kwargs) + super().__init__(task=LabelType.SEGMENTATION, **kwargs) original_height = self.original_in_shape[2] upscale_factor = 2 ** infer_upscale_factor(self.in_height, original_height) diff --git a/luxonis_train/nodes/classification_head.py b/luxonis_train/nodes/classification_head.py index d96e6b72..7e55a590 100644 --- a/luxonis_train/nodes/classification_head.py +++ b/luxonis_train/nodes/classification_head.py @@ -19,7 +19,9 @@ def __init__( @param dropout_rate: Dropout rate before last layer, range C{[0, 1]}. Defaults to C{0.2}. 
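 
         Note: the classifier output is wrapped under the C{"classification"} key of
         the output packet (see C{wrap} below).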
""" - super().__init__(task_type=LabelType.CLASSIFICATION, **kwargs) + super().__init__( + _task_type=kwargs.pop("_task_type", LabelType.CLASSIFICATION), **kwargs + ) self.head = nn.Sequential( nn.AdaptiveAvgPool2d(1), @@ -32,4 +34,4 @@ def forward(self, inputs: Tensor) -> Tensor: return self.head(inputs) def wrap(self, output: Tensor) -> Packet[Tensor]: - return {"classes": [output]} + return {"classification": [output]} diff --git a/luxonis_train/nodes/efficient_bbox_head.py b/luxonis_train/nodes/efficient_bbox_head.py index a4f3bc93..97ee1bfc 100644 --- a/luxonis_train/nodes/efficient_bbox_head.py +++ b/luxonis_train/nodes/efficient_bbox_head.py @@ -50,7 +50,7 @@ def __init__( @type max_det: int @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}. """ - super().__init__(task_type=LabelType.BOUNDINGBOX, **kwargs) + super().__init__(_task_type=LabelType.BOUNDINGBOX, **kwargs) self.n_heads = n_heads @@ -97,7 +97,7 @@ def wrap( conf, _ = out_cls.max(1, keepdim=True) out = torch.cat([out_reg, conf, out_cls], dim=1) outputs.append(out) - return {"boxes": outputs} + return {"boundingbox": outputs} cls_tensor = torch.cat( [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], dim=2 @@ -116,7 +116,7 @@ def wrap( else: boxes = self._process_to_bbox((features, cls_tensor, reg_tensor)) return { - "boxes": boxes, + "boundingbox": boxes, "features": features, "class_scores": [cls_tensor], "distributions": [reg_tensor], diff --git a/luxonis_train/nodes/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/implicit_keypoint_bbox_head.py index 76a66eb6..431dcf46 100644 --- a/luxonis_train/nodes/implicit_keypoint_bbox_head.py +++ b/luxonis_train/nodes/implicit_keypoint_bbox_head.py @@ -57,7 +57,7 @@ def __init__( @type max_det: int @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}. """ - super().__init__(task_type=LabelType.KEYPOINT, **kwargs) + super().__init__(_task_type=LabelType.KEYPOINTS, **kwargs) if anchors is None: logger.info("No anchors provided, generating them automatically.") @@ -172,7 +172,7 @@ def wrap(self, outputs: tuple[list[Tensor], Tensor]) -> Packet[Tensor]: ) return { - "boxes": [detection[:, :6] for detection in nms], + "boundingbox": [detection[:, :6] for detection in nms], "keypoints": [ detection[:, 6:].reshape(-1, self.n_keypoints, 3) for detection in nms ], diff --git a/luxonis_train/nodes/segmentation_head.py b/luxonis_train/nodes/segmentation_head.py index a3420491..5955953d 100644 --- a/luxonis_train/nodes/segmentation_head.py +++ b/luxonis_train/nodes/segmentation_head.py @@ -27,7 +27,7 @@ def __init__(self, **kwargs): @type kwargs: Any @param kwargs: Additional arguments to pass to L{BaseNode}. 
""" - super().__init__(task_type=LabelType.SEGMENTATION, **kwargs) + super().__init__(_task_type=LabelType.SEGMENTATION, **kwargs) original_height = self.original_in_shape[2] num_up = infer_upscale_factor(self.in_height, original_height, strict=False) diff --git a/luxonis_train/utils/boxutils.py b/luxonis_train/utils/boxutils.py index a59f4cd0..15fca04f 100644 --- a/luxonis_train/utils/boxutils.py +++ b/luxonis_train/utils/boxutils.py @@ -6,6 +6,7 @@ import torch from scipy.cluster.vq import kmeans from torch import Tensor +from torch.utils.data import DataLoader from torchvision.ops import ( batched_nms, box_convert, @@ -400,11 +401,10 @@ def non_max_suppression( def anchors_from_dataset( - loader: torch.utils.data.DataLoader, + loader: DataLoader, n_anchors: int = 9, n_generations: int = 1000, ratio_threshold: float = 4.0, - task_group: str = "default", ) -> tuple[Tensor, float]: """Generates anchors based on bounding box annotations present in provided data loader. It uses K-Means for initial proposals which are then refined with genetic @@ -426,11 +426,11 @@ def anchors_from_dataset( widths = [] inputs = None - for inp, task_labels in loader: - labels = next(iter(task_labels.values())) # TODO: handle multiple tasks - boxes = labels[LabelType.BOUNDINGBOX] - curr_wh = boxes[:, 4:] - widths.append(curr_wh) + for inp, labels in loader: + for tensor, label_type in labels.values(): + if label_type == LabelType.BOUNDINGBOX: + curr_wh = tensor[:, 4:] + widths.append(curr_wh) inputs = inp assert inputs is not None, "No inputs found in data loader" _, _, h, w = inputs.shape # assuming all images are same size diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 875819e2..31fd55ee 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -41,7 +41,7 @@ class ModelNodeConfig(CustomBaseModel): inputs: list[str] = [] params: dict[str, Any] = {} freezing: FreezingConfig = FreezingConfig() - task_group: str = "default" + task: str | None = None class PredefinedModelConfig(CustomBaseModel): diff --git a/luxonis_train/utils/general.py b/luxonis_train/utils/general.py index bf3d0e8f..21c35df0 100644 --- a/luxonis_train/utils/general.py +++ b/luxonis_train/utils/general.py @@ -71,11 +71,11 @@ def classes(self) -> dict[LabelType, list[str]]: ) return self._classes - def n_classes(self, label_type: LabelType | None) -> int: - """Gets the number of classes for the specified label type. + def n_classes(self, task: str | None) -> int: + """Gets the number of classes for the specified task. - @type label_type: L{LabelType} | None - @param label_type: Label type to get the number of classes for. + @type task: str | None + @param task: Task to get the number of classes for. @rtype: int @return: Number of classes for the specified label type. @raises ValueError: If the dataset loader was not provided during @@ -83,12 +83,10 @@ def n_classes(self, label_type: LabelType | None) -> int: @raises ValueError: If the dataset contains different number of classes for different label types. """ - if label_type is not None: - if label_type not in self.classes: - raise ValueError( - f"Task type {label_type.name} is not present in the dataset." 
- ) - return len(self.classes[label_type]) + if task is not None: + if task not in self.classes: + raise ValueError(f"Task '{task}' is not present in the dataset.") + return len(self.classes[task]) n_classes = len(list(self.classes.values())[0]) for classes in self.classes.values(): if len(classes) != n_classes: @@ -97,11 +95,11 @@ def n_classes(self, label_type: LabelType | None) -> int: ) return n_classes - def class_names(self, label_type: LabelType | None) -> list[str]: - """Gets the class names for the specified label type. + def class_names(self, task: str | None) -> list[str]: + """Gets the class names for the specified task. - @type label_type: L{LabelType} | None - @param label_type: Label type to get the class names for. + @type task: str | None + @param task: Task to get the class names for. @rtype: list[str] @return: List of class names for the specified label type. @raises ValueError: If the dataset loader was not provided during @@ -109,12 +107,10 @@ def class_names(self, label_type: LabelType | None) -> list[str]: @raises ValueError: If the dataset contains different class names for different label types. """ - if label_type is not None: - if label_type not in self.classes: - raise ValueError( - f"Task type {label_type.name} is not present in the dataset." - ) - return self.classes[label_type] + if task is not None: + if task not in self.classes: + raise ValueError(f"Task type {task} is not present in the dataset.") + return self.classes[task] class_names = list(self.classes.values())[0] for classes in self.classes.values(): if classes != class_names: @@ -170,9 +166,10 @@ def from_loader(cls, loader: BaseLoaderTorch) -> "DatasetMetadata": if skeletons is not None: if len(skeletons) == 1: - name = list(skeletons.keys())[0] - keypoint_names = skeletons[name]["labels"] - connectivity = skeletons[name]["edges"] + task_name = next(iter(skeletons)) + class_name = next(iter(skeletons[task_name])) + keypoint_names = skeletons[task_name][class_name]["labels"] + connectivity = skeletons[task_name][class_name]["edges"] elif len(skeletons) > 1: raise NotImplementedError( diff --git a/luxonis_train/utils/loaders/base_loader.py b/luxonis_train/utils/loaders/base_loader.py index f96f65e1..c3f5e141 100644 --- a/luxonis_train/utils/loaders/base_loader.py +++ b/luxonis_train/utils/loaders/base_loader.py @@ -9,7 +9,7 @@ from luxonis_train.utils.registry import LOADERS from luxonis_train.utils.types import Labels, LabelType -LuxonisLoaderTorchOutput = tuple[Tensor, dict[str, Labels]] +LuxonisLoaderTorchOutput = tuple[Tensor, Labels] """LuxonisLoaderTorchOutput is a tuple of images and corresponding labels.""" @@ -74,7 +74,7 @@ def get_skeletons(self) -> dict[str, dict] | None: def collate_fn( batch: list[LuxonisLoaderTorchOutput], -) -> tuple[Tensor, dict[str, dict[LabelType, Tensor]]]: +) -> tuple[Tensor, Labels]: """Default collate function used for training. @type batch: list[LuxonisLoaderTorchOutput] @@ -83,46 +83,26 @@ def collate_fn( @rtype: tuple[Tensor, dict[LabelType, Tensor]] @return: Tuple of images and annotations in the format expected by the model. """ - imgs, group_dicts = zip(*batch) - out_group_dicts = {task: {} for task in group_dicts[0].keys()} - imgs = torch.stack(imgs, 0) + imgs: tuple[Tensor, ...] + labels: tuple[Labels, ...] 
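+    # Each sample is (image, labels), where `labels` maps a task name to a
+    # (tensor, LabelType) pair. Dense labels (classification, segmentation)
+    # are stacked along a new batch dimension; bbox and keypoint labels get a
+    # prepended image-index column and are concatenated across the batch.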
+ imgs, labels = zip(*batch) - for task in list(group_dicts[0].keys()): - anno_dicts = [group[task] for group in group_dicts] + out_labels = {} - present_annotations = anno_dicts[0].keys() - out_annotations: dict[LabelType, Tensor] = { - anno: torch.empty(0) for anno in present_annotations - } + for task in labels[0].keys(): + label_type = labels[0][task][1] + annos = [label[task][0] for label in labels] + if label_type in [LabelType.CLASSIFICATION, LabelType.SEGMENTATION]: + out_labels[task] = torch.stack(annos, 0), label_type - if LabelType.CLASSIFICATION in present_annotations: - class_annos = [anno[LabelType.CLASSIFICATION] for anno in anno_dicts] - out_annotations[LabelType.CLASSIFICATION] = torch.stack(class_annos, 0) - - if LabelType.SEGMENTATION in present_annotations: - seg_annos = [anno[LabelType.SEGMENTATION] for anno in anno_dicts] - out_annotations[LabelType.SEGMENTATION] = torch.stack(seg_annos, 0) - - if LabelType.BOUNDINGBOX in present_annotations: - bbox_annos = [anno[LabelType.BOUNDINGBOX] for anno in anno_dicts] + elif label_type in [LabelType.KEYPOINTS, LabelType.BOUNDINGBOX]: label_box: list[Tensor] = [] - for i, box in enumerate(bbox_annos): - l_box = torch.zeros((box.shape[0], 6)) + for i, box in enumerate(annos): + l_box = torch.zeros((box.shape[0], box.shape[1] + 1)) l_box[:, 0] = i # add target image index for build_targets() l_box[:, 1:] = box label_box.append(l_box) - out_annotations[LabelType.BOUNDINGBOX] = torch.cat(label_box, 0) - - if LabelType.KEYPOINT in present_annotations: - keypoint_annos = [anno[LabelType.KEYPOINT] for anno in anno_dicts] - label_keypoints: list[Tensor] = [] - for i, points in enumerate(keypoint_annos): - l_kps = torch.zeros((points.shape[0], points.shape[1] + 1)) - l_kps[:, 0] = i # add target image index for build_targets() - l_kps[:, 1:] = points - label_keypoints.append(l_kps) - out_annotations[LabelType.KEYPOINT] = torch.cat(label_keypoints, 0) - - out_group_dicts[task] = out_annotations + out_labels[task] = torch.cat(label_box, 0), label_type - return imgs, out_group_dicts + # exit() + return torch.stack(imgs, 0), out_labels diff --git a/luxonis_train/utils/loaders/luxonis_loader_torch.py b/luxonis_train/utils/loaders/luxonis_loader_torch.py index b2eeb168..a6b9bf82 100644 --- a/luxonis_train/utils/loaders/luxonis_loader_torch.py +++ b/luxonis_train/utils/loaders/luxonis_loader_torch.py @@ -4,7 +4,6 @@ from luxonis_ml.data import ( BucketStorage, BucketType, - LabelType, LuxonisDataset, LuxonisLoader, ) @@ -48,20 +47,18 @@ def input_shape(self) -> Size: return Size([1, *img.shape]) def __getitem__(self, idx: int) -> LuxonisLoaderTorchOutput: - img, group_annotations = self.base_loader[idx] + img, labels = self.base_loader[idx] img = np.transpose(img, (2, 0, 1)) # HWC to CHW tensor_img = Tensor(img) - for task in group_annotations: - annotations = group_annotations[task] - for key in annotations: - annotations[key] = Tensor(annotations[key]) # type: ignore + for task, (array, label_type) in labels.items(): + labels[task] = (Tensor(array), label_type) # type: ignore - return tensor_img, group_annotations + return tensor_img, labels - def get_classes(self) -> dict[LabelType, list[str]]: + def get_classes(self) -> dict[str, list[str]]: _, classes = self.dataset.get_classes() - return {LabelType(task): classes[task] for task in classes} + return {task: classes[task] for task in classes} def get_skeletons(self) -> dict[str, dict] | None: return self.dataset.get_skeletons() diff --git a/luxonis_train/utils/types.py 
b/luxonis_train/utils/types.py index 3fb724c3..5bebc7e4 100644 --- a/luxonis_train/utils/types.py +++ b/luxonis_train/utils/types.py @@ -1,13 +1,12 @@ from typing import Annotated, Any, Literal, TypeVar -from luxonis_ml.enums import LabelType +from luxonis_ml.data import LabelType from pydantic import BaseModel, Field, ValidationError from torch import Size, Tensor Kwargs = dict[str, Any] -OutputTypes = Literal["boxes", "class", "keypoints", "segmentation", "features"] -Labels = dict[LabelType, Tensor] -TaskLabels = dict[str, Labels] +OutputTypes = Literal["boundingbox", "class", "keypoints", "segmentation", "features"] +Labels = dict[str, tuple[Tensor, LabelType]] AttachIndexType = Literal["all"] | int | tuple[int, int] | tuple[int, int, int] """AttachIndexType is used to specify to which output of the prevoius node does the @@ -36,12 +35,10 @@ def from_validation_error(cls, val_error: ValidationError, class_name: str): ) @classmethod - def from_missing_label( - cls, label: LabelType, present_labels: list[LabelType], class_name: str - ): + def from_missing_task(cls, task: str, present_tasks: list[str], class_name: str): return cls( - f"{class_name} requires {label} label, but it was not found in " - f"the label dictionary. Available labels: {present_labels}." + f"{class_name} requires {task} label, but it was not found in " + f"the label dictionary. Available labels: {present_tasks}." ) @@ -49,6 +46,15 @@ class BaseProtocol(BaseModel): class Config: arbitrary_types_allowed = True + @classmethod + def get_task(cls) -> str: + if len(cls.__annotations__) == 1: + return list(cls.__annotations__)[0] + raise ValueError( + "Protocol must have exactly one field for automatic task inference. " + "Implement custom `prepare` method in your attached module." + ) + class SegmentationProtocol(BaseProtocol): segmentation: Annotated[list[Tensor], Field(min_length=1)] @@ -59,7 +65,7 @@ class KeypointProtocol(BaseProtocol): class BBoxProtocol(BaseProtocol): - boxes: Annotated[list[Tensor], Field(min_length=1)] + boundingbox: Annotated[list[Tensor], Field(min_length=1)] class FeaturesProtocol(BaseProtocol): diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 90299371..b750dd9c 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 76% - 76% + 77% + 77% diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 815a4bd5..73909431 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,15 +1,11 @@ -import glob -import json import os -import zipfile from pathlib import Path -import cv2 import gdown -import numpy as np import pytest import torchvision from luxonis_ml.data import LuxonisDataset +from luxonis_ml.data.parsers import LuxonisParser from luxonis_ml.utils import environ Path(environ.LUXONISML_BASE_PATH).mkdir(exist_ok=True) @@ -24,7 +20,7 @@ def create_dataset(name: str) -> LuxonisDataset: @pytest.fixture(scope="session", autouse=True) def create_coco_dataset(): - dataset = create_dataset("coco_test") + dataset_name = "coco_test" url = "https://drive.google.com/uc?id=1XlvFK7aRmt8op6-hHkWVKIJQeDtOwoRT" output_folder = "../data/" output_zip = os.path.join(output_folder, "COCO_people_subset.zip") @@ -37,96 +33,12 @@ def create_coco_dataset(): ): gdown.download(url, output_zip, quiet=False) - with zipfile.ZipFile(output_zip, "r") as zip_ref: - zip_ref.extractall(output_folder) - - def COCO_people_subset_generator(): - img_dir = os.path.join(output_folder, "person_val2017_subset") - 
annot_file = os.path.join(output_folder, "person_keypoints_val2017.json") - im_paths = glob.glob(img_dir + "/*.jpg") - nums = np.array([int(Path(path).stem) for path in im_paths]) - idxs = np.argsort(nums) - im_paths = list(np.array(im_paths)[idxs]) - with open(annot_file) as file: - data = json.load(file) - imgs = data["images"] - anns = data["annotations"] - - for path in im_paths: - gran = Path(path).name - img = [img for img in imgs if img["file_name"] == gran][0] - img_id = img["id"] - img_anns = [ann for ann in anns if ann["image_id"] == img_id] - - im = cv2.imread(path) - height, width, _ = im.shape - - if len(img_anns): - yield { - "file": path, - "class": "person", - "type": "classification", - "value": True, - } - - for ann in img_anns: - seg = ann["segmentation"] - if isinstance(seg, list): - poly = [] - for s in seg: - poly_arr = np.array(s).reshape(-1, 2) - poly += [ - (poly_arr[i, 0] / width, poly_arr[i, 1] / height) - for i in range(len(poly_arr)) - ] - yield { - "file": path, - "class": "person", - "type": "polyline", - "value": poly, - } - - x, y, w, h = ann["bbox"] - yield { - "file": path, - "class": "person", - "type": "box", - "value": (x / width, y / height, w / width, h / height), - } - - kps = np.array(ann["keypoints"]).reshape(-1, 3) - keypoint = [] - for kp in kps: - keypoint.append( - (float(kp[0] / width), float(kp[1] / height), int(kp[2])) - ) - yield { - "file": path, - "class": "person", - "type": "keypoints", - "value": keypoint, - } - - dataset.set_classes(["person"]) - - annot_file = os.path.join(output_folder, "person_keypoints_val2017.json") - with open(annot_file) as file: - data = json.load(file) - dataset.set_skeletons( - { - "person": { - "labels": data["categories"][0]["keypoints"], - "edges": (np.array(data["categories"][0]["skeleton"]) - 1).tolist(), - } - } - ) - dataset.add(COCO_people_subset_generator()) - dataset.make_splits() + parser = LuxonisParser(output_zip, dataset_name=dataset_name, delete_existing=True) + parser.parse(random_split=True) -@pytest.fixture(scope="session", autouse=True) -def create_cifar10_dataset(): - dataset = create_dataset("cifar10_test") +def _create_cifar10(dataset_name: str, task_names: list[str]) -> None: + dataset = create_dataset(dataset_name) output_folder = "../data/" if not os.path.exists(output_folder): os.makedirs(output_folder) @@ -152,14 +64,25 @@ def CIFAR10_subset_generator(): break path = os.path.join(output_folder, f"cifar_{i}.png") image.save(path) - yield { - "file": path, - "class": classes[label], - "type": "classification", - "value": True, - } - - dataset.set_classes(classes) + for task_name in task_names: + yield { + "file": path, + "annotation": { + "type": "classification", + "task": task_name, + "class": classes[label], + }, + } dataset.add(CIFAR10_subset_generator()) dataset.make_splits() + + +@pytest.fixture(scope="session", autouse=True) +def create_cifar10_dataset(): + _create_cifar10("cifar10_test", ["classification"]) + + +@pytest.fixture(scope="session", autouse=True) +def create_cifar10_task_dataset(): + _create_cifar10("cifar10_task_test", [f"classification_{i}" for i in [1, 2, 3]]) diff --git a/tests/unittests/test_utils/test_loaders/test_base_loader.py b/tests/unittests/test_utils/test_loaders/test_base_loader.py index b5c8b299..a54be4b6 100644 --- a/tests/unittests/test_utils/test_loaders/test_base_loader.py +++ b/tests/unittests/test_utils/test_loaders/test_base_loader.py @@ -12,27 +12,25 @@ def test_collate_fn(): batch = [ ( torch.rand(3, 224, 224, dtype=torch.float32), - 
{"default": {LabelType.CLASSIFICATION: torch.tensor([1, 0])}}, + {"classification": (torch.tensor([1, 0]), LabelType.CLASSIFICATION)}, ), ( torch.rand(3, 224, 224, dtype=torch.float32), - {"default": {LabelType.CLASSIFICATION: torch.tensor([0, 1])}}, + {"classification": (torch.tensor([0, 1]), LabelType.CLASSIFICATION)}, ), ] # Call collate_fn - imgs, annotations = collate_fn(batch) + imgs, annotations = collate_fn(batch) # type: ignore # Check images tensor assert imgs.shape == (2, 3, 224, 224) assert imgs.dtype == torch.float32 # Check annotations - assert "default" in annotations - annotations = annotations["default"] - assert LabelType.CLASSIFICATION in annotations - assert annotations[LabelType.CLASSIFICATION].shape == (2, 2) - assert annotations[LabelType.CLASSIFICATION].dtype == torch.int64 + assert "classification" in annotations + assert annotations["classification"][0].shape == (2, 2) + assert annotations["classification"][0].dtype == torch.int64 # TODO: test also segmentation, boundingbox and keypoint From abe7d3dc8fa18a106bc96687ef07c746feceea9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Fri, 7 Jun 2024 20:02:03 +0200 Subject: [PATCH 26/75] Changed Imports in Config (#38) Co-authored-by: GitHub Actions --- luxonis_train/utils/config.py | 7 ++++--- media/coverage_badge.svg | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 31fd55ee..74a8e6a5 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -5,9 +5,6 @@ from luxonis_ml.utils import Environ, LuxonisConfig, LuxonisFileSystem, setup_logging from pydantic import BaseModel, ConfigDict, Field, model_validator -from luxonis_train.utils.general import is_acyclic -from luxonis_train.utils.registry import MODELS - logger = logging.getLogger(__name__) @@ -65,6 +62,8 @@ class ModelConfig(CustomBaseModel): @model_validator(mode="after") def check_predefined_model(self): + from luxonis_train.utils.registry import MODELS + if self.predefined_model: logger.info(f"Using predefined model: `{self.predefined_model.name}`") model = MODELS.get(self.predefined_model.name)( @@ -85,6 +84,8 @@ def check_predefined_model(self): @model_validator(mode="after") def check_graph(self): + from luxonis_train.utils.general import is_acyclic + graph = {node.alias or node.name: node.inputs for node in self.nodes} if not is_acyclic(graph): raise ValueError("Model graph is not acyclic.") diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index b750dd9c..90299371 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 77% - 77% + 76% + 76% From 669a02321345542e1b28b6da00ac9bd3334cab3e Mon Sep 17 00:00:00 2001 From: CaptainTrojan <49991681+CaptainTrojan@users.noreply.github.com> Date: Thu, 13 Jun 2024 04:55:36 +0200 Subject: [PATCH 27/75] Multi Input Support (#36) Co-authored-by: GitHub Actions Co-authored-by: Martin Kozlovsky --- .github/workflows/tests.yaml | 2 +- configs/example_multi_input.yaml | 99 +++++++++++ .../attached_modules/base_attached_module.py | 15 +- .../losses/adaptive_detection_loss.py | 2 +- .../metrics/mean_average_precision.py | 2 +- .../mean_average_precision_keypoints.py | 2 +- .../metrics/object_keypoint_similarity.py | 2 +- .../attached_modules/visualizers/utils.py | 5 +- luxonis_train/core/core.py | 1 + luxonis_train/models/luxonis_model.py | 144 ++++++++++------ luxonis_train/models/luxonis_output.py | 4 +- 
luxonis_train/nodes/base_node.py | 14 +- luxonis_train/nodes/bisenet_head.py | 2 +- luxonis_train/nodes/classification_head.py | 4 +- luxonis_train/nodes/efficient_bbox_head.py | 2 +- .../nodes/implicit_keypoint_bbox_head.py | 2 +- luxonis_train/nodes/segmentation_head.py | 3 +- luxonis_train/utils/boxutils.py | 4 +- luxonis_train/utils/config.py | 4 +- luxonis_train/utils/general.py | 4 +- luxonis_train/utils/loaders/base_loader.py | 71 ++++++-- .../utils/loaders/luxonis_loader_torch.py | 11 +- media/coverage_badge.svg | 4 +- tests/integration/test_multi_input.py | 156 ++++++++++++++++++ .../test_loaders/test_base_loader.py | 64 +++++-- 25 files changed, 508 insertions(+), 115 deletions(-) create mode 100644 configs/example_multi_input.yaml create mode 100644 tests/integration/test_multi_input.py diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 0b4f51da..af77c60f 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -13,7 +13,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest, macOS-latest] + os: [ubuntu-latest, windows-latest] version: ['3.10', '3.11'] runs-on: ${{ matrix.os }} diff --git a/configs/example_multi_input.yaml b/configs/example_multi_input.yaml new file mode 100644 index 00000000..7d4d252b --- /dev/null +++ b/configs/example_multi_input.yaml @@ -0,0 +1,99 @@ +loader: + + # Yields "left", "right", "disparity", and "pointcloud" inputs. + # See implementation in `tests/integration/test_multi_input.py`. + name: CustomMultiInputLoader + + # Name of the key in the batch that contains image-like data. + # Needs to be set for visualizers and evaluators to work. + image_source: left + +use_rich_text: True + +model: + name: example_multi_input + nodes: + - name: FullBackbone + alias: full_backbone + + - name: RGBDBackbone + alias: rgbd_backbone + input_sources: + - left + - right + - disparity + + - name: PointcloudBackbone + alias: pointcloud_backbone + input_sources: + - pointcloud + + - name: FusionNeck + alias: fusion_neck + inputs: + - rgbd_backbone + - pointcloud_backbone + input_sources: + - disparity + + - name: FusionNeck2 + alias: fusion_neck_2 + inputs: + - rgbd_backbone + - pointcloud_backbone + - full_backbone + + - name: CustomSegHead1 + alias: head_1 + inputs: + - fusion_neck + + - name: CustomSegHead2 + alias: head_2 + inputs: + - fusion_neck + - fusion_neck_2 + input_sources: + - disparity + + losses: + - name: BCEWithLogitsLoss + alias: loss_1 + attached_to: head_1 + + - name: BCEWithLogitsLoss + alias: loss_2 + attached_to: head_2 + + metrics: + - name: JaccardIndex + alias: jaccard_index_1 + attached_to: head_1 + is_main_metric: True + params: + task: binary + + - name: JaccardIndex + alias: jaccard_index_2 + attached_to: head_2 + params: + task: binary + +trainer: + batch_size: 8 + epochs: 3 + num_workers: 4 + validation_interval: 3 + num_log_images: -1 + + callbacks: + - name: ExportOnTrainEnd + + optimizer: + name: Adam + params: + lr: 0.01 + +exporter: + onnx: + opset_version: 11 diff --git a/luxonis_train/attached_modules/base_attached_module.py b/luxonis_train/attached_modules/base_attached_module.py index 1e446fbb..6ac47820 100644 --- a/luxonis_train/attached_modules/base_attached_module.py +++ b/luxonis_train/attached_modules/base_attached_module.py @@ -1,3 +1,4 @@ +import logging from abc import ABC from typing import Generic @@ -16,6 +17,8 @@ Packet, ) +logger = logging.getLogger(__name__) + Ts = TypeVarTuple("Ts") @@ -143,8 +146,16 @@ def prepare(self, inputs: 
Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]: x = self.get_input_tensors(inputs) label, label_type = self.get_label(labels) if label_type in [LabelType.CLASSIFICATION, LabelType.SEGMENTATION]: - if isinstance(x, list) and len(x) == 1: - x = x[0] + if isinstance(x, list): + if len(x) == 1: + x = x[0] + else: + logger.warning( + f"Module {self.__class__.__name__} expects a single tensor as input, " + f"but got {len(x)} tensors. Using the last tensor. " + f"If this is not the desired behavior, please override the `prepare` method of the attached module or the `wrap` method of {self.node.__class__.__name__}." + ) + x = x[-1] return x, label # type: ignore diff --git a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py index 521b6d8e..21291bfa 100644 --- a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py +++ b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py @@ -82,7 +82,7 @@ def __init__( self.stride = self.node.stride self.grid_cell_size = self.node.grid_cell_size self.grid_cell_offset = self.node.grid_cell_offset - self.original_img_size = self.node.original_in_shape[2:] + self.original_img_size = self.node.original_in_shape[1:] self.n_warmup_epochs = n_warmup_epochs self.atts_assigner = ATSSAssigner(topk=9, n_classes=self.n_classes) diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision.py b/luxonis_train/attached_modules/metrics/mean_average_precision.py index 680b0e5a..67c010ec 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision.py @@ -41,7 +41,7 @@ def prepare( label = labels[self.task][0] output_nms = self.get_input_tensors(outputs) - image_size = self.node.original_in_shape[2:] + image_size = self.node.original_in_shape[1:] output_list: list[dict[str, Tensor]] = [] label_list: list[dict[str, Tensor]] = [] diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py index 42b1395d..31bc7557 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py @@ -109,7 +109,7 @@ def prepare(self, outputs: Packet[Tensor], labels: Labels): output_list_kpt_map = [] label_list_kpt_map = [] - image_size = self.node.original_in_shape[2:] + image_size = self.node.original_in_shape[1:] output_kpts: list[Tensor] = outputs["keypoints"] output_bboxes: list[Tensor] = outputs["boundingbox"] diff --git a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py index 959108c4..c1768012 100644 --- a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py +++ b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py @@ -79,7 +79,7 @@ def prepare( output_list_oks = [] label_list_oks = [] - image_size = self.node.original_in_shape[2:] + image_size = self.node.original_in_shape[1:] for i, pred_kpt in enumerate(outputs["keypoints"]): output_list_oks.append({"keypoints": pred_kpt}) diff --git a/luxonis_train/attached_modules/visualizers/utils.py b/luxonis_train/attached_modules/visualizers/utils.py index aa1a90d3..c55b12ce 100644 --- a/luxonis_train/attached_modules/visualizers/utils.py +++ b/luxonis_train/attached_modules/visualizers/utils.py @@ -220,7 +220,10 @@ def 
unnormalize( return out_img -def get_unnormalized_images(cfg: Config, images: Tensor) -> Tensor: +def get_unnormalized_images(cfg: Config, inputs: dict[str, Tensor]) -> Tensor: + # Get images from inputs according to config + images = inputs[cfg.loader.image_source] + normalize_params = cfg.trainer.preprocessing.normalize.params mean = std = None if cfg.trainer.preprocessing.normalize.active: diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index 1ac3fce0..c1b1fb56 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -147,6 +147,7 @@ def __init__( if view == "train" else self.cfg.loader.val_view ), + image_source=self.cfg.loader.image_source, **self.cfg.loader.params, ) for view in ["train", "val", "test"] diff --git a/luxonis_train/models/luxonis_model.py b/luxonis_train/models/luxonis_model.py index e2568ec0..2daf61cb 100644 --- a/luxonis_train/models/luxonis_model.py +++ b/luxonis_train/models/luxonis_model.py @@ -33,7 +33,7 @@ from luxonis_train.utils.config import AttachedModuleConfig, Config from luxonis_train.utils.general import ( DatasetMetadata, - get_shape_packet, + to_shape_packet, traverse_graph, ) from luxonis_train.utils.registry import CALLBACKS, OPTIMIZERS, SCHEDULERS, Registry @@ -100,7 +100,7 @@ def __init__( self, cfg: Config, save_dir: str, - input_shape: list[int] | Size, + input_shape: dict[str, Size], dataset_metadata: DatasetMetadata | None = None, **kwargs, ): @@ -110,8 +110,9 @@ def __init__( @param cfg: Config object. @type save_dir: str @param save_dir: Directory to save checkpoints. - @type input_shape: list[int] | L{Size} - @param input_shape: Shape of the input tensor. + @type input_shape: dict[str, Size] + @param input_shape: Dictionary of input shapes. Keys are input names, values are + shapes. @type dataset_metadata: L{DatasetMetadata} | None @param dataset_metadata: Dataset metadata. @type kwargs: Any @@ -123,11 +124,13 @@ def __init__( self._export: bool = False self.cfg = cfg - self.original_in_shape = Size(input_shape) + self.original_in_shape = input_shape + self.image_source = cfg.loader.image_source self.dataset_metadata = dataset_metadata or DatasetMetadata() self.frozen_nodes: list[tuple[nn.Module, int]] = [] self.graph: dict[str, list[str]] = {} - self.input_shapes: dict[str, list[Size]] = {} + self.loader_input_shapes: dict[str, dict[str, Size]] = {} + self.node_input_sources: dict[str, list[str]] = defaultdict(list) self.loss_weights: dict[str, float] = {} self.main_metric: str | None = None self.save_dir = save_dir @@ -157,8 +160,36 @@ def __init__( unfreeze_after = int(node_cfg.freezing.unfreeze_after * epochs) frozen_nodes.append((node_name, unfreeze_after)) nodes[node_name] = (Node, {**node_cfg.params, "task": node_cfg.task}) - if not node_cfg.inputs: - self.input_shapes[node_name] = [Size(input_shape)] + + # Handle inputs for this node + if node_cfg.input_sources: + self.node_input_sources[node_name] = node_cfg.input_sources + + if not node_cfg.inputs and not node_cfg.input_sources: + # If no inputs (= preceding nodes) nor any input_sources (= loader outputs) are specified, + # assume the node is the starting node and takes all inputs from the loader. + + self.loader_input_shapes[node_name] = { + k: Size(v) for k, v in input_shape.items() + } + self.node_input_sources[node_name] = list(input_shape.keys()) + else: + # For each input_source, check if the loader provides the required output. + # If yes, add the shape to the input_shapes dict. If not, raise an error. 
+ self.loader_input_shapes[node_name] = {} + for input_source in node_cfg.input_sources: + if input_source not in input_shape: + raise ValueError( + f"Node {node_name} requires input source {input_source}, " + "which is not provided by the loader." + ) + + self.loader_input_shapes[node_name][input_source] = Size( + input_shape[input_source] + ) + + # Inputs (= preceding nodes) are handled in the _initiate_nodes method. + self.graph[node_name] = node_cfg.inputs self.nodes = self._initiate_nodes(nodes) @@ -210,44 +241,48 @@ def _initiate_nodes( """ initiated_nodes: dict[str, BaseNode] = {} - dummy_outputs: dict[str, Packet[Tensor]] = { - f"__{node_name}_input__": { - "features": [torch.zeros(2, *shape[1:]) for shape in shapes] - } - for node_name, shapes in self.input_shapes.items() + dummy_inputs: dict[str, Packet[Tensor]] = { + source_name: {"features": [torch.zeros(2, *shape)]} + for shapes in self.loader_input_shapes.values() + for source_name, shape in shapes.items() } for node_name, (Node, node_kwargs), node_input_names, _ in traverse_graph( self.graph, nodes ): - node_input_shapes: list[Packet[Size]] = [] node_dummy_inputs: list[Packet[Tensor]] = [] + """List of dummy input packets for the node. - if not node_input_names: - node_input_names = [f"__{node_name}_input__"] + The first one is always from the loader. + """ + node_input_shapes: list[Packet[Size]] = [] + """Corresponding list of input shapes.""" + node_input_names += self.node_input_sources[node_name] for node_input_name in node_input_names: - dummy_output = dummy_outputs[node_input_name] - shape_packet = get_shape_packet(dummy_output) + dummy_input = dummy_inputs[node_input_name] + + node_dummy_inputs.append(dummy_input) + + shape_packet = to_shape_packet(dummy_input) node_input_shapes.append(shape_packet) - node_dummy_inputs.append(dummy_output) - node = Node( - input_shapes=node_input_shapes, - original_in_shape=self.original_in_shape, - dataset_metadata=self.dataset_metadata, - **node_kwargs, - ) - node_outputs = node.run(node_dummy_inputs) + node = Node( + input_shapes=node_input_shapes, + original_in_shape=self.original_in_shape[self.image_source], + dataset_metadata=self.dataset_metadata, + **node_kwargs, + ) + node_outputs = node.run(node_dummy_inputs) - dummy_outputs[node_name] = node_outputs - initiated_nodes[node_name] = node + dummy_inputs[node_name] = node_outputs + initiated_nodes[node_name] = node return nn.ModuleDict(initiated_nodes) def forward( self, - inputs: Tensor, + inputs: dict[str, Tensor], labels: Labels | None = None, images: Tensor | None = None, *, @@ -277,27 +312,23 @@ def forward( @rtype: L{LuxonisOutput} @return: Output of the model. """ - input_node_name = list(self.input_shapes.keys())[0] - input_dict = {input_node_name: [inputs]} - losses: dict[ str, dict[str, Tensor | tuple[Tensor, dict[str, Tensor]]] ] = defaultdict(dict) visualizations: dict[str, dict[str, Tensor]] = defaultdict(dict) - computed: dict[str, Packet[Tensor]] = { - f"__{node_name}_input__": {"features": input_tensors} - for node_name, input_tensors in input_dict.items() - } + computed: dict[str, Packet[Tensor]] = {} for node_name, node, input_names, unprocessed in traverse_graph( self.graph, cast(dict[str, BaseNode], self.nodes) ): - # Special input for the first node. Will be changed when - # multiple inputs will be supported in `luxonis-ml.data`. 
- if not input_names: - input_names = [f"__{node_name}_input__"] + input_names += self.node_input_sources[node_name] - node_inputs = [computed[pred] for pred in input_names] + node_inputs: list[Packet[Tensor]] = [] + for pred in input_names: + if pred in computed: + node_inputs.append(computed[pred]) + else: + node_inputs.append({"features": [inputs[pred]]}) outputs = node.run(node_inputs) computed[node_name] = outputs @@ -389,18 +420,22 @@ def export_onnx(self, save_path: str, **kwargs) -> list[str]: """ inputs = { - name: [torch.zeros(shape).to(self.device) for shape in shapes] - for name, shapes in self.input_shapes.items() + input_name: torch.zeros([1, *shape]).to(self.device) + for shapes in self.loader_input_shapes.values() + for input_name, shape in shapes.items() + } + + inputs_deep_clone = { + k: torch.zeros(elem.shape).to(self.device) for k, elem in inputs.items() } - # TODO: multiple inputs - inp = list(inputs.values())[0][0] + inputs_for_onnx = {"inputs": inputs_deep_clone} for module in self.modules(): if isinstance(module, BaseNode): module.set_export_mode() - outputs = self.forward(inp.clone()).outputs + outputs = self.forward(inputs_deep_clone).outputs output_order = sorted( [ (node_name, output_name, i) @@ -440,10 +475,13 @@ def export_forward(inputs) -> tuple[Tensor, ...]: ) self.forward = export_forward # type: ignore + + if "input_names" not in kwargs: + kwargs["input_names"] = list(inputs.keys()) if "output_names" not in kwargs: kwargs["output_names"] = output_names - self.to_onnx(save_path, inp, **kwargs) + self.to_onnx(save_path, inputs_for_onnx, **kwargs) self.forward = old_forward # type: ignore @@ -496,7 +534,7 @@ def process_losses( training_step_output["loss"] = final_loss.detach().cpu() return final_loss, training_step_output - def training_step(self, train_batch: tuple[Tensor, Labels]) -> Tensor: + def training_step(self, train_batch: tuple[dict[str, Tensor], Labels]) -> Tensor: """Performs one step of training with provided batch.""" outputs = self.forward(*train_batch) assert outputs.losses, "Losses are empty, check if you have defined any loss" @@ -505,11 +543,15 @@ def training_step(self, train_batch: tuple[Tensor, Labels]) -> Tensor: self.training_step_outputs.append(training_step_output) return loss - def validation_step(self, val_batch: tuple[Tensor, Labels]) -> dict[str, Tensor]: + def validation_step( + self, val_batch: tuple[dict[str, Tensor], Labels] + ) -> dict[str, Tensor]: """Performs one step of validation with provided batch.""" return self._evaluation_step("val", val_batch) - def test_step(self, test_batch: tuple[Tensor, Labels]) -> dict[str, Tensor]: + def test_step( + self, test_batch: tuple[dict[str, Tensor], Labels] + ) -> dict[str, Tensor]: """Performs one step of testing with provided batch.""" return self._evaluation_step("test", test_batch) @@ -549,7 +591,7 @@ def get_status_percentage(self) -> float: return (self.current_epoch / self.cfg.trainer.epochs) * 100 def _evaluation_step( - self, mode: Literal["test", "val"], batch: tuple[Tensor, Labels] + self, mode: Literal["test", "val"], batch: tuple[dict[str, Tensor], Labels] ) -> dict[str, Tensor]: inputs, labels = batch images = None diff --git a/luxonis_train/models/luxonis_output.py b/luxonis_train/models/luxonis_output.py index e6b8e16c..d69943fc 100644 --- a/luxonis_train/models/luxonis_output.py +++ b/luxonis_train/models/luxonis_output.py @@ -3,7 +3,7 @@ from torch import Tensor -from luxonis_train.utils.general import get_shape_packet +from luxonis_train.utils.general import 
to_shape_packet from luxonis_train.utils.types import Packet @@ -16,7 +16,7 @@ class LuxonisOutput: def __str__(self) -> str: outputs = { - node_name: get_shape_packet(packet) + node_name: to_shape_packet(packet) for node_name, packet in self.outputs.items() } viz = { diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py index 327c8d8f..8ee03591 100644 --- a/luxonis_train/nodes/base_node.py +++ b/luxonis_train/nodes/base_node.py @@ -191,13 +191,13 @@ def in_sizes(self) -> Size | list[Size]: Example: - >>> input_shapes = [{"features": [Size(1, 64, 128, 128), Size(1, 3, 224, 224)]}] + >>> input_shapes = [{"features": [Size(64, 128, 128), Size(3, 224, 224)]}] >>> attach_index = -1 - >>> in_sizes = Size(1, 3, 224, 224) + >>> in_sizes = Size(3, 224, 224) - >>> input_shapes = [{"features": [Size(1, 64, 128, 128), Size(1, 3, 224, 224)]}] + >>> input_shapes = [{"features": [Size(64, 128, 128), Size(3, 224, 224)]}] >>> attach_index = "all" - >>> in_sizes = [Size(1, 64, 128, 128), Size(1, 3, 224, 224)] + >>> in_sizes = [Size(64, 128, 128), Size(3, 224, 224)] @type: Size | list[Size] @raises IncompatibleException: If the C{input_shapes} are too complicated for @@ -230,7 +230,7 @@ def in_channels(self) -> int | list[int]: @raises IncompatibleException: If the C{input_shapes} are too complicated for the default implementation. """ - return self._get_nth_size(1) + return self._get_nth_size(-3) @property def in_height(self) -> int | list[int]: @@ -243,7 +243,7 @@ def in_height(self) -> int | list[int]: @raises IncompatibleException: If the C{input_shapes} are too complicated for the default implementation. """ - return self._get_nth_size(2) + return self._get_nth_size(-2) @property def in_width(self) -> int | list[int]: @@ -256,7 +256,7 @@ def in_width(self) -> int | list[int]: @raises IncompatibleException: If the C{input_shapes} are too complicated for the default implementation. """ - return self._get_nth_size(3) + return self._get_nth_size(-1) @property def export(self) -> bool: diff --git a/luxonis_train/nodes/bisenet_head.py b/luxonis_train/nodes/bisenet_head.py index 9185d823..8bac3573 100644 --- a/luxonis_train/nodes/bisenet_head.py +++ b/luxonis_train/nodes/bisenet_head.py @@ -32,7 +32,7 @@ def __init__( """ super().__init__(task=LabelType.SEGMENTATION, **kwargs) - original_height = self.original_in_shape[2] + original_height = self.original_in_shape[1] upscale_factor = 2 ** infer_upscale_factor(self.in_height, original_height) out_channels = self.n_classes * upscale_factor * upscale_factor diff --git a/luxonis_train/nodes/classification_head.py b/luxonis_train/nodes/classification_head.py index 7e55a590..ceadbc60 100644 --- a/luxonis_train/nodes/classification_head.py +++ b/luxonis_train/nodes/classification_head.py @@ -19,9 +19,7 @@ def __init__( @param dropout_rate: Dropout rate before last layer, range C{[0, 1]}. Defaults to C{0.2}. 
""" - super().__init__( - _task_type=kwargs.pop("_task_type", LabelType.CLASSIFICATION), **kwargs - ) + super().__init__(_task_type=LabelType.CLASSIFICATION, **kwargs) self.head = nn.Sequential( nn.AdaptiveAvgPool2d(1), diff --git a/luxonis_train/nodes/efficient_bbox_head.py b/luxonis_train/nodes/efficient_bbox_head.py index 97ee1bfc..e7b23288 100644 --- a/luxonis_train/nodes/efficient_bbox_head.py +++ b/luxonis_train/nodes/efficient_bbox_head.py @@ -126,7 +126,7 @@ def _fit_stride_to_num_heads(self): """Returns correct stride for number of heads and attach index.""" stride = torch.tensor( [ - self.original_in_shape[2] / x[2] # type: ignore + self.original_in_shape[1] / x[1] # type: ignore for x in self.in_sizes[: self.n_heads] ], dtype=torch.int, diff --git a/luxonis_train/nodes/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/implicit_keypoint_bbox_head.py index 431dcf46..dde27ed5 100644 --- a/luxonis_train/nodes/implicit_keypoint_bbox_head.py +++ b/luxonis_train/nodes/implicit_keypoint_bbox_head.py @@ -218,7 +218,7 @@ def _fit_to_num_heads(self, channel_list: list): out_channel_list = channel_list[: self.num_heads] stride = torch.tensor( [ - self.original_in_shape[2] / h + self.original_in_shape[1] / h for h in cast(list[int], self.in_height)[: self.num_heads] ], dtype=torch.int, diff --git a/luxonis_train/nodes/segmentation_head.py b/luxonis_train/nodes/segmentation_head.py index 5955953d..67461eb0 100644 --- a/luxonis_train/nodes/segmentation_head.py +++ b/luxonis_train/nodes/segmentation_head.py @@ -4,7 +4,6 @@ @license: U{BSD-3 } """ - import torch.nn as nn from torch import Tensor @@ -29,7 +28,7 @@ def __init__(self, **kwargs): """ super().__init__(_task_type=LabelType.SEGMENTATION, **kwargs) - original_height = self.original_in_shape[2] + original_height = self.original_in_shape[1] num_up = infer_upscale_factor(self.in_height, original_height, strict=False) modules = [] diff --git a/luxonis_train/utils/boxutils.py b/luxonis_train/utils/boxutils.py index 15fca04f..3a26cc4f 100644 --- a/luxonis_train/utils/boxutils.py +++ b/luxonis_train/utils/boxutils.py @@ -433,7 +433,9 @@ def anchors_from_dataset( widths.append(curr_wh) inputs = inp assert inputs is not None, "No inputs found in data loader" - _, _, h, w = inputs.shape # assuming all images are same size + _, _, h, w = inputs[ + loader.dataset.image_source # type: ignore + ].shape # assuming all images are same size img_size = torch.tensor([w, h]) wh = torch.vstack(widths) * img_size diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 74a8e6a5..96d132ab 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -35,7 +35,8 @@ class FreezingConfig(CustomBaseModel): class ModelNodeConfig(CustomBaseModel): name: str alias: str | None = None - inputs: list[str] = [] + inputs: list[str] = [] # From preceding nodes + input_sources: list[str] = [] # From data loader params: dict[str, Any] = {} freezing: FreezingConfig = FreezingConfig() task: str | None = None @@ -132,6 +133,7 @@ class TrackerConfig(CustomBaseModel): class LoaderConfig(CustomBaseModel): name: str = "LuxonisLoaderTorch" + image_source: str = "image" train_view: str = "train" val_view: str = "val" test_view: str = "test" diff --git a/luxonis_train/utils/general.py b/luxonis_train/utils/general.py index 21c35df0..099beb66 100644 --- a/luxonis_train/utils/general.py +++ b/luxonis_train/utils/general.py @@ -1,5 +1,6 @@ import logging import math +from copy import deepcopy from typing import Generator, TypeVar from 
pydantic import BaseModel @@ -210,7 +211,7 @@ def infer_upscale_factor( ) -def get_shape_packet(packet: Packet[Tensor]) -> Packet[Size]: +def to_shape_packet(packet: Packet[Tensor]) -> Packet[Size]: shape_packet: Packet[Size] = {} for name, value in packet.items(): shape_packet[name] = [x.shape for x in value] @@ -281,6 +282,7 @@ def traverse_graph( ) # sort the set to allow reproducibility processed: set[str] = set() + graph = deepcopy(graph) while unprocessed_nodes: unprocessed_nodes_copy = unprocessed_nodes.copy() for node_name in unprocessed_nodes_copy: diff --git a/luxonis_train/utils/loaders/base_loader.py b/luxonis_train/utils/loaders/base_loader.py index c3f5e141..c4f22428 100644 --- a/luxonis_train/utils/loaders/base_loader.py +++ b/luxonis_train/utils/loaders/base_loader.py @@ -9,8 +9,8 @@ from luxonis_train.utils.registry import LOADERS from luxonis_train.utils.types import Labels, LabelType -LuxonisLoaderTorchOutput = tuple[Tensor, Labels] -"""LuxonisLoaderTorchOutput is a tuple of images and corresponding labels.""" +LuxonisLoaderTorchOutput = tuple[dict[str, Tensor], Labels] +"""LuxonisLoaderTorchOutput is a tuple of source tensors and corresponding labels.""" class BaseLoaderTorch( @@ -27,14 +27,57 @@ def __init__( self, view: str, augmentations: Augmentations | None = None, + image_source: str | None = None, ): self.view = view self.augmentations = augmentations + self._image_source = image_source + + @property + def image_source(self) -> str: + """Name of the input image group. + + Example: 'image' + """ + if self._image_source is None: + raise ValueError("image_source is not set") + return self._image_source @property @abstractmethod - def input_shape(self) -> Size: - """Input shape in [N,C,H,W] format.""" + def input_shape(self) -> dict[str, Size]: + """ + Shape of each loader group (sub-element), WITHOUT batch dimension. + Examples: + + 1. Single image input: + { + 'image': torch.Size([3, 224, 224]), + } + + 2. Image and segmentation input: + { + 'image': torch.Size([3, 224, 224]), + 'segmentation': torch.Size([1, 224, 224]), + } + + 3. Left image, right image and disparity input: + { + 'left': torch.Size([3, 224, 224]), + 'right': torch.Size([3, 224, 224]), + 'disparity': torch.Size([1, 224, 224]), + } + + 4. Image, keypoints, and point cloud input: + { + 'image': torch.Size([3, 224, 224]), + 'keypoints': torch.Size([17, 2]), + 'point_cloud': torch.Size([20000, 3]), + } + + @rtype: dict[str, Size] + @return: A dictionary mapping group names to their shapes. + """ ... @abstractmethod @@ -74,18 +117,21 @@ def get_skeletons(self) -> dict[str, dict] | None: def collate_fn( batch: list[LuxonisLoaderTorchOutput], -) -> tuple[Tensor, Labels]: +) -> tuple[dict[str, Tensor], Labels]: """Default collate function used for training. @type batch: list[LuxonisLoaderTorchOutput] - @param batch: List of images and their annotations in the LuxonisLoaderTorchOutput - format. - @rtype: tuple[Tensor, dict[LabelType, Tensor]] - @return: Tuple of images and annotations in the format expected by the model. + @param batch: List of loader outputs (dict of Tensors) and labels (dict of Tensors) + in the LuxonisLoaderTorchOutput format. + @rtype: tuple[dict[str, Tensor], dict[LabelType, Tensor]] + @return: Tuple of inputs and annotations in the format expected by the model. """ - imgs: tuple[Tensor, ...] + inputs: tuple[dict[str, Tensor], ...] labels: tuple[Labels, ...] 
- imgs, labels = zip(*batch) + inputs, labels = zip(*batch) + + out_inputs = {k: torch.stack([i[k] for i in inputs], 0) for k in inputs[0].keys()} + out_labels = {task: {} for task in labels[0].keys()} out_labels = {} @@ -104,5 +150,4 @@ def collate_fn( label_box.append(l_box) out_labels[task] = torch.cat(label_box, 0), label_type - # exit() - return torch.stack(imgs, 0), out_labels + return out_inputs, out_labels diff --git a/luxonis_train/utils/loaders/luxonis_loader_torch.py b/luxonis_train/utils/loaders/luxonis_loader_torch.py index a6b9bf82..094bc96a 100644 --- a/luxonis_train/utils/loaders/luxonis_loader_torch.py +++ b/luxonis_train/utils/loaders/luxonis_loader_torch.py @@ -42,19 +42,20 @@ def __len__(self) -> int: return len(self.base_loader) @property - def input_shape(self) -> Size: - img, _ = self[0] - return Size([1, *img.shape]) + def input_shape(self) -> dict[str, Size]: + img = self[0][0][self.image_source] + return {self.image_source: img.shape} def __getitem__(self, idx: int) -> LuxonisLoaderTorchOutput: img, labels = self.base_loader[idx] img = np.transpose(img, (2, 0, 1)) # HWC to CHW tensor_img = Tensor(img) + tensor_labels = {} for task, (array, label_type) in labels.items(): - labels[task] = (Tensor(array), label_type) # type: ignore + tensor_labels[task] = (Tensor(array), label_type) - return tensor_img, labels + return {self.image_source: tensor_img}, tensor_labels def get_classes(self) -> dict[str, list[str]]: _, classes = self.dataset.get_classes() diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 90299371..b750dd9c 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 76% - 76% + 77% + 77% diff --git a/tests/integration/test_multi_input.py b/tests/integration/test_multi_input.py new file mode 100644 index 00000000..8f1eef23 --- /dev/null +++ b/tests/integration/test_multi_input.py @@ -0,0 +1,156 @@ +import os +import shutil +from pathlib import Path + +import pytest +import torch +from torch import Tensor +from torch.nn.parameter import Parameter + +from luxonis_train.core import Exporter, Inferer, Trainer +from luxonis_train.nodes import BaseNode +from luxonis_train.utils.loaders import BaseLoaderTorch +from luxonis_train.utils.types import FeaturesProtocol, LabelType + + +class CustomMultiInputLoader(BaseLoaderTorch): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + @property + def input_shape(self): + return { + "left": torch.Size([3, 224, 224]), + "right": torch.Size([3, 224, 224]), + "disparity": torch.Size([1, 224, 224]), + "pointcloud": torch.Size([1000, 3]), + } + + def __getitem__(self, idx): + # Fake data + left = torch.rand(3, 224, 224, dtype=torch.float32) + right = torch.rand(3, 224, 224, dtype=torch.float32) + disparity = torch.rand(1, 224, 224, dtype=torch.float32) + pointcloud = torch.rand(1000, 3, dtype=torch.float32) + inputs = { + "left": left, + "right": right, + "disparity": disparity, + "pointcloud": pointcloud, + } + + # Fake labels + segmap = torch.zeros(1, 224, 224, dtype=torch.float32) + labels = { + "segmentation": (segmap, LabelType.SEGMENTATION), + } + + return inputs, labels + + def __len__(self): + return 10 + + def get_classes(self) -> dict[LabelType, list[str]]: + return {LabelType.SEGMENTATION: ["square"]} + + +class MultiInputTestBaseNode(BaseNode): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.scalar = Parameter(torch.tensor(1.0), requires_grad=True) + + def forward(self, inputs): + return [self.scalar * 
inp for inp in inputs] + + def unwrap(self, inputs: list[dict[str, list[Tensor]]]): + return [item for inp in inputs for key in inp for item in inp[key]] + + +class FullBackbone(MultiInputTestBaseNode): + def __init__(self, **kwargs): + in_protocols = [FeaturesProtocol] * 4 + super().__init__(**kwargs) + self.in_protocols = in_protocols + + +class RGBDBackbone(MultiInputTestBaseNode): + def __init__(self, **kwargs): + in_protocols = [FeaturesProtocol] * 3 + super().__init__(**kwargs) + self.in_protocols = in_protocols + + +class PointcloudBackbone(MultiInputTestBaseNode): + def __init__(self, **kwargs): + in_protocols = [FeaturesProtocol] + super().__init__(**kwargs) + self.in_protocols = in_protocols + + +class FusionNeck(MultiInputTestBaseNode): + def __init__(self, **kwargs): + in_protocols = [ + FeaturesProtocol, + FeaturesProtocol, + FeaturesProtocol, + ] + super().__init__(**kwargs) + self.in_protocols = in_protocols + + +class FusionNeck2(MultiInputTestBaseNode): + def __init__(self, **kwargs): + in_protocols = [FeaturesProtocol, FeaturesProtocol, FeaturesProtocol] + super().__init__(**kwargs) + self.in_protocols = in_protocols + + +class CustomSegHead1(MultiInputTestBaseNode): + def __init__(self, **kwargs): + in_protocols = [FeaturesProtocol] + super().__init__(**kwargs, _task_type=LabelType.SEGMENTATION) + self.in_protocols = in_protocols + + +class CustomSegHead2(MultiInputTestBaseNode): + def __init__(self, **kwargs): + in_protocols = [ + FeaturesProtocol, + FeaturesProtocol, + FeaturesProtocol, + ] + super().__init__(**kwargs, _task_type=LabelType.SEGMENTATION) + self.in_protocols = in_protocols + + +@pytest.fixture(scope="function", autouse=True) +def clear_output(): + shutil.rmtree("output", ignore_errors=True) + + +@pytest.mark.parametrize( + "config_file", [path for path in os.listdir("configs") if "multi_input" in path] +) +def test_sanity(config_file): + # Test training + trainer = Trainer(f"configs/{config_file}") + trainer.train() + # Test evaluation + trainer.test(view="val") + + # Test export + Exporter(f"configs/{config_file}").export("test_export_multi_input.onnx") + # Cleanup after exporter + assert os.path.exists("test_export_multi_input.onnx") + os.remove("test_export_multi_input.onnx") + + # Test inference + Inferer( + f"configs/{config_file}", + opts=None, + view="train", + save_dir=Path("infer_save_dir"), + ).infer() + # Cleanup after inferer + assert os.path.exists("infer_save_dir") + shutil.rmtree("infer_save_dir") diff --git a/tests/unittests/test_utils/test_loaders/test_base_loader.py b/tests/unittests/test_utils/test_loaders/test_base_loader.py index a54be4b6..112b321a 100644 --- a/tests/unittests/test_utils/test_loaders/test_base_loader.py +++ b/tests/unittests/test_utils/test_loaders/test_base_loader.py @@ -7,32 +7,64 @@ from luxonis_train.utils.types import LabelType -def test_collate_fn(): +@pytest.mark.parametrize( + "input_names_and_shapes", + [ + [("features", torch.Size([3, 224, 224]))], + [ + ("features", torch.Size([3, 224, 224])), + ("segmentation", torch.Size([1, 224, 224])), + ], + [ + ("features", torch.Size([3, 224, 224])), + ("segmentation", torch.Size([1, 224, 224])), + ("disparity", torch.Size([1, 224, 224])), + ], + [ + ("features", torch.Size([3, 224, 224])), + ("pointcloud", torch.Size([1000, 3])), + ], + [ + ("features", torch.Size([3, 224, 224])), + ("pointcloud", torch.Size([1000, 3])), + ("foobar", torch.Size([2, 3, 4, 5, 6])), + ], + ], +) +@pytest.mark.parametrize("batch_size", [1, 2]) +def 
test_collate_fn(input_names_and_shapes, batch_size): # Mock batch data - batch = [ - ( - torch.rand(3, 224, 224, dtype=torch.float32), - {"classification": (torch.tensor([1, 0]), LabelType.CLASSIFICATION)}, - ), - ( - torch.rand(3, 224, 224, dtype=torch.float32), - {"classification": (torch.tensor([0, 1]), LabelType.CLASSIFICATION)}, - ), - ] + + def build_batch_element(): + inputs = {} + for name, shape in input_names_and_shapes: + inputs[name] = torch.rand(shape, dtype=torch.float32) + + labels = { + "classification": ( + torch.randint(0, 2, (2,), dtype=torch.int64), + LabelType.CLASSIFICATION, + ) + } + + return inputs, labels + + batch = [build_batch_element() for _ in range(batch_size)] # Call collate_fn - imgs, annotations = collate_fn(batch) # type: ignore + inputs, annotations = collate_fn(batch) # type: ignore # Check images tensor - assert imgs.shape == (2, 3, 224, 224) - assert imgs.dtype == torch.float32 + assert inputs["features"].shape == (batch_size, 3, 224, 224) + assert inputs["features"].dtype == torch.float32 # Check annotations assert "classification" in annotations - assert annotations["classification"][0].shape == (2, 2) + assert annotations["classification"][0].shape == (batch_size, 2) assert annotations["classification"][0].dtype == torch.int64 - # TODO: test also segmentation, boundingbox and keypoint + +# TODO: test also segmentation, boundingbox and keypoint if __name__ == "__main__": From bb9b01ddc8324b15855b7ef402032c22554d91c6 Mon Sep 17 00:00:00 2001 From: KlemenSkrlj <47853619+klemen1999@users.noreply.github.com> Date: Thu, 13 Jun 2024 05:15:29 +0200 Subject: [PATCH 28/75] Updated Tuner (#26) Co-authored-by: Martin Kozlovsky Co-authored-by: Michal Sejak Co-authored-by: GitHub Actions --- configs/README.md | 15 +++---- configs/example_tuning.yaml | 2 +- luxonis_train/core/tuner.py | 73 +++++++++++++++++++++++++++------- luxonis_train/utils/config.py | 1 + luxonis_train/utils/tracker.py | 22 +++++++++- requirements.txt | 4 +- 6 files changed, 91 insertions(+), 26 deletions(-) diff --git a/configs/README.md b/configs/README.md index 01d1ebd3..e6eb0bad 100644 --- a/configs/README.md +++ b/configs/README.md @@ -241,13 +241,14 @@ Option specific for ONNX export. Here you can specify options for tuning. -| Key | Type | Default value | Description | -| ---------- | ----------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| study_name | str | "test-study" | Name of the study. | -| use_pruner | bool | True | Whether to use the MedianPruner. | -| n_trials | int \| None | 15 | Number of trials for each process. `None` represents no limit in terms of numbner of trials. | -| timeout | int \| None | None | Stop study after the given number of seconds. | -| params | dict\[str, list\] | {} | Which parameters to tune. The keys should be in the format `key1.key2.key3_`. Type can be one of `[categorical, float, int, longuniform, uniform]`. For more information about the types, visit [Optuna documentation](https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html). 
| +| Key | Type | Default value | Description | +| ----------------------- | ----------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| study_name | str | "test-study" | Name of the study. | +| continue_existing_study | bool | True | Whether to continue an existing study if `study_name` already exists. | +| use_pruner | bool | True | Whether to use the MedianPruner. | +| n_trials | int \| None | 15 | Number of trials for each process. `None` represents no limit in terms of number of trials. | +| timeout | int \| None | None | Stop study after the given number of seconds. | +| params | dict\[str, list\] | {} | Which parameters to tune. The keys should be in the format `key1.key2.key3_`. Type can be one of `[categorical, float, int, longuniform, uniform]`. For more information about the types, visit [Optuna documentation](https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html). | Example of params for tuner block: diff --git a/configs/example_tuning.yaml b/configs/example_tuning.yaml index 41c4d8a8..009abc41 100755 --- a/configs/example_tuning.yaml +++ b/configs/example_tuning.yaml @@ -23,7 +23,7 @@ trainer: active: True batch_size: 4 - epochs: &epochs 1 + epochs: &epochs 10 validation_interval: 1 num_log_images: 8 diff --git a/luxonis_train/core/tuner.py b/luxonis_train/core/tuner.py index 4635789c..344e016a 100644 --- a/luxonis_train/core/tuner.py +++ b/luxonis_train/core/tuner.py @@ -1,4 +1,5 @@ import os.path as osp +from logging import getLogger from typing import Any import lightning.pytorch as pl @@ -13,6 +14,8 @@ from .core import Core +logger = getLogger(__name__) + class Tuner(Core): def __init__(self, cfg: str | dict, args: list[str] | tuple[str, ...] 
| None): raise ValueError("You have to specify the `tuner` section in config.") self.tune_cfg = self.cfg.tuner + # Parent tracker that only logs the best study parameters at the end + rank = rank_zero_only.rank + cfg_tracker = self.cfg.tracker + tracker_params = cfg_tracker.model_dump() + tracker_params[ + "is_wandb" + ] = False # wandb doesn't allow multiple concurrent runs, handle this separately + self.parent_tracker = LuxonisTrackerPL( + rank=rank, + mlflow_tracking_uri=self.cfg.ENVIRON.MLFLOW_TRACKING_URI, + is_sweep=False, + **tracker_params, + ) + if self.parent_tracker.is_mlflow: + # Experiment needs to be interacted with to create actual MLFlow run + self.parent_tracker.experiment["mlflow"].active_run() + def tune(self) -> None: """Runs Optuna tunning of hyperparameters.""" + logger.info("Starting tuning...") pruner = ( optuna.pruners.MedianPruner() @@ -57,7 +78,7 @@ def tune(self) -> None: storage=storage, direction="minimize", pruner=pruner, - load_if_exists=True, + load_if_exists=self.tune_cfg.continue_existing_study, ) study.optimize( @@ -66,25 +87,44 @@ def tune(self) -> None: timeout=self.tune_cfg.timeout, ) + best_study_params = study.best_params + logger.info(f"Best study parameters: {best_study_params}") + + self.parent_tracker.log_hyperparams(best_study_params) + + if self.cfg.tracker.is_wandb: + # If wandb used then init parent tracker separately at the end + wandb_parent_tracker = LuxonisTrackerPL( + project_name=self.cfg.tracker.project_name, + project_id=self.cfg.tracker.project_id, + run_name=self.parent_tracker.run_name, + save_directory=self.cfg.tracker.save_directory, + is_wandb=True, + wandb_entity=self.cfg.tracker.wandb_entity, + rank=rank_zero_only.rank, + ) + wandb_parent_tracker.log_hyperparams(best_study_params) + def _objective(self, trial: optuna.trial.Trial) -> float: """Objective function used to optimize Optuna study.""" rank = rank_zero_only.rank cfg_tracker = self.cfg.tracker tracker_params = cfg_tracker.model_dump() - tracker = LuxonisTrackerPL( + child_tracker = LuxonisTrackerPL( rank=rank, mlflow_tracking_uri=self.cfg.ENVIRON.MLFLOW_TRACKING_URI, is_sweep=True, **tracker_params, ) - run_save_dir = osp.join(cfg_tracker.save_directory, tracker.run_name) + + run_save_dir = osp.join(cfg_tracker.save_directory, child_tracker.run_name) curr_params = self._get_trial_params(trial) curr_params["model.predefined_model"] = None Config.clear_instance() cfg = Config.get_config(self.cfg.model_dump(), curr_params) - tracker.log_hyperparams(curr_params) + child_tracker.log_hyperparams(curr_params) cfg.save_data(osp.join(run_save_dir, "config.yaml")) @@ -95,14 +135,11 @@ def _objective(self, trial: optuna.trial.Trial) -> float: input_shape=self.loaders["train"].input_shape, ) lightning_module._core = self - pruner_callback = PyTorchLightningPruningCallback( - trial, monitor="val_loss/loss" - ) callbacks: list[pl.Callback] = ( [LuxonisProgressBar()] if self.cfg.use_rich_text else [] ) + pruner_callback = PyTorchLightningPruningCallback(trial, monitor="val/loss") callbacks.append(pruner_callback) - deterministic = False if self.cfg.trainer.seed: pl.seed_everything(cfg.trainer.seed, workers=True) @@ -112,7 +149,7 @@ def _objective(self, trial: optuna.trial.Trial) -> float: accelerator=cfg.trainer.accelerator, devices=cfg.trainer.devices, strategy=cfg.trainer.strategy, - logger=tracker, # type: ignore + logger=child_tracker, # type: ignore max_epochs=cfg.trainer.epochs, accumulate_grad_batches=cfg.trainer.accumulate_grad_batches, 
check_val_every_n_epoch=cfg.trainer.validation_interval, @@ -122,12 +159,18 @@ def _objective(self, trial: optuna.trial.Trial) -> float: deterministic=deterministic, ) - pl_trainer.fit( - lightning_module, # type: ignore - self.pytorch_loaders["train"], - self.pytorch_loaders["val"], - ) - pruner_callback.check_pruned() + try: + pl_trainer.fit( + lightning_module, # type: ignore + self.pytorch_loaders["val"], + self.pytorch_loaders["train"], + ) + + pruner_callback.check_pruned() + + except optuna.TrialPruned as e: + # Pruning is done by raising an error + logger.info(e) if "val/loss" not in pl_trainer.callback_metrics: raise ValueError( diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 96d132ab..3379f59f 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -286,6 +286,7 @@ class StorageConfig(CustomBaseModel): class TunerConfig(CustomBaseModel): study_name: str = "test-study" + continue_existing_study: bool = True use_pruner: bool = True n_trials: int | None = 15 timeout: int | None = None diff --git a/luxonis_train/utils/tracker.py b/luxonis_train/utils/tracker.py index 13c77cb2..df157b3b 100644 --- a/luxonis_train/utils/tracker.py +++ b/luxonis_train/utils/tracker.py @@ -1,8 +1,28 @@ from lightning.pytorch.loggers.logger import Logger +from lightning.pytorch.utilities import rank_zero_only # type: ignore from luxonis_ml.tracker import LuxonisTracker class LuxonisTrackerPL(LuxonisTracker, Logger): """Implementation of LuxonisTracker that is compatible with PytorchLightning.""" - ... + @rank_zero_only + def finalize(self, status: str = "success") -> None: + """Finalizes current run.""" + if self.is_tensorboard: + self.experiment["tensorboard"].flush() + self.experiment["tensorboard"].close() + if self.is_mlflow: + if status == "success": + mlflow_status = "FINISHED" + elif status == "failed": + mlflow_status = "FAILED" + elif status == "finished": + mlflow_status = "FINISHED" + self.experiment["mlflow"].end_run(mlflow_status) + if self.is_wandb: + if status == "success": + wandb_status = 0 + else: + wandb_status = 1 + self.experiment["wandb"].finish(wandb_status) diff --git a/requirements.txt b/requirements.txt index 6dc87275..42834ae6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,8 +5,8 @@ luxonis-ml[all]@git+https://github.com/luxonis/luxonis-ml.git@dev onnx>=1.12.0 onnxruntime>=1.13.1 onnxsim>=0.4.10 -optuna>=3.2.0 -optuna_integration>=3.6.0 +optuna>=3.6.0 +optuna-integration>=3.6.0 parameterized>=0.9.0 psycopg2-binary>=2.9.1 pycocotools>=2.0.7 From bf6948032d3908fb0e783df4e9ce11997ad35740 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Mon, 17 Jun 2024 00:59:10 +0200 Subject: [PATCH 29/75] CLI Inspect Fix (#41) --- luxonis_train/__main__.py | 160 +++++++++++---------------------- luxonis_train/core/core.py | 18 +--- luxonis_train/core/exporter.py | 2 +- luxonis_train/core/inferer.py | 9 +- luxonis_train/core/trainer.py | 2 +- luxonis_train/core/tuner.py | 8 +- luxonis_train/utils/config.py | 6 +- 7 files changed, 70 insertions(+), 135 deletions(-) diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index 759bc87c..45e02adf 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -1,14 +1,12 @@ -import os -from enum import Enum +import tempfile from importlib.metadata import version from pathlib import Path from typing import Annotated, Optional -import cv2 import typer -from torch.utils.data import DataLoader - -from luxonis_train.utils.registry import 
LOADERS +import yaml +from luxonis_ml.data.__main__ import inspect as lxml_inspect +from luxonis_ml.enums import SplitType app = typer.Typer( help="Luxonis Train CLI", @@ -17,20 +15,12 @@ ) -class View(str, Enum): - train = "train" - val = "val" - test = "test" - - def __str__(self): - return self.value - - ConfigType = Annotated[ - Optional[Path], + Optional[str], typer.Option( help="Path to the configuration file.", show_default=False, + metavar="FILE", ), ] @@ -42,7 +32,7 @@ def __str__(self): ), ] -ViewType = Annotated[View, typer.Option(help="Which dataset view to use.")] +ViewType = Annotated[SplitType, typer.Option(help="Which dataset view to use.")] SaveDirType = Annotated[ Optional[Path], @@ -61,15 +51,17 @@ def train( """Start training.""" from luxonis_train.core import Trainer - Trainer(str(config), opts, resume=resume).train() + Trainer(config, opts, resume=resume).train() @app.command() -def eval(config: ConfigType = None, view: ViewType = View.val, opts: OptsType = None): +def eval( + config: ConfigType = None, view: ViewType = SplitType.VAL, opts: OptsType = None +): """Evaluate model.""" from luxonis_train.core import Trainer - Trainer(str(config), opts).test(view=view.name) + Trainer(config, opts).test(view=view.value) @app.command() @@ -77,7 +69,7 @@ def tune(config: ConfigType = None, opts: OptsType = None): """Start hyperparameter tuning.""" from luxonis_train.core import Tuner - Tuner(str(config), opts).tune() + Tuner(config, opts).tune() @app.command() @@ -85,123 +77,75 @@ def export(config: ConfigType = None, opts: OptsType = None): """Export model.""" from luxonis_train.core import Exporter - Exporter(str(config), opts).export() + Exporter(config, opts).export() @app.command() def infer( config: ConfigType = None, - view: ViewType = View.val, + view: ViewType = SplitType.VAL, save_dir: SaveDirType = None, opts: OptsType = None, ): """Run inference.""" from luxonis_train.core import Inferer - Inferer(str(config), opts, view=view.name, save_dir=save_dir).infer() + Inferer(config, opts, view=view.value, save_dir=save_dir).infer() @app.command() def inspect( config: ConfigType = None, - view: ViewType = View.val, - save_dir: SaveDirType = None, + view: Annotated[ + SplitType, + typer.Option( + ..., + "--view", + "-v", + help="Which split of the dataset to inspect.", + case_sensitive=False, + ), + ] = "train", # type: ignore opts: OptsType = None, ): """Inspect dataset.""" from lightning.pytorch import seed_everything - from luxonis_ml.data import Augmentations - - from luxonis_train.attached_modules.visualizers.utils import ( - draw_bounding_box_labels, - draw_keypoint_labels, - draw_segmentation_labels, - get_unnormalized_images, - ) - from luxonis_train.utils.config import Config - from luxonis_train.utils.loaders import collate_fn - from luxonis_train.utils.types import LabelType - overrides = {} - if opts: - if len(opts) % 2 != 0: - raise ValueError("Override options should be a list of key-value pairs") - - for i in range(0, len(opts), 2): - overrides[opts[i]] = opts[i + 1] + from luxonis_train.utils.config import Config - cfg = Config.get_config(str(config), overrides) + cfg = Config.get_config(config, opts) if cfg.trainer.seed is not None: seed_everything(cfg.trainer.seed, workers=True) - image_size = cfg.trainer.preprocessing.train_image_size - - augmentations = Augmentations( - image_size=image_size, - augmentations=[ - i.model_dump() for i in cfg.trainer.preprocessing.get_active_augmentations() - ], - train_rgb=cfg.trainer.preprocessing.train_rgb, - 
keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio, - only_normalize=view != "train", - ) - - loader = LOADERS.get(cfg.loader.name)( - view=view, augmentations=augmentations, **cfg.loader.params - ) - - pytorch_loader = DataLoader( - loader, - batch_size=1, - num_workers=0, - collate_fn=collate_fn, - ) - - if save_dir is not None: - os.makedirs(save_dir, exist_ok=True) - - counter = 0 - for data in pytorch_loader: - imgs, task_dict = data - for task, label_dict in task_dict.items(): - images = get_unnormalized_images(cfg, imgs) - for i, img in enumerate(images): - for label_type, labels in label_dict.items(): - if label_type == LabelType.CLASSIFICATION: - continue - elif label_type == LabelType.BOUNDINGBOX: - img = draw_bounding_box_labels( - img, - labels[labels[:, 0] == i][:, 2:], - colors="yellow", - width=1, - ) - elif label_type == LabelType.KEYPOINTS: - img = draw_keypoint_labels( - img, labels[labels[:, 0] == i][:, 1:], colors="red" - ) - elif label_type == LabelType.SEGMENTATION: - img = draw_segmentation_labels( - img, labels[i], alpha=0.8, colors="#5050FF" - ) - - img_arr = img.permute(1, 2, 0).numpy() - img_arr = cv2.cvtColor(img_arr, cv2.COLOR_RGB2BGR) - if save_dir is not None: - counter += 1 - cv2.imwrite( - os.path.join(save_dir, f"{counter}_{task}.png"), img_arr - ) - else: - cv2.imshow(task, img_arr) - if save_dir is None and cv2.waitKey() == ord("q"): - exit() + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as f: + yaml.dump( + [ + a.model_dump() + for a in cfg.trainer.preprocessing.get_active_augmentations() + if a.name != "Normalize" + ], + f, + ) + + if "dataset_name" not in cfg.loader.params: + raise ValueError("dataset_name is not set in the config") + + lxml_inspect( + name=cfg.loader.params["dataset_name"], + view=view, + aug_config=Path( + f.name, + ), + ) @app.command() def archive( executable: Annotated[ - Optional[Path], typer.Option(help="Path to the model file.", show_default=False) + str, + typer.Option( + help="Path to the model file.", show_default=False, metavar="FILE" + ), ], config: ConfigType = None, opts: OptsType = None, diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index c1b1fb56..0fe1756f 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -31,7 +31,7 @@ class Core: def __init__( self, - cfg: str | dict[str, Any] | Config, + cfg: str | dict[str, Any] | Config | None, opts: list[str] | tuple[str, ...] | dict[str, Any] | None = None, ): """Constructs a new Core instance. @@ -46,24 +46,10 @@ def __init__( @param opts: Argument dict provided through command line, used for config overriding """ - overrides = {} - if opts: - if isinstance(opts, dict): - overrides = opts - else: - if len(opts) % 2 != 0: - raise ValueError( - "Override options should be a list of key-value pairs" - ) - - # NOTE: has to be done like this for torchx to work - for i in range(0, len(opts), 2): - overrides[opts[i]] = opts[i + 1] - if isinstance(cfg, Config): self.cfg = cfg else: - self.cfg = Config.get_config(cfg, overrides) + self.cfg = Config.get_config(cfg, opts) opts = opts or [] diff --git a/luxonis_train/core/exporter.py b/luxonis_train/core/exporter.py index 5318931f..6f3970e6 100644 --- a/luxonis_train/core/exporter.py +++ b/luxonis_train/core/exporter.py @@ -20,7 +20,7 @@ class Exporter(Core): def __init__( self, - cfg: str | dict[str, Any] | Config, + cfg: str | dict[str, Any] | Config | None = None, opts: list[str] | tuple[str, ...] 
| dict[str, Any] | None = None, ): """Provides an interface for exporting models to .onnx and .blob formats. diff --git a/luxonis_train/core/inferer.py b/luxonis_train/core/inferer.py index 710c4bb2..f7a6def3 100644 --- a/luxonis_train/core/inferer.py +++ b/luxonis_train/core/inferer.py @@ -1,11 +1,12 @@ from pathlib import Path -from typing import Literal +from typing import Any, Literal import cv2 from luxonis_train.attached_modules.visualizers import ( get_unnormalized_images, ) +from luxonis_train.utils.config import Config from .trainer import Trainer @@ -13,9 +14,9 @@ class Inferer(Trainer): def __init__( self, - cfg: str | dict, - opts: list[str] | tuple[str, ...] | None, - view: Literal["train", "test", "val"], + cfg: str | dict[str, Any] | Config | None = None, + opts: list[str] | tuple[str, ...] | None = None, + view: Literal["train", "test", "val"] = "val", save_dir: Path | None = None, ): opts = list(opts or []) diff --git a/luxonis_train/core/trainer.py b/luxonis_train/core/trainer.py index ef20dc9e..8054522e 100644 --- a/luxonis_train/core/trainer.py +++ b/luxonis_train/core/trainer.py @@ -22,7 +22,7 @@ class Trainer(Core): def __init__( self, - cfg: str | dict[str, Any] | Config, + cfg: str | dict[str, Any] | Config | None = None, opts: list[str] | tuple[str, ...] | dict[str, Any] | None = None, resume: str | None = None, ): diff --git a/luxonis_train/core/tuner.py b/luxonis_train/core/tuner.py index 344e016a..4dfc780b 100644 --- a/luxonis_train/core/tuner.py +++ b/luxonis_train/core/tuner.py @@ -18,7 +18,11 @@ class Tuner(Core): - def __init__(self, cfg: str | dict, args: list[str] | tuple[str, ...] | None): + def __init__( + self, + cfg: str | dict[str, Any] | Config | None = None, + opts: list[str] | tuple[str, ...] | None = None, + ): """Main API which is used to perform hyperparameter tunning. @type cfg: str | dict[str, Any] | Config @@ -28,7 +32,7 @@ def __init__(self, cfg: str | dict, args: list[str] | tuple[str, ...] | None): @param args: Argument dict provided through command line, used for config overriding. """ - super().__init__(cfg, args) + super().__init__(cfg, opts) if self.cfg.tuner is None: raise ValueError("You have to specify the `tuner` section in config.") self.tune_cfg = self.cfg.tuner diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 3379f59f..afd1f5c7 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -52,7 +52,7 @@ class PredefinedModelConfig(CustomBaseModel): class ModelConfig(CustomBaseModel): - name: str + name: str = "model" predefined_model: PredefinedModelConfig | None = None weights: str | None = None nodes: list[ModelNodeConfig] = [] @@ -298,7 +298,7 @@ class TunerConfig(CustomBaseModel): class Config(LuxonisConfig): use_rich_text: bool = True - model: ModelConfig + model: ModelConfig = ModelConfig() loader: LoaderConfig = LoaderConfig() tracker: TrackerConfig = TrackerConfig() trainer: TrainerConfig = TrainerConfig() @@ -329,7 +329,7 @@ def setup_logging(cls, data: Any) -> Any: def get_config( cls, cfg: str | dict[str, Any] | None = None, - overrides: dict[str, Any] | None = None, + overrides: dict[str, Any] | list[str] | tuple[str, ...] 
| None = None, ): instance = super().get_config(cfg, overrides) if not isinstance(cfg, str): From 88e8ff5cf0982fa2509d9839246a2bbda0e6c307 Mon Sep 17 00:00:00 2001 From: KlemenSkrlj <47853619+klemen1999@users.noreply.github.com> Date: Wed, 19 Jun 2024 15:44:50 +0200 Subject: [PATCH 30/75] Tuning - Augmentation Subsets Support (#35) Co-authored-by: Martin Kozlovsky --- configs/README.md | 34 +++++++++++++++------------- configs/example_tuning.yaml | 9 ++++++++ luxonis_train/core/tuner.py | 42 ++++++++++++++++++++++++++++++++++- luxonis_train/utils/config.py | 3 ++- 4 files changed, 70 insertions(+), 18 deletions(-) diff --git a/configs/README.md b/configs/README.md index e6eb0bad..6901375c 100644 --- a/configs/README.md +++ b/configs/README.md @@ -166,14 +166,14 @@ Here you can change everything related to actual training of the model. We use [Albumentations](https://albumentations.ai/docs/) library for `augmentations`. [Here](https://albumentations.ai/docs/api_reference/full_reference/#pixel-level-transforms) you can see a list of all pixel level augmentations supported, and [here](https://albumentations.ai/docs/api_reference/full_reference/#spatial-level-transforms) you see all spatial level transformations. In config you can specify any augmentation from this lists and their params. Additionaly we support `Mosaic4` batch augmentation and letterbox resizing if `keep_aspect_ratio: True`. -| Key | Type | Default value | Description | -| ----------------- | ------------------------------------------------------------------------------------ | ------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| train_image_size | list\[int\] | \[256, 256\] | image size used for training \[height, width\] | -| keep_aspect_ratio | bool | True | bool if keep aspect ration while resizing | -| train_rgb | bool | True | bool if train on rgb or bgr | -| normalize.active | bool | True | bool if use normalization | -| normalize.params | dict | {} | params for normalization, see [documentation](https://albumentations.ai/docs/api_reference/augmentations/transforms/#albumentations.augmentations.transforms.Normalize) | -| augmentations | list\[{"name": Name of the augmentation, "params": Parameters of the augmentation}\] | \[\] | list of Albumentations augmentations | +| Key | Type | Default value | Description | +| ----------------- | --------------------------------------------------------------------------------------------------------------------------------------------- | ------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| train_image_size | list\[int\] | \[256, 256\] | image size used for training \[height, width\] | +| keep_aspect_ratio | bool | True | bool if keep aspect ration while resizing | +| train_rgb | bool | True | bool if train on rgb or bgr | +| normalize.active | bool | True | bool if use normalization | +| normalize.params | dict | {} | params for normalization, see [documentation](https://albumentations.ai/docs/api_reference/augmentations/transforms/#albumentations.augmentations.transforms.Normalize) | +| augmentations | list\[{"name": Name of the augmentation, "active": Bool if aug is active, by default set to True, "params": Parameters of the augmentation}\] | \[\] | list of Albumentations augmentations | ### Optimizer @@ -241,14 
+241,15 @@ Option specific for ONNX export. Here you can specify options for tuning. -| Key | Type | Default value | Description | -| ----------------------- | ----------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| study_name | str | "test-study" | Name of the study. | -| continue_existing_study | bool | True | Weather to continue existing study if `study_name` already exists. | -| use_pruner | bool | True | Whether to use the MedianPruner. | -| n_trials | int \| None | 15 | Number of trials for each process. `None` represents no limit in terms of numbner of trials. | -| timeout | int \| None | None | Stop study after the given number of seconds. | -| params | dict\[str, list\] | {} | Which parameters to tune. The keys should be in the format `key1.key2.key3_`. Type can be one of `[categorical, float, int, longuniform, uniform]`. For more information about the types, visit [Optuna documentation](https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html). | +| Key | Type | Default value | Description | +| ---------- | ----------------- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| study_name | str | "test-study" | Name of the study. | +| use_pruner | bool | True | Whether to use the MedianPruner. | +| n_trials | int \| None | 15 | Number of trials for each process. `None` represents no limit in terms of numbner of trials. | +| timeout | int \| None | None | Stop study after the given number of seconds. | +| params | dict\[str, list\] | {} | Which parameters to tune. The keys should be in the format `key1.key2.key3_`. Type can be one of `[categorical, float, int, longuniform, uniform, subset]`. For more information about the types, visit [Optuna documentation](https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html). | + +**Note**: "subset" sampling is currently only supported for augmentations. You can specify a set of augmentations defined in `trainer` to choose from and every run subset of random N augmentations will be active (`is_active` parameter will be True for chosen ones and False for the rest in the set). 
Example of params for tuner block: @@ -258,6 +259,7 @@ tuner: trainer.optimizer.name_categorical: ["Adam", "SGD"] trainer.optimizer.params.lr_float: [0.0001, 0.001] trainer.batch_size_int: [4, 16, 4] + trainer.preprocessing.augmentations_subset: [["Defocus", "Sharpen", "Flip"], 2] ``` ### Storage diff --git a/configs/example_tuning.yaml b/configs/example_tuning.yaml index 009abc41..9a8bfd79 100755 --- a/configs/example_tuning.yaml +++ b/configs/example_tuning.yaml @@ -21,6 +21,14 @@ trainer: keep_aspect_ratio: False normalize: active: True + augmentations: + - name: Defocus + params: + p: 0.1 + - name: Sharpen + params: + p: 0.1 + - name: Flip batch_size: 4 epochs: &epochs 10 @@ -38,3 +46,4 @@ tuner: trainer.optimizer.name_categorical: ["Adam", "SGD"] trainer.optimizer.params.lr_float: [0.0001, 0.001] trainer.batch_size_int: [4, 16, 4] + trainer.preprocessing.augmentations_subset: [["Defocus", "Sharpen", "Flip"], 2] diff --git a/luxonis_train/core/tuner.py b/luxonis_train/core/tuner.py index 4dfc780b..13d56ca4 100644 --- a/luxonis_train/core/tuner.py +++ b/luxonis_train/core/tuner.py @@ -1,4 +1,5 @@ import os.path as osp +import random from logging import getLogger from typing import Any @@ -125,8 +126,15 @@ def _objective(self, trial: optuna.trial.Trial) -> float: curr_params = self._get_trial_params(trial) curr_params["model.predefined_model"] = None + + cfg_copy = self.cfg.model_copy(deep=True) + cfg_copy.trainer.preprocessing.augmentations = [ + a + for a in cfg_copy.trainer.preprocessing.augmentations + if a.name != "Normalize" + ] # manually remove Normalize so it doesn't duplicate it when creating new cfg instance Config.clear_instance() - cfg = Config.get_config(self.cfg.model_dump(), curr_params) + cfg = Config.get_config(cfg_copy.model_dump(), curr_params) child_tracker.log_hyperparams(curr_params) @@ -193,6 +201,18 @@ def _get_trial_params(self, trial: optuna.trial.Trial) -> dict[str, Any]: key_name = "_".join(key_info[:-1]) key_type = key_info[-1] match key_type, value: + case "subset", [list(whole_set), int(subset_size)]: + if key_name.split(".")[-1] != "augmentations": + raise ValueError( + "Subset sampling currently only supported for augmentations" + ) + whole_set_indices = self._augs_to_indices(whole_set) + subset = random.sample(whole_set_indices, subset_size) + for aug_id in whole_set_indices: + new_params[f"{key_name}.{aug_id}.active"] = ( + True if aug_id in subset else False + ) + continue case "categorical", list(lst): new_value = trial.suggest_categorical(key_name, lst) case "float", [float(low), float(high), *tail]: @@ -225,3 +245,23 @@ def _get_trial_params(self, trial: optuna.trial.Trial) -> dict[str, Any]: "No paramteres to tune. Specify them under `tuner.params`." ) return new_params + + def _augs_to_indices(self, aug_names: list[str]) -> list[int]: + """Maps augmentation names to indices.""" + all_augs = [a.name for a in self.cfg.trainer.preprocessing.augmentations] + aug_indices = [] + for aug_name in aug_names: + if aug_name == "Normalize": + logger.warn( + f"'{aug_name}' should be tuned directly by adding '...normalize.active_categorical' to the tuner params, skipping." + ) + continue + try: + index = all_augs.index(aug_name) + aug_indices.append(index) + except ValueError: + logger.warn( + f"Augmentation '{aug_name}' not found under trainer augemntations, skipping." 
+ ) + continue + return aug_indices diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index afd1f5c7..768c6f04 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -292,7 +292,8 @@ class TunerConfig(CustomBaseModel): timeout: int | None = None storage: StorageConfig = StorageConfig() params: Annotated[ - dict[str, list[str | int | float | bool]], Field(default={}, min_length=1) + dict[str, list[str | int | float | bool | list]], + Field(default={}, min_length=1), ] From 248fa48994b13860675f40b938e157cdb20908c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Fri, 21 Jun 2024 13:29:20 +0200 Subject: [PATCH 31/75] Support For Export Without Weights (#43) --- luxonis_train/__init__.py | 1 + luxonis_train/core/exporter.py | 4 +-- luxonis_train/models/luxonis_model.py | 4 ++- luxonis_train/utils/registry.py | 36 ++++++++++++++++++--------- 4 files changed, 30 insertions(+), 15 deletions(-) diff --git a/luxonis_train/__init__.py b/luxonis_train/__init__.py index 066e1110..60d8d501 100644 --- a/luxonis_train/__init__.py +++ b/luxonis_train/__init__.py @@ -1,6 +1,7 @@ from .attached_modules import * from .core import * from .models import * +from .nodes import * from .utils import * __version__ = "0.0.1" diff --git a/luxonis_train/core/exporter.py b/luxonis_train/core/exporter.py index 6f3970e6..71a3c2b7 100644 --- a/luxonis_train/core/exporter.py +++ b/luxonis_train/core/exporter.py @@ -37,8 +37,8 @@ def __init__( input_shape = self.cfg.exporter.input_shape if self.cfg.model.weights is None: - raise ValueError( - "Model weights must be specified in config file for export." + logger.warning( + "No model weights specified. Exporting model without weights." ) self.local_path = self.cfg.model.weights if input_shape is None: diff --git a/luxonis_train/models/luxonis_model.py b/luxonis_train/models/luxonis_model.py index 2daf61cb..f5acae68 100644 --- a/luxonis_train/models/luxonis_model.py +++ b/luxonis_train/models/luxonis_model.py @@ -712,7 +712,9 @@ def configure_callbacks(self) -> list[pl.Callback]: def configure_optimizers( self, - ) -> tuple[list[torch.optim.Optimizer], list[nn.Module]]: + ) -> tuple[ + list[torch.optim.Optimizer], list[torch.optim.lr_scheduler._LRScheduler] + ]: """Configures model optimizers and schedulers.""" cfg_optimizer = self.cfg.trainer.optimizer cfg_scheduler = self.cfg.trainer.scheduler diff --git a/luxonis_train/utils/registry.py b/luxonis_train/utils/registry.py index 6da8893a..2222ecbd 100644 --- a/luxonis_train/utils/registry.py +++ b/luxonis_train/utils/registry.py @@ -1,34 +1,46 @@ """This module implements a metaclass for automatic registration of classes.""" - +import lightning.pytorch as pl +import torch from luxonis_ml.utils.registry import Registry -LOADERS = Registry(name="loaders") -"""Registry for all loaders.""" +import luxonis_train -CALLBACKS = Registry(name="callbacks") +CALLBACKS: Registry[type[pl.Callback]] = Registry(name="callbacks") """Registry for all callbacks.""" -LOADERS = Registry(name="loaders") +LOADERS: Registry[type["luxonis_train.utils.loaders.BaseLoaderTorch"]] = Registry( + name="loaders" +) """Registry for all loaders.""" -LOSSES = Registry(name="losses") +LOSSES: Registry[type["luxonis_train.attached_modules.BaseLoss"]] = Registry( + name="losses" +) """Registry for all losses.""" -METRICS = Registry(name="metrics") +METRICS: Registry[type["luxonis_train.attached_modules.BaseMetric"]] = Registry( + name="metrics" +) """Registry for all metrics.""" -MODELS = 
Registry(name="models") +MODELS: Registry[type["luxonis_train.models.BasePredefinedModel"]] = Registry( + name="models" +) """Registry for all models.""" -NODES = Registry(name="nodes") +NODES: Registry[type["luxonis_train.nodes.BaseNode"]] = Registry(name="nodes") """Registry for all nodes.""" -OPTIMIZERS = Registry(name="optimizers") +OPTIMIZERS: Registry[type[torch.optim.Optimizer]] = Registry(name="optimizers") """Registry for all optimizers.""" -SCHEDULERS = Registry(name="schedulers") +SCHEDULERS: Registry[type[torch.optim.lr_scheduler._LRScheduler]] = Registry( + name="schedulers" +) """Registry for all schedulers.""" -VISUALIZERS = Registry(name="visualizers") +VISUALIZERS: Registry[type["luxonis_train.visualizers.BaseVisualizer"]] = Registry( + "visualizers" +) """Registry for all visualizers.""" From 4dda2552a8e14b01d58a8ffa11adf997d2b24bec Mon Sep 17 00:00:00 2001 From: KlemenSkrlj <47853619+klemen1999@users.noreply.github.com> Date: Thu, 27 Jun 2024 15:50:42 +0200 Subject: [PATCH 32/75] Openvino version in blobconverter config (#44) --- configs/README.md | 9 +++++---- luxonis_train/core/exporter.py | 1 + luxonis_train/utils/config.py | 1 + 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/configs/README.md b/configs/README.md index 6901375c..8e3b4935 100644 --- a/configs/README.md +++ b/configs/README.md @@ -232,10 +232,11 @@ Option specific for ONNX export. ### Blob -| Key | Type | Default value | Description | -| ------ | ---- | ------------- | ------------------------------------ | -| active | bool | False | Whether to export to `.blob` format. | -| shaves | int | 6 | How many shaves. | +| Key | Type | Default value | Description | +| ------- | ---------------------------------------------------------------- | ------------- | --------------------------------------- | +| active | bool | False | Whether to export to `.blob` format. | +| shaves | int | 6 | How many shaves. | +| version | Literal\["2021.2", "2021.3", "2021.4", "2022.1", "2022.3_RVC3"\] | "2022.1" | OpenVINO version to use for conversion. 
| ## Tuner diff --git a/luxonis_train/core/exporter.py b/luxonis_train/core/exporter.py index 71a3c2b7..f73e7ec8 100644 --- a/luxonis_train/core/exporter.py +++ b/luxonis_train/core/exporter.py @@ -160,6 +160,7 @@ def export(self, onnx_path: str | None = None): optimizer_params=optimizer_params, data_type=self.cfg.exporter.data_type, shaves=self.cfg.exporter.blobconverter.shaves, + version=self.cfg.exporter.blobconverter.version, use_cache=False, output_dir=self.export_path, ) diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 768c6f04..fa0ee586 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -245,6 +245,7 @@ class OnnxExportConfig(CustomBaseModel): class BlobconverterExportConfig(CustomBaseModel): active: bool = False shaves: int = 6 + version: Literal["2021.2", "2021.3", "2021.4", "2022.1", "2022.3_RVC3"] = "2022.1" class ExportConfig(CustomBaseModel): From 7c3155556b4515cf1e300568ff22afd8b9a7d569 Mon Sep 17 00:00:00 2001 From: KlemenSkrlj <47853619+klemen1999@users.noreply.github.com> Date: Tue, 9 Jul 2024 18:34:06 +0200 Subject: [PATCH 33/75] Fix onnx export (#45) --- luxonis_train/models/luxonis_model.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/luxonis_train/models/luxonis_model.py b/luxonis_train/models/luxonis_model.py index f5acae68..bebca871 100644 --- a/luxonis_train/models/luxonis_model.py +++ b/luxonis_train/models/luxonis_model.py @@ -418,6 +418,7 @@ def export_onnx(self, save_path: str, **kwargs) -> list[str]: @rtype: list[str] @return: List of output names. """ + self.eval() inputs = { input_name: torch.zeros([1, *shape]).to(self.device) @@ -490,6 +491,9 @@ def export_forward(inputs) -> tuple[Tensor, ...]: module.set_export_mode(False) logger.info(f"Model exported to {save_path}") + + self.train() + return output_names def process_losses( From db247605524115b91f36cc2a445859e2de02db49 Mon Sep 17 00:00:00 2001 From: Matej Rojec <64556640+MatejRojec@users.noreply.github.com> Date: Thu, 11 Jul 2024 15:43:56 +0200 Subject: [PATCH 34/75] Multi-GPU Training Support (#42) Co-authored-by: GitHub Actions --- luxonis_train/nodes/efficientnet.py | 8 ++++++-- luxonis_train/nodes/mobilenetv2.py | 7 ++++--- luxonis_train/nodes/mobileone.py | 2 ++ luxonis_train/nodes/resnet.py | 5 ++++- luxonis_train/nodes/rexnetv1.py | 2 +- 5 files changed, 17 insertions(+), 7 deletions(-) diff --git a/luxonis_train/nodes/efficientnet.py b/luxonis_train/nodes/efficientnet.py index 57b52d09..37f8ced5 100644 --- a/luxonis_train/nodes/efficientnet.py +++ b/luxonis_train/nodes/efficientnet.py @@ -5,7 +5,7 @@ """ import torch -from torch import Tensor +from torch import Tensor, nn from .base_node import BaseNode @@ -27,7 +27,10 @@ def __init__(self, download_weights: bool = False, **kwargs): "efficientnet_lite0", pretrained=download_weights, ) - self.out_indices = [1, 2, 4, 6] + efficientnet_lite0_model.classifier = nn.Identity() + self.out_indices = [0, 1, 2, 4, 6] + efficientnet_lite0_model.bn2 = nn.Identity() + efficientnet_lite0_model.conv_head = nn.Identity() self.backbone = efficientnet_lite0_model def forward(self, x: Tensor) -> list[Tensor]: @@ -39,4 +42,5 @@ def forward(self, x: Tensor) -> list[Tensor]: x = m(x) if i in self.out_indices: outs.append(x) + return outs diff --git a/luxonis_train/nodes/mobilenetv2.py b/luxonis_train/nodes/mobilenetv2.py index 732d0b12..db6cf879 100644 --- a/luxonis_train/nodes/mobilenetv2.py +++ b/luxonis_train/nodes/mobilenetv2.py @@ -4,7 +4,7 @@ """ import torchvision -from torch import Tensor 
+from torch import Tensor, nn from .base_node import BaseNode @@ -29,8 +29,9 @@ def __init__(self, download_weights: bool = False, **kwargs): mobilenet_v2 = torchvision.models.mobilenet_v2( weights="DEFAULT" if download_weights else None ) - self.out_indices = [3, 6, 13, 17] - self.channels = [24, 32, 96, 320] + mobilenet_v2.classifier = nn.Identity() + self.out_indices = [3, 6, 13, 18] + self.channels = [24, 32, 96, 1280] self.backbone = mobilenet_v2 def forward(self, x: Tensor) -> list[Tensor]: diff --git a/luxonis_train/nodes/mobileone.py b/luxonis_train/nodes/mobileone.py index b1658eb4..645534e4 100644 --- a/luxonis_train/nodes/mobileone.py +++ b/luxonis_train/nodes/mobileone.py @@ -93,6 +93,8 @@ def forward(self, inputs: Tensor) -> list[Tensor]: outs.append(x) x = self.stage3(x) outs.append(x) + x = self.stage4(x) + outs.append(x) return outs diff --git a/luxonis_train/nodes/resnet.py b/luxonis_train/nodes/resnet.py index 8228d37a..3f810100 100644 --- a/luxonis_train/nodes/resnet.py +++ b/luxonis_train/nodes/resnet.py @@ -6,7 +6,7 @@ from typing import Literal import torchvision -from torch import Tensor +from torch import Tensor, nn from .base_node import BaseNode @@ -43,6 +43,9 @@ def __init__( self.backbone = RESNET_VARIANTS[variant]( weights="DEFAULT" if download_weights else None ) + + self.backbone.fc = nn.Identity() + self.channels_list = channels_list or [64, 128, 256, 512] def forward(self, inputs: Tensor) -> list[Tensor]: diff --git a/luxonis_train/nodes/rexnetv1.py b/luxonis_train/nodes/rexnetv1.py index de2c08ae..4999d6a1 100644 --- a/luxonis_train/nodes/rexnetv1.py +++ b/luxonis_train/nodes/rexnetv1.py @@ -44,7 +44,7 @@ def __init__( """ super().__init__(**kwargs) - self.out_indices = [1, 4, 10, 16] + self.out_indices = [1, 4, 10, 17] self.channels = [16, 48, 112, 184] layers = [1, 2, 2, 3, 3, 5] strides = [1, 2, 2, 2, 1, 2] From c70b2a6da5f03defe6c335f6dbd09c2836427b86 Mon Sep 17 00:00:00 2001 From: KlemenSkrlj <47853619+klemen1999@users.noreply.github.com> Date: Thu, 11 Jul 2024 18:40:54 +0200 Subject: [PATCH 35/75] Ensure checkpoint is always generated (#46) --- luxonis_train/utils/config.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index fa0ee586..4406c285 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -236,6 +236,15 @@ def check_num_workes_platform(self): ) return self + @model_validator(mode="after") + def check_validation_interval(self): + if self.validation_interval > self.epochs: + logger.warning( + "Setting `validation_interval` same as `epochs` otherwise no checkpoint would be generated." 
+ ) + self.validation_interval = self.epochs + return self + class OnnxExportConfig(CustomBaseModel): opset_version: int = 12 From 57258eea70634d3be9a0a52929506e19a44ba285 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin <116955183+JSabadin@users.noreply.github.com> Date: Thu, 11 Jul 2024 18:41:34 +0200 Subject: [PATCH 36/75] New Keypoint Heads and Losses (#40) Co-authored-by: klemen1999 Co-authored-by: Martin Kozlovsky Co-authored-by: GitHub Actions --- README.md | 14 + configs/coco_model.yaml | 2 +- .../attached_modules/losses/README.md | 18 +- .../attached_modules/losses/__init__.py | 2 + .../losses/adaptive_detection_loss.py | 5 +- .../losses/efficient_keypoint_bbox_loss.py | 391 ++++++++++++++++++ .../losses/implicit_keypoint_bbox_loss.py | 51 ++- .../attached_modules/losses/keypoint_loss.py | 88 ++-- .../attached_modules/metrics/README.md | 2 + .../metrics/mean_average_precision.py | 4 +- .../mean_average_precision_keypoints.py | 41 +- .../metrics/object_keypoint_similarity.py | 192 ++++++--- .../visualizers/keypoint_visualizer.py | 2 +- luxonis_train/core/archiver.py | 10 +- luxonis_train/nodes/README.md | 14 + luxonis_train/nodes/__init__.py | 2 + luxonis_train/nodes/efficient_bbox_head.py | 6 +- .../nodes/efficient_keypoint_bbox_head.py | 207 ++++++++++ .../nodes/enums/head_categorization.py | 2 + .../utils/assigners/atts_assigner.py | 6 +- luxonis_train/utils/assigners/tal_assigner.py | 13 +- luxonis_train/utils/boxutils.py | 14 +- media/coverage_badge.svg | 4 +- .../test_assigners/test_atts_assigner.py | 3 +- .../test_assigners/test_tal_assigner.py | 6 +- 25 files changed, 963 insertions(+), 136 deletions(-) create mode 100644 luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py create mode 100644 luxonis_train/nodes/efficient_keypoint_bbox_head.py diff --git a/README.md b/README.md index a612b59e..873fe2c9 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,12 @@ For instructions on how to create a dataset in the LDF, follow the [examples](https://github.com/luxonis/luxonis-ml/tree/main/examples) in the [luxonis-ml](https://github.com/luxonis/luxonis-ml) repository. +To inspect dataset images by split (train, val, test), use the command: + +```bash +luxonis_train data inspect --config --view +``` + ## Training Once you've created your `config.yaml` file you can train the model using this command: @@ -66,6 +72,14 @@ luxonis_train train --config config.yaml trainer.batch_size 8 trainer.epochs 10 where key and value are space separated and sub-keys are dot (`.`) separated. If the configuration field is a list, then key/sub-key should be a number (e.g. `trainer.preprocessing.augmentations.0.name RotateCustom`). +## Evaluating + +To evaluate the model on a specific dataset split (train, test, or val), use the following command: + +```bash +luxonis_train eval --config --view +``` + ## Tuning To improve training performance you can use `Tuner` for hyperparameter optimization. 
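The `Tuner` referenced in the README hunk above can also be driven from Python instead of the CLI. A minimal sketch, assuming a config with a `tuner` section such as `configs/example_tuning.yaml` exists; the config path and the override value are illustrative placeholders, not part of this patch:

```python
# Programmatic equivalent of `luxonis_train tune --config configs/example_tuning.yaml`.
# The config path and the `tuner.n_trials` override below are placeholders.
from luxonis_train.core import Tuner

# Optional CLI-style overrides: a flat list of key/value pairs.
opts = ["tuner.n_trials", "5"]

Tuner("configs/example_tuning.yaml", opts).tune()
```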
diff --git a/configs/coco_model.yaml b/configs/coco_model.yaml index cad138a5..9af25feb 100755 --- a/configs/coco_model.yaml +++ b/configs/coco_model.yaml @@ -46,7 +46,7 @@ model: - name: ImplicitKeypointBBoxLoss attached_to: ImplicitKeypointBBoxHead params: - keypoint_distance_loss_weight: 0.5 + keypoint_regression_loss_weight: 0.5 keypoint_visibility_loss_weight: 0.7 bbox_loss_weight: 0.05 objectness_loss_weight: 0.2 diff --git a/luxonis_train/attached_modules/losses/README.md b/luxonis_train/attached_modules/losses/README.md index aafbc440..c5b1d348 100644 --- a/luxonis_train/attached_modules/losses/README.md +++ b/luxonis_train/attached_modules/losses/README.md @@ -11,6 +11,7 @@ List of all the available loss functions. - [SoftmaxFocalLoss](#softmaxfocalloss) - [AdaptiveDetectionLoss](#adaptivedetectionloss) - [ImplicitKeypointBBoxLoss](#implicitkeypointbboxloss) +- [EfficientKeypointBBoxLoss](#efficientkeypointbboxloss) ## CrossEntropyLoss @@ -97,10 +98,25 @@ Keypoint Similarity Loss](https://arxiv.org/ftp/arxiv/papers/2204/2204.06806.pdf | label_smoothing | float | 0.0 | Smoothing for [SmothBCEWithLogitsLoss](#smoothbcewithlogitsloss) for classification loss. | | min_objectness_iou | float | 0.0 | Minimum objectness IoU. | | bbox_loss_weight | float | 0.05 | Weight for bbox detection sub-loss. | -| keypoint_distance_loss_weight | float | 0.10 | Weight for keypoint distance sub-loss. | +| keypoint_regression_loss_weight | float | 0.5 | Weight for OKS sub-loss. | | keypoint_visibility_loss_weight | float | 0.6 | Weight for keypoint visibility sub-loss. | | class_loss_weight | float | 0.6 | Weight for classification sub-loss. | | objectness_loss_weight | float | 0.7 | Weight for objectness sub-loss. | | anchor_threshold | float | 4.0 | Threshold for matching anchors to targets. | | bias | float | 0.5 | Bias for matchinf anchors to targets. | | balance | list\[float\] | \[4.0, 1.0, 0.4\] | Balance for objectness loss. | + +## EfficientKeypointBBoxLoss + +Adapted from [YOLO-Pose: Enhancing YOLO for Multi Person Pose Estimation Using Object +Keypoint Similarity Loss](https://arxiv.org/ftp/arxiv/papers/2204/2204.06806.pdf). + +| Key | Type | Default value | Description | +| --------------------- | ------------------------------------------------- | ------------- | ----------------------------------------------------------------------------------- | +| viz_pw | float | 1.0 | Power for [BCEWithLogitsLoss](#bcewithlogitsloss) for keypoint visibility. | +| n_warmup_epochs | int | 4 | Number of epochs where ATSS assigner is used, after that we switch to TAL assigner. | +| iou_type | Literal\["none", "giou", "diou", "ciou", "siou"\] | "giou" | IoU type used for bbox regression sub-loss | +| class_loss_weight | float | 1.0 | Weight used for the classification sub-loss. | +| iou_loss_weight | float | 2.5 | Weight used for the IoU sub-loss. | +| regr_kpts_loss_weight | float | 1.5 | Weight used for the OKS sub-loss. | +| vis_kpts_loss_weight | float | 1.0 | Weight used for the keypoint visibility sub-loss. 
| diff --git a/luxonis_train/attached_modules/losses/__init__.py b/luxonis_train/attached_modules/losses/__init__.py index 737373d2..28585504 100644 --- a/luxonis_train/attached_modules/losses/__init__.py +++ b/luxonis_train/attached_modules/losses/__init__.py @@ -2,6 +2,7 @@ from .base_loss import BaseLoss from .bce_with_logits import BCEWithLogitsLoss from .cross_entropy import CrossEntropyLoss +from .efficient_keypoint_bbox_loss import EfficientKeypointBBoxLoss from .implicit_keypoint_bbox_loss import ImplicitKeypointBBoxLoss from .keypoint_loss import KeypointLoss from .sigmoid_focal_loss import SigmoidFocalLoss @@ -12,6 +13,7 @@ "AdaptiveDetectionLoss", "BCEWithLogitsLoss", "CrossEntropyLoss", + "EfficientKeypointBBoxLoss", "ImplicitKeypointBBoxLoss", "KeypointLoss", "BaseLoss", diff --git a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py index 21291bfa..83660463 100644 --- a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py +++ b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py @@ -100,7 +100,6 @@ def prepare( feats = outputs["features"] pred_scores = outputs["class_scores"][0] pred_distri = outputs["distributions"][0] - batch_size = pred_scores.shape[0] device = pred_scores.device @@ -142,6 +141,7 @@ def prepare( assigned_bboxes, assigned_scores, mask_positive, + _, ) = self.atts_assigner( anchors, n_anchors_list, @@ -157,7 +157,8 @@ def prepare( assigned_bboxes, assigned_scores, mask_positive, - ) = self.tal_assigner.forward( + _, + ) = self.tal_assigner( pred_scores.detach(), pred_bboxes.detach() * stride_tensor, anchor_points, diff --git a/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py new file mode 100644 index 00000000..4fc2a7c0 --- /dev/null +++ b/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py @@ -0,0 +1,391 @@ +from typing import Literal, cast + +import torch +import torch.nn.functional as F +from pydantic import Field +from torch import Tensor, nn +from torchvision.ops import box_convert +from typing_extensions import Annotated + +from luxonis_train.attached_modules.metrics.object_keypoint_similarity import ( + get_area_factor, + get_sigmas, +) +from luxonis_train.nodes import EfficientKeypointBBoxHead +from luxonis_train.utils.assigners import ATSSAssigner, TaskAlignedAssigner +from luxonis_train.utils.boxutils import ( + IoUType, + anchors_for_fpn_features, + compute_iou_loss, + dist2bbox, +) +from luxonis_train.utils.types import ( + BaseProtocol, + IncompatibleException, + Labels, + LabelType, + Packet, +) + +from .base_loss import BaseLoss +from .bce_with_logits import BCEWithLogitsLoss + + +class Protocol(BaseProtocol): + features: list[Tensor] + class_scores: Annotated[list[Tensor], Field(min_length=1, max_length=1)] + distributions: Annotated[list[Tensor], Field(min_length=1, max_length=1)] + + +class EfficientKeypointBBoxLoss( + BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor] +): + node: EfficientKeypointBBoxHead + + class NodePacket(Packet[Tensor]): + features: list[Tensor] + class_scores: Tensor + distributions: Tensor + + def __init__( + self, + n_warmup_epochs: int = 4, + iou_type: IoUType = "giou", + reduction: Literal["sum", "mean"] = "mean", + class_bbox_loss_weight: float = 1.0, + iou_loss_weight: float = 2.5, + viz_pw: float = 1.0, + regr_kpts_loss_weight: float = 1.5, + vis_kpts_loss_weight: float = 1.0, + sigmas: 
list[float] | None = None, + area_factor: float | None = None, + **kwargs, + ): + """BBox loss adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial Applications + }. It combines IoU based bbox regression loss and varifocal loss + for classification. + Code is adapted from U{https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/models}. + + @type n_warmup_epochs: int + @param n_warmup_epochs: Number of epochs where ATSS assigner is used, after that we switch to TAL assigner. + @type iou_type: L{IoUType} + @param iou_type: IoU type used for bbox regression loss. + @type reduction: Literal["sum", "mean"] + @param reduction: Reduction type for loss. + @type class_bbox_loss_weight: float + @param class_bbox_loss_weight: Weight of classification loss for bounding boxes. + @type regr_kpts_loss_weight: float + @param regr_kpts_loss_weight: Weight of regression loss for keypoints. + @type vis_kpts_loss_weight: float + @param vis_kpts_loss_weight: Weight of visibility loss for keypoints. + @type iou_loss_weight: float + @param iou_loss_weight: Weight of IoU loss. + @type sigmas: list[float] | None + @param sigmas: Sigmas used in KeypointLoss for OKS metric. If None then use COCO ones if possible or default ones. Defaults to C{None}. + @type area_factor: float | None + @param area_factor: Factor by which we multiply bbox area which is used in KeypointLoss. If None then use default one. Defaults to C{None}. + @type kwargs: dict + @param kwargs: Additional arguments to pass to L{BaseLoss}. + """ + super().__init__( + required_labels=[LabelType.BOUNDINGBOX], protocol=Protocol, **kwargs + ) + + if not isinstance(self.node, EfficientKeypointBBoxHead): + raise IncompatibleException( + f"Loss `{self.__class__.__name__}` is only " + "compatible with nodes of type `EfficientKeypointBBoxHead`." 
+ ) + self.iou_type: IoUType = iou_type + self.reduction = reduction + self.n_classes = self.node.n_classes + self.stride = self.node.stride + self.grid_cell_size = self.node.grid_cell_size + self.grid_cell_offset = self.node.grid_cell_offset + self.original_img_size = self.node.original_in_shape[1:] + self.n_heads = self.node.n_heads + self.n_kps = self.node.n_keypoints + + self.b_cross_entropy = BCEWithLogitsLoss( + pos_weight=torch.tensor([viz_pw]), **kwargs + ) + self.sigmas = get_sigmas( + sigmas=sigmas, n_keypoints=self.n_kps, class_name=self.__class__.__name__ + ) + self.area_factor = get_area_factor( + area_factor, class_name=self.__class__.__name__ + ) + + self.n_warmup_epochs = n_warmup_epochs + self.atts_assigner = ATSSAssigner(topk=9, n_classes=self.n_classes) + self.tal_assigner = TaskAlignedAssigner( + topk=13, n_classes=self.n_classes, alpha=1.0, beta=6.0 + ) + + self.varifocal_loss = VarifocalLoss() + self.class_bbox_loss_weight = class_bbox_loss_weight + self.iou_loss_weight = iou_loss_weight + self.regr_kpts_loss_weight = regr_kpts_loss_weight + self.vis_kpts_loss_weight = vis_kpts_loss_weight + + def prepare( + self, outputs: Packet[Tensor], labels: Labels + ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: + feats = outputs["features"] + pred_scores = outputs["class_scores"][0] + pred_distri = outputs["distributions"][0] + pred_kpts = outputs["keypoints_raw"][0] + + batch_size = pred_scores.shape[0] + device = pred_scores.device + + target_bbox = labels["boundingbox"][0].to(device) + target_kpts = labels["keypoints"][0].to(device) + n_kpts = (target_kpts.shape[1] - 2) // 3 + + gt_bboxes_scale = torch.tensor( + [ + self.original_img_size[1], + self.original_img_size[0], + self.original_img_size[1], + self.original_img_size[0], + ], + device=device, + ) + gt_kpts_scale = torch.tensor( + [ + self.original_img_size[1], + self.original_img_size[0], + ], + device=device, + ) + ( + anchors, + anchor_points, + n_anchors_list, + stride_tensor, + ) = anchors_for_fpn_features( + feats, + self.stride, + self.grid_cell_size, + self.grid_cell_offset, + multiply_with_stride=True, + ) + + anchor_points_strided = anchor_points / stride_tensor + pred_bboxes = dist2bbox(pred_distri, anchor_points_strided) + pred_kpts = self.dist2kpts_noscale( + anchor_points_strided, pred_kpts.view(batch_size, -1, n_kpts, 3) + ) + + target_bbox = self._preprocess_bbox_target( + target_bbox, batch_size, gt_bboxes_scale + ) + + gt_bbox_labels = target_bbox[:, :, :1] + gt_xyxy = target_bbox[:, :, 1:] + mask_gt = (gt_xyxy.sum(-1, keepdim=True) > 0).float() + + if self._epoch < self.n_warmup_epochs: + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + mask_positive, + assigned_gt_idx, + ) = self.atts_assigner( + anchors, + n_anchors_list, + gt_bbox_labels, + gt_xyxy, + mask_gt, + pred_bboxes.detach() * stride_tensor, + ) + else: + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + mask_positive, + assigned_gt_idx, + ) = self.tal_assigner( + pred_scores.detach(), + pred_bboxes.detach() * stride_tensor, + anchor_points, + gt_bbox_labels, + gt_xyxy, + mask_gt, + ) + + batched_kpts = self._preprocess_kpts_target( + target_kpts, batch_size, gt_kpts_scale + ) + assigned_gt_idx_expanded = assigned_gt_idx.unsqueeze(-1).unsqueeze(-1) + selected_keypoints = batched_kpts.gather( + 1, assigned_gt_idx_expanded.expand(-1, -1, self.n_kps, 3) + ) + xy_components = selected_keypoints[:, :, :, :2] + normalized_xy = xy_components / stride_tensor.view(1, -1, 1, 1) + 
selected_keypoints = torch.cat( + (normalized_xy, selected_keypoints[:, :, :, 2:]), dim=-1 + ) + gt_kpt = selected_keypoints[mask_positive] + pred_kpts = pred_kpts[mask_positive] + assigned_bboxes = assigned_bboxes / stride_tensor + + area = ( + assigned_bboxes[mask_positive][:, 0] - assigned_bboxes[mask_positive][:, 2] + ) * ( + assigned_bboxes[mask_positive][:, 1] - assigned_bboxes[mask_positive][:, 3] + ) + + return ( + pred_bboxes, + pred_scores, + assigned_bboxes, + assigned_labels, + assigned_scores, + mask_positive, + gt_kpt, + pred_kpts, + area * self.area_factor, + ) + + def forward( + self, + pred_bboxes: Tensor, + pred_scores: Tensor, + assigned_bboxes: Tensor, + assigned_labels: Tensor, + assigned_scores: Tensor, + mask_positive: Tensor, + gt_kpts: Tensor, + pred_kpts: Tensor, + area: Tensor, + ): + device = pred_bboxes.device + sigmas = self.sigmas.to(device) + d = (gt_kpts[..., 0] - pred_kpts[..., 0]).pow(2) + ( + gt_kpts[..., 1] - pred_kpts[..., 1] + ).pow(2) + e = d / ((2 * sigmas).pow(2) * ((area.view(-1, 1) + 1e-9) * 2)) + mask = (gt_kpts[..., 2] > 0).float() + regression_loss = ( + ((1 - torch.exp(-e)) * mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9) + ).mean() + visibility_loss = self.b_cross_entropy.forward(pred_kpts[..., 2], mask) + + one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[..., :-1] + loss_cls = self.varifocal_loss(pred_scores, assigned_scores, one_hot_label) + + if assigned_scores.sum() > 1: + loss_cls /= assigned_scores.sum() + + loss_iou = compute_iou_loss( + pred_bboxes, + assigned_bboxes, + assigned_scores, + mask_positive, + reduction="sum", + iou_type=self.iou_type, + bbox_format="xyxy", + )[0] + + loss = ( + self.class_bbox_loss_weight * loss_cls + + self.iou_loss_weight * loss_iou + + regression_loss * self.regr_kpts_loss_weight + + visibility_loss * self.vis_kpts_loss_weight + ) + + sub_losses = { + "class": loss_cls.detach(), + "iou": loss_iou.detach(), + "regression": regression_loss.detach(), + "visibility": visibility_loss.detach(), + } + + return loss, sub_losses + + def _preprocess_bbox_target( + self, bbox_target: Tensor, batch_size: int, scale_tensor: Tensor + ) -> Tensor: + """Preprocess target bboxes in shape [batch_size, N, 5] where N is maximum + number of instances in one image.""" + sample_ids, counts = cast( + tuple[Tensor, Tensor], + torch.unique(bbox_target[:, 0].int(), return_counts=True), + ) + c_max = int(counts.max()) if counts.numel() > 0 else 0 + out_target = torch.zeros(batch_size, c_max, 5, device=bbox_target.device) + out_target[:, :, 0] = -1 + for id, count in zip(sample_ids, counts): + out_target[id, :count] = bbox_target[bbox_target[:, 0] == id][:, 1:] + + scaled_target = out_target[:, :, 1:5] * scale_tensor + out_target[..., 1:] = box_convert(scaled_target, "xywh", "xyxy") + return out_target + + def _preprocess_kpts_target( + self, kpts_target: Tensor, batch_size: int, scale_tensor: Tensor + ) -> Tensor: + """Preprocesses the target keypoints in shape [batch_size, N, n_keypoints, 3] + where N is the maximum number of keypoints in one image.""" + + _, counts = torch.unique(kpts_target[:, 0].int(), return_counts=True) + max_kpts = int(counts.max()) if counts.numel() > 0 else 0 + batched_keypoints = torch.zeros( + (batch_size, max_kpts, self.n_kps, 3), device=kpts_target.device + ) + for i in range(batch_size): + keypoints_i = kpts_target[kpts_target[:, 0] == i] + scaled_keypoints_i = keypoints_i[:, 2:].clone() + batched_keypoints[i, : keypoints_i.shape[0]] = scaled_keypoints_i.view( + -1, self.n_kps, 3 
+ ) + batched_keypoints[i, :, :, :2] *= scale_tensor[:2] + + return batched_keypoints + + def dist2kpts_noscale(self, anchor_points: Tensor, kpts: Tensor) -> Tensor: + """Adjusts and scales predicted keypoints relative to anchor points without + considering image stride.""" + adj_kpts = kpts.clone() + scale = 2.0 + x_adj = anchor_points[:, [0]] - 0.5 + y_adj = anchor_points[:, [1]] - 0.5 + + adj_kpts[..., :2] *= scale + adj_kpts[..., 0] += x_adj + adj_kpts[..., 1] += y_adj + return adj_kpts + + +class VarifocalLoss(nn.Module): + def __init__(self, alpha: float = 0.75, gamma: float = 2.0): + """Varifocal Loss is a loss function for training a dense object detector to predict + the IoU-aware classification score, inspired by focal loss. + Code is adapted from: U{https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/models/losses.py} + + @type alpha: float + @param alpha: alpha parameter in focal loss, default is 0.75. + @type gamma: float + @param gamma: gamma parameter in focal loss, default is 2.0. + """ + + super().__init__() + + self.alpha = alpha + self.gamma = gamma + + def forward( + self, pred_score: Tensor, target_score: Tensor, label: Tensor + ) -> Tensor: + weight = ( + self.alpha * pred_score.pow(self.gamma) * (1 - label) + target_score * label + ) + ce_loss = F.binary_cross_entropy( + pred_score.float(), target_score.float(), reduction="none" + ) + loss = (ce_loss * weight).sum() + return loss diff --git a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py index 555d0d30..ff530b2a 100644 --- a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py +++ b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py @@ -45,8 +45,10 @@ def __init__( label_smoothing: float = 0.0, min_objectness_iou: float = 0.0, bbox_loss_weight: float = 0.05, - keypoint_distance_loss_weight: float = 0.10, keypoint_visibility_loss_weight: float = 0.6, + keypoint_regression_loss_weight: float = 0.5, + sigmas: list[float] | None = None, + area_factor: float | None = None, class_loss_weight: float = 0.6, objectness_loss_weight: float = 0.7, anchor_threshold: float = 4.0, @@ -72,10 +74,14 @@ def __init__( @param min_objectness_iou: Minimum objectness iou. Defaults to C{0.0}. @type bbox_loss_weight: float @param bbox_loss_weight: Weight for the bounding box loss. - @type keypoint_distance_loss_weight: float - @param keypoint_distance_loss_weight: Weight for the keypoint distance loss. Defaults to C{0.10}. @type keypoint_visibility_loss_weight: float @param keypoint_visibility_loss_weight: Weight for the keypoint visibility loss. Defaults to C{0.6}. + @type keypoint_regression_loss_weight: float + @param keypoint_regression_loss_weight: Weight for the keypoint regression loss. Defaults to C{0.5}. + @type sigmas: list[float] | None + @param sigmas: Sigmas used in KeypointLoss for OKS metric. If None then use COCO ones if possible or default ones. Defaults to C{None}. + @type area_factor: float | None + @param area_factor: Factor by which we multiply bbox area which is used in KeypointLoss. If None then use default one. Defaults to C{None}. @type class_loss_weight: float @param class_loss_weight: Weight for the class loss. Defaults to C{0.6}. 
@type objectness_loss_weight: float @@ -117,10 +123,10 @@ class Protocol(BaseProtocol): self.min_objectness_iou = min_objectness_iou self.bbox_weight = bbox_loss_weight - self.kpt_distance_weight = keypoint_distance_loss_weight self.class_weight = class_loss_weight self.objectness_weight = objectness_loss_weight self.kpt_visibility_weight = keypoint_visibility_loss_weight + self.keypoint_regression_loss_weight = keypoint_regression_loss_weight self.anchor_threshold = anchor_threshold self.bias = bias @@ -134,9 +140,10 @@ class Protocol(BaseProtocol): **kwargs, ) self.keypoint_loss = KeypointLoss( + n_keypoints=self.n_keypoints, bce_power=viz_pw, - distance_weight=keypoint_distance_loss_weight, - visibility_weight=keypoint_visibility_loss_weight, + sigmas=sigmas, + area_factor=area_factor, **kwargs, ) @@ -169,13 +176,15 @@ def prepare( boxes = labels["boundingbox"][0] nkpts = (kpts.shape[1] - 2) // 3 - targets = torch.zeros((len(boxes), nkpts * 2 + self.box_offset + 1)) + targets = torch.zeros((len(boxes), nkpts * 3 + self.box_offset + 1)) targets[:, :2] = boxes[:, :2] targets[:, 2 : self.box_offset + 1] = box_convert( boxes[:, 2:], "xywh", "cxcywh" ) - targets[:, self.box_offset + 1 :: 2] = kpts[:, 2::3] # insert kp x coordinates - targets[:, self.box_offset + 2 :: 2] = kpts[:, 3::3] # insert kp y coordinates + + targets[:, self.box_offset + 1 :: 3] = kpts[:, 2::3] # insert kp x coordinates + targets[:, self.box_offset + 2 :: 3] = kpts[:, 3::3] # insert kp y coordinates + targets[:, self.box_offset + 3 :: 3] = kpts[:, 4::3] # insert kp visibility n_targets = len(targets) @@ -203,7 +212,6 @@ def prepare( for i in range(self.num_heads): anchor = self.anchors[i] feature_height, feature_width = predictions[i].shape[2:4] - scaled_targets, xy_shifts = match_to_anchor( targets, anchor, @@ -259,7 +267,7 @@ def forward( "objectness": torch.tensor(0.0, device=device), "class": torch.tensor(0.0, device=device), "kpt_visibility": torch.tensor(0.0, device=device), - "kpt_distance": torch.tensor(0.0, device=device), + "kpt_regression": torch.tensor(0.0, device=device), } for pred, class_target, box_target, kpt_target, index, anchor, balance in zip( @@ -284,13 +292,16 @@ def forward( sub_losses["bboxes"] += bbox_loss * self.bbox_weight + area = box_target[:, 2] * box_target[:, 3] + _, kpt_sublosses = self.keypoint_loss.forward( pred_subset[:, self.box_offset + self.n_classes :], kpt_target.to(device), + area.to(device), ) - sub_losses["kpt_distance"] += ( - kpt_sublosses["distance"] * self.kpt_distance_weight + sub_losses["kpt_regression"] += ( + kpt_sublosses["regression"] * self.keypoint_regression_loss_weight ) sub_losses["kpt_visibility"] += ( kpt_sublosses["visibility"] * self.kpt_visibility_weight @@ -326,8 +337,14 @@ def forward( def _create_keypoint_target(self, scaled_targets: Tensor, box_xy_deltas: Tensor): keypoint_target = scaled_targets[:, self.box_offset + 1 : -1] for j in range(self.n_keypoints): - low = 2 * j - high = 2 * (j + 1) - keypoint_mask = keypoint_target[:, low:high] != 0 - keypoint_target[:, low:high][keypoint_mask] -= box_xy_deltas[keypoint_mask] + idx = 3 * j + keypoint_coords = keypoint_target[:, idx : idx + 2] + visibility = keypoint_target[:, idx + 2] + + keypoint_mask = visibility != 0 + keypoint_coords[keypoint_mask] -= box_xy_deltas[keypoint_mask] + + keypoint_target[:, idx : idx + 2] = keypoint_coords + keypoint_target[:, idx + 2] = visibility + return keypoint_target diff --git a/luxonis_train/attached_modules/losses/keypoint_loss.py 
b/luxonis_train/attached_modules/losses/keypoint_loss.py index b1ddd8ba..8a5640cb 100644 --- a/luxonis_train/attached_modules/losses/keypoint_loss.py +++ b/luxonis_train/attached_modules/losses/keypoint_loss.py @@ -4,6 +4,10 @@ from pydantic import Field from torch import Tensor +from luxonis_train.attached_modules.metrics.object_keypoint_similarity import ( + get_area_factor, + get_sigmas, +) from luxonis_train.utils.boxutils import process_keypoints_predictions from luxonis_train.utils.types import ( BaseProtocol, @@ -23,25 +27,44 @@ class Protocol(BaseProtocol): class KeypointLoss(BaseLoss[Tensor, Tensor]): def __init__( self, + n_keypoints: int, bce_power: float = 1.0, - distance_weight: float = 0.1, - visibility_weight: float = 0.6, + sigmas: list[float] | None = None, + area_factor: float | None = None, **kwargs, ): + """Keypoint based loss that is computed from OKS-based regression and visibility + loss. + + @type n_keypoints: int + @param n_keypoints: Number of keypoints. + @type bce_power: float + @param bce_power: Power used for BCE visibility loss. Defaults to C{1.0}. + @param sigmas: Sigmas used for OKS. If None then use COCO ones if possible or + default ones. Defaults to C{None}. + @type area_factor: float | None + @param area_factor: Factor by which we multiply bbox area. If None then use + default one. Defaults to C{None}. + """ + super().__init__( protocol=Protocol, required_labels=[LabelType.KEYPOINTS], **kwargs ) self.b_cross_entropy = BCEWithLogitsLoss( pos_weight=torch.tensor([bce_power]), **kwargs ) - self.distance_weight = distance_weight - self.visibility_weight = visibility_weight + self.sigmas = get_sigmas( + sigmas=sigmas, n_keypoints=n_keypoints, class_name=self.__class__.__name__ + ) + self.area_factor = get_area_factor( + area_factor, class_name=self.__class__.__name__ + ) def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Tensor, Tensor]: - return torch.cat(inputs["keypoints"], dim=0), labels[LabelType.KEYPOINTS] + return torch.cat(inputs["keypoints"], dim=0), self.get_label(labels)[0] def forward( - self, prediction: Tensor, target: Tensor + self, prediction: Tensor, target: Tensor, area: Tensor ) -> tuple[Tensor, dict[str, Tensor]]: """Computes the keypoint loss and visibility loss for a given prediction and target. @@ -49,29 +72,36 @@ def forward( @type prediction: Tensor @param prediction: Predicted tensor of shape C{[n_detections, n_keypoints * 3]}. @type target: Tensor - @param target: Target tensor of shape C{[n_detections, n_keypoints * 2]}. - @rtype: tuple[Tensor, Tensor] - @return: A tuple containing the keypoint loss tensor of shape C{[1,]} and the - visibility loss tensor of shape C{[1,]}. + @param target: Target tensor of shape C{[n_detections, n_keypoints * 3]}. + @type area: Tensor + @param area: Area tensor of shape C{[n_detections]}. + @rtype: tuple[Tensor, dict[str, Tensor]] + @return: A tuple containing the total loss tensor of shape C{[1,]} and a + dictionary with the regression loss and visibility loss tensors. 
""" - x, y, visibility_score = process_keypoints_predictions(prediction) - gt_x = target[:, 0::2] - gt_y = target[:, 1::2] - - mask = target[:, 0::2] != 0 - visibility_loss = ( - self.b_cross_entropy.forward(visibility_score, mask.float()) - * self.visibility_weight - ) - distance = (x - gt_x) ** 2 + (y - gt_y) ** 2 + device = prediction.device + sigmas = self.sigmas.to(device) - loss_factor = (torch.sum(mask != 0) + torch.sum(mask == 0)) / ( - torch.sum(mask != 0) + 1e-9 - ) - distance_loss = ( - loss_factor - * (torch.log(distance + 1 + 1e-9) * mask).mean() - * self.distance_weight + pred_x, pred_y, pred_v = process_keypoints_predictions(prediction) + gt_x = target[:, 0::3] + gt_y = target[:, 1::3] + gt_v = (target[:, 2::3] > 0).float() + + visibility_loss = self.b_cross_entropy.forward(pred_v, gt_v) + scales = area * self.area_factor + + d = (gt_x - pred_x) ** 2 + (gt_y - pred_y) ** 2 + e = d / (2 * sigmas**2) / (scales.view(-1, 1) + 1e-9) / 2 + + regression_loss_unreduced = 1 - torch.exp(-e) + regression_loss_reduced = (regression_loss_unreduced * gt_v).sum(dim=1) / ( + gt_v.sum(dim=1) + 1e-9 ) - loss = distance_loss + visibility_loss - return loss, {"distance": distance_loss, "visibility": visibility_loss} + regression_loss = regression_loss_reduced.mean() + + total_loss = regression_loss + visibility_loss + + return total_loss, { + "regression": regression_loss, + "visibility": visibility_loss, + } diff --git a/luxonis_train/attached_modules/metrics/README.md b/luxonis_train/attached_modules/metrics/README.md index 4e452158..17735540 100644 --- a/luxonis_train/attached_modules/metrics/README.md +++ b/luxonis_train/attached_modules/metrics/README.md @@ -42,3 +42,5 @@ boxes. ## MeanAveragePrecisionKeypoints Similar to [MeanAveragePrecision](#meanaverageprecision), but uses [OKS](#objectkeypointsimilarity) as `IoU` measure. +For a deeper understanding of how OKS works, please refer to the detailed explanation provided [here](https://learnopencv.com/object-keypoint-similarity/). +Evaluation leverages COCO evaluation framework (COCOeval) to assess mAP performance. 
diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision.py b/luxonis_train/attached_modules/metrics/mean_average_precision.py index 67c010ec..c3eaad7e 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision.py @@ -38,7 +38,9 @@ def update( def prepare( self, outputs: Packet[Tensor], labels: Labels ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]: - label = labels[self.task][0] + label = labels["boundingbox"][ + 0 + ] # TODO: Think of a better way to deal with multi-task heads output_nms = self.get_input_tensors(outputs) image_size = self.node.original_in_shape[1:] diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py index 31bc7557..27df8102 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py @@ -8,6 +8,10 @@ from torch import Tensor from torchvision.ops import box_convert +from luxonis_train.attached_modules.metrics.object_keypoint_similarity import ( + get_area_factor, + get_sigmas, +) from luxonis_train.utils.types import ( BBoxProtocol, KeypointProtocol, @@ -46,7 +50,9 @@ class MeanAveragePrecisionKeypoints(BaseMetric): def __init__( self, - kpt_sigmas: Tensor | None = None, + sigmas: list[float] | None = None, + area_factor: float | None = None, + max_dets: int = 20, box_format: Literal["xyxy", "xywh", "cxcywh"] = "xyxy", **kwargs, ): @@ -59,8 +65,13 @@ def __init__( @type num_keypoints: int @param num_keypoints: Number of keypoints. - @type kpt_sigmas: Tensor or None - @param kpt_sigmas: Sigma for each keypoint to weigh its importance, if None use same weights for all. + @type sigmas: list[float] | None + @param sigmas: Sigma for each keypoint to weigh its importance, if C{None}, then + use COCO if possible otherwise defaults. Defaults to C{None}. + @type area_factor: float | None + @param area_factor: Factor by which we multiply bbox area. If None then use default one. Defaults to C{None}. + @type max_dets: int, + @param max_dets: Maximum number of detections to be considered per image. Defaults to C{20}. @type box_format: Literal["xyxy", "xywh", "cxcywh"] @param box_format: Input bbox format. 
@type kwargs: Any @@ -74,9 +85,9 @@ def __init__( self.n_keypoints = self.node.n_keypoints - if kpt_sigmas is not None and len(kpt_sigmas) != self.n_keypoints: - raise ValueError("Expected kpt_sigmas to be of shape (num_keypoints).") - self.kpt_sigmas = kpt_sigmas or torch.ones(self.n_keypoints) + self.sigmas = get_sigmas(sigmas, self.n_keypoints, self.__class__.__name__) + self.area_factor = get_area_factor(area_factor, self.__class__.__name__) + self.max_dets = max_dets allowed_box_formats = ("xyxy", "xywh", "cxcywh") if box_format not in allowed_box_formats: @@ -214,7 +225,7 @@ def compute(self) -> tuple[Tensor, dict[str, Tensor]]: coco_preds.dataset = self._get_coco_format( self.pred_boxes, self.pred_keypoints, - self.groundtruth_labels, + self.pred_labels, scores=self.pred_scores, ) # type: ignore @@ -223,7 +234,8 @@ def compute(self) -> tuple[Tensor, dict[str, Tensor]]: coco_preds.createIndex() self.coco_eval = COCOeval(coco_target, coco_preds, iouType="keypoints") - self.coco_eval.params.kpt_oks_sigmas = self.kpt_sigmas.cpu().numpy() + self.coco_eval.params.kpt_oks_sigmas = self.sigmas.cpu().numpy() + self.coco_eval.params.maxDets = [self.max_dets] self.coco_eval.evaluate() self.coco_eval.accumulate() @@ -293,19 +305,22 @@ def _get_coco_format( if area is not None and area[image_id][k].cpu().item() > 0: area_stat = area[image_id][k].cpu().tolist() else: - area_stat = image_box[2] * image_box[3] + area_stat = image_box[2] * image_box[3] * self.area_factor + num_keypoints = len( + [i for i in range(2, len(image_kpt), 3) if image_kpt[i] != 0] + ) # number of annotated keypoints annotation = { "id": annotation_id, "image_id": image_id, "bbox": image_box, "area": area_stat, "category_id": image_label, - "iscrowd": crowds[image_id][k].cpu().tolist() - if crowds is not None - else 0, + "iscrowd": ( + crowds[image_id][k].cpu().tolist() if crowds is not None else 0 + ), "keypoints": image_kpt, - "num_keypoints": self.n_keypoints, + "num_keypoints": num_keypoints, } if scores is not None: diff --git a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py index c1768012..cfbae11f 100644 --- a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py +++ b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py @@ -1,3 +1,5 @@ +import logging + import torch from scipy.optimize import linear_sum_assignment from torch import Tensor @@ -12,22 +14,12 @@ from .base_metric import BaseMetric +logger = logging.getLogger(__name__) + class ObjectKeypointSimilarity( BaseMetric[list[dict[str, Tensor]], list[dict[str, Tensor]]] ): - """Object Keypoint Similarity metric for evaluating keypoint predictions. - - @type n_keypoints: int - @param n_keypoints: Number of keypoints. - @type kpt_sigmas: Tensor - @param kpt_sigmas: Sigma for each keypoint to weigh its importance, if C{None}, then - use same weights for all. - @type use_cocoeval_oks: bool - @param use_cocoeval_oks: Whether to use same OKS formula as in COCOeval or use the - one from definition. 
- """ - is_differentiable: bool = False higher_is_better: bool = True full_state_update: bool = True @@ -41,10 +33,25 @@ class ObjectKeypointSimilarity( def __init__( self, n_keypoints: int | None = None, - kpt_sigmas: Tensor | None = None, - use_cocoeval_oks: bool = False, + sigmas: list[float] | None = None, + area_factor: float | None = None, + use_cocoeval_oks: bool = True, **kwargs, ) -> None: + """Object Keypoint Similarity metric for evaluating keypoint predictions. + + @type n_keypoints: int + @param n_keypoints: Number of keypoints. + @type sigmas: list[float] | None + @param sigmas: Sigma for each keypoint to weigh its importance, if C{None}, then + use COCO if possible otherwise defaults. Defaults to C{None}. + @type area_factor: float | None + @param area_factor: Factor by which we multiply bbox area. If None then use + default one. Defaults to C{None}. + @type use_cocoeval_oks: bool + @param use_cocoeval_oks: Whether to use same OKS formula as in COCOeval or use + the one from definition. Defaults to C{True}. + """ super().__init__( required_labels=[LabelType.KEYPOINTS], protocol=KeypointProtocol, **kwargs ) @@ -55,9 +62,9 @@ def __init__( f"to {self.__class__.__name__}." ) self.n_keypoints = n_keypoints or self.node.n_keypoints - if kpt_sigmas is not None and len(kpt_sigmas) != self.n_keypoints: - raise ValueError("Expected kpt_sigmas to be of shape (num_keypoints).") - self.kpt_sigmas = kpt_sigmas or torch.ones(self.n_keypoints) / self.n_keypoints + + self.sigmas = get_sigmas(sigmas, self.n_keypoints, self.__class__.__name__) + self.area_factor = get_area_factor(area_factor, self.__class__.__name__) self.use_cocoeval_oks = use_cocoeval_oks self.add_state("pred_keypoints", default=[], dist_reduce_fx=None) @@ -93,7 +100,7 @@ def prepare( curr_kpts[:, 1::3] *= image_size[0] curr_bboxs_widths = curr_bboxs[:, 2] - curr_bboxs[:, 0] curr_bboxs_heights = curr_bboxs[:, 3] - curr_bboxs[:, 1] - curr_scales = torch.sqrt(curr_bboxs_widths * curr_bboxs_heights) + curr_scales = curr_bboxs_widths * curr_bboxs_heights * self.area_factor label_list_oks.append({"keypoints": curr_kpts, "scales": curr_scales}) return output_list_oks, label_list_oks @@ -136,7 +143,7 @@ def update( def compute(self) -> Tensor: """Computes the OKS metric based on the inner state.""" - self.kpt_sigmas = self.kpt_sigmas.to(self.device) + self.sigmas = self.sigmas.to(self.device) image_mean_oks = torch.zeros(len(self.groundtruth_keypoints)) for i, (pred_kpts, gt_kpts, gt_scales) in enumerate( zip( @@ -145,7 +152,13 @@ def compute(self) -> Tensor: ): gt_kpts = torch.reshape(gt_kpts, (-1, self.n_keypoints, 3)) # [N, K, 3] - image_ious = self._compute_oks(pred_kpts, gt_kpts, gt_scales) # [M, N] + image_ious = compute_oks( + pred_kpts, + gt_kpts, + gt_scales, + self.sigmas, + self.use_cocoeval_oks, + ) # [M, N] gt_indices, pred_indices = linear_sum_assignment( image_ious.cpu().numpy(), maximize=True ) @@ -156,48 +169,115 @@ def compute(self) -> Tensor: return final_oks - def _compute_oks(self, pred: Tensor, gt: Tensor, scales: Tensor) -> Tensor: - """Compute Object Keypoint Similarity between every GT and prediction. - - @type pred: Tensor[N, K, 3] - @param pred: Predicted keypoints. - @type gt: Tensor[M, K, 3] - @param gt: Groundtruth keypoints. - @type scales: Tensor[M] - @param scales: Scales of the bounding boxes. 
- @rtype: Tensor - @return: Object Keypoint Similarity every pred and gt [M, N] - """ - eps = 1e-7 - distances = (gt[:, None, :, 0] - pred[..., 0]) ** 2 + ( - gt[:, None, :, 1] - pred[..., 1] - ) ** 2 - kpt_mask = gt[..., 2] != 0 # only compute on visible keypoints - if self.use_cocoeval_oks: - # use same formula as in COCOEval script here: - # https://github.com/cocodataset/cocoapi/blob/8c9bcc3cf640524c4c20a9c40e89cb6a2f2fa0e9/PythonAPI/pycocotools/cocoeval.py#L229 - oks = ( - distances - / (2 * self.kpt_sigmas) ** 2 - / (scales[:, None, None] + eps) - / 2 - ) - else: - # use same formula as defined here: https://cocodataset.org/#keypoints-eval - oks = ( - distances - / ((scales[:, None, None] + eps) * self.kpt_sigmas.to(scales.device)) - ** 2 - / 2 - ) - return (torch.exp(-oks) * kpt_mask[:, None]).sum(-1) / ( - kpt_mask.sum(-1)[:, None] + eps +def compute_oks( + pred: Tensor, + gt: Tensor, + scales: Tensor, + sigmas: Tensor, + use_cocoeval_oks: bool, +) -> Tensor: + """Compute Object Keypoint Similarity between every GT and prediction. + + @type pred: Tensor[N, K, 3] + @param pred: Predicted keypoints. + @type gt: Tensor[M, K, 3] + @param gt: Groundtruth keypoints. + @type scales: Tensor[M] + @param scales: Scales of the bounding boxes. + @type sigmas: Tensor + @param sigmas: Sigma for each keypoint to weigh its importance, if C{None}, then use + same weights for all. + @type use_cocoeval_oks: bool + @param use_cocoeval_oks: Whether to use same OKS formula as in COCOeval or use the + one from definition. + @rtype: Tensor + @return: Object Keypoint Similarity every pred and gt [M, N] + """ + eps = 1e-7 + distances = (gt[:, None, :, 0] - pred[..., 0]) ** 2 + ( + gt[:, None, :, 1] - pred[..., 1] + ) ** 2 + kpt_mask = gt[..., 2] != 0 # only compute on visible keypoints + if use_cocoeval_oks: + # use same formula as in COCOEval script here: + # https://github.com/cocodataset/cocoapi/blob/8c9bcc3cf640524c4c20a9c40e89cb6a2f2fa0e9/PythonAPI/pycocotools/cocoeval.py#L229 + oks = distances / (2 * sigmas) ** 2 / (scales[:, None, None] + eps) / 2 + else: + # use same formula as defined here: https://cocodataset.org/#keypoints-eval + oks = ( + distances + / ((scales[:, None, None] + eps) * sigmas.to(scales.device)) ** 2 + / 2 ) + return (torch.exp(-oks) * kpt_mask[:, None]).sum(-1) / ( + kpt_mask.sum(-1)[:, None] + eps + ) + def fix_empty_tensors(input_tensor: Tensor) -> Tensor: """Empty tensors can cause problems in DDP mode, this methods corrects them.""" if input_tensor.numel() == 0 and input_tensor.ndim == 1: return input_tensor.unsqueeze(0) return input_tensor + + +def get_sigmas( + sigmas: list[float] | None, n_keypoints: int, class_name: str | None +) -> Tensor: + """Validate and set the sigma values.""" + if sigmas is not None: + if len(sigmas) == n_keypoints: + return torch.tensor(sigmas, dtype=torch.float32) + else: + error_msg = "The length of the sigmas list must be the same as the number of keypoints." + if class_name: + error_msg = f"[{class_name}] {error_msg}" + raise ValueError(error_msg) + else: + if n_keypoints == 17: + warn_msg = "Default COCO sigmas are being used." + if class_name: + warn_msg = f"[{class_name}] {warn_msg}" + logger.warning(warn_msg) + return torch.tensor( + [ + 0.026, + 0.025, + 0.025, + 0.035, + 0.035, + 0.079, + 0.079, + 0.072, + 0.072, + 0.062, + 0.062, + 0.107, + 0.107, + 0.087, + 0.087, + 0.089, + 0.089, + ], + dtype=torch.float32, + ) + else: + warn_msg = "Default sigma of 0.04 is being used for each keypoint." 
+ if class_name: + warn_msg = f"[{class_name}] {warn_msg}" + logger.warning(warn_msg) + return torch.tensor([0.04] * n_keypoints, dtype=torch.float32) + + +def get_area_factor(area_factor: float | None, class_name: str | None) -> float: + """Set the default area factor if not defined.""" + if area_factor is None: + warn_msg = "Default area_factor of 0.53 is being used bbox area scaling." + if class_name: + warn_msg = f"[{class_name}] {warn_msg}" + logger.warning(warn_msg) + return 0.53 + else: + return area_factor diff --git a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py index 6594912f..18d45ece 100644 --- a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py @@ -56,7 +56,7 @@ def draw_predictions( ) -> Tensor: viz = torch.zeros_like(canvas) for i in range(len(canvas)): - prediction = predictions[i][:, 1:] + prediction = predictions[i] mask = prediction[..., 2] < visibility_threshold visible_kpts = prediction[..., :2] * (~mask).unsqueeze(-1).float() viz[i] = draw_keypoints( diff --git a/luxonis_train/core/archiver.py b/luxonis_train/core/archiver.py index a42d2ec7..9e2b7c5a 100644 --- a/luxonis_train/core/archiver.py +++ b/luxonis_train/core/archiver.py @@ -289,7 +289,13 @@ def _get_head_specific_parameters( parameters["max_det"] = head_node.max_det parameters["n_keypoints"] = head_node.n_keypoints parameters["anchors"] = head_node.anchors.tolist() - + elif head_name == "EfficientKeypointBBoxHead": + # or appropriate subtype + head_node = self.lightning_module._modules["nodes"][head_alias] + parameters["iou_threshold"] = head_node.iou_thres + parameters["conf_threshold"] = head_node.conf_thres + parameters["max_det"] = head_node.max_det + parameters["n_keypoints"] = head_node.n_keypoints else: raise ValueError("Unknown head name") return parameters @@ -310,6 +316,8 @@ def _get_head_outputs(self, head_name) -> dict: head_outputs["predictions"] = self.outputs[0]["name"] elif head_name == "ImplicitKeypointBBoxHead": head_outputs["predictions"] = self.outputs[0]["name"] + elif head_name == "EfficientKeypointBBoxHead": + head_outputs["predictions"] = self.outputs[0]["name"] else: raise ValueError("Unknown head name") return head_outputs diff --git a/luxonis_train/nodes/README.md b/luxonis_train/nodes/README.md index 637c5026..6a29d237 100644 --- a/luxonis_train/nodes/README.md +++ b/luxonis_train/nodes/README.md @@ -20,6 +20,7 @@ arbitrarily as long as the two nodes are compatible with each other. - [BiSeNetHead](#bisenethead) - [EfficientBBoxHead](#efficientbboxhead) - [ImplicitKeypointBBoxHead](#implicitkeypointbboxhead) +- [EfficientKeypointBBoxHead](#efficientkeypointbboxhead) Every node takes these parameters: @@ -193,3 +194,16 @@ Adapted from [here](https://arxiv.org/pdf/2207.02696.pdf). | init_coco_biases | bool | True | Whether to use COCO bias and weight initialization | | conf_thres | float | 0.25 | confidence threshold for nms (used for evaluation) | | iou_thres | float | 0.45 | iou threshold for nms (used for evaluation) | + +## EfficientKeypointBBoxHead + +Adapted from [here](https://arxiv.org/pdf/2207.02696.pdf). + +**Params** + +| Key | Type | Default value | Description | +| ----------- | ----------- | ------------- | -------------------------------------------------- | +| n_keypoints | int \| None | None | Number of keypoints. 
| +| n_heads | int | 3 | Number of output heads | +| conf_thres | float | 0.25 | confidence threshold for nms (used for evaluation) | +| iou_thres | float | 0.45 | iou threshold for nms (used for evaluation) | diff --git a/luxonis_train/nodes/__init__.py b/luxonis_train/nodes/__init__.py index 9a506c1f..4c90abaa 100644 --- a/luxonis_train/nodes/__init__.py +++ b/luxonis_train/nodes/__init__.py @@ -3,6 +3,7 @@ from .classification_head import ClassificationHead from .contextspatial import ContextSpatial from .efficient_bbox_head import EfficientBBoxHead +from .efficient_keypoint_bbox_head import EfficientKeypointBBoxHead from .efficientnet import EfficientNet from .efficientrep import EfficientRep from .implicit_keypoint_bbox_head import ImplicitKeypointBBoxHead @@ -22,6 +23,7 @@ "EfficientBBoxHead", "EfficientNet", "EfficientRep", + "EfficientKeypointBBoxHead", "ImplicitKeypointBBoxHead", "BaseNode", "MicroNet", diff --git a/luxonis_train/nodes/efficient_bbox_head.py b/luxonis_train/nodes/efficient_bbox_head.py index e7b23288..23728af1 100644 --- a/luxonis_train/nodes/efficient_bbox_head.py +++ b/luxonis_train/nodes/efficient_bbox_head.py @@ -50,7 +50,9 @@ def __init__( @type max_det: int @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}. """ - super().__init__(_task_type=LabelType.BOUNDINGBOX, **kwargs) + super().__init__( + _task_type=kwargs.pop("_task_type", LabelType.BOUNDINGBOX), **kwargs + ) self.n_heads = n_heads @@ -126,7 +128,7 @@ def _fit_stride_to_num_heads(self): """Returns correct stride for number of heads and attach index.""" stride = torch.tensor( [ - self.original_in_shape[1] / x[1] # type: ignore + self.original_in_shape[1] / x[2] # type: ignore for x in self.in_sizes[: self.n_heads] ], dtype=torch.int, diff --git a/luxonis_train/nodes/efficient_keypoint_bbox_head.py b/luxonis_train/nodes/efficient_keypoint_bbox_head.py new file mode 100644 index 00000000..dabb62c5 --- /dev/null +++ b/luxonis_train/nodes/efficient_keypoint_bbox_head.py @@ -0,0 +1,207 @@ +from typing import Literal + +import torch +from torch import Tensor, nn + +from luxonis_train.nodes.blocks import ConvModule +from luxonis_train.utils.boxutils import ( + anchors_for_fpn_features, + dist2bbox, + non_max_suppression, +) +from luxonis_train.utils.types import LabelType, Packet + +from .efficient_bbox_head import EfficientBBoxHead + + +class EfficientKeypointBBoxHead(EfficientBBoxHead): + def __init__( + self, + n_keypoints: int | None = None, + n_heads: Literal[2, 3, 4] = 3, + conf_thres: float = 0.25, + iou_thres: float = 0.45, + max_det: int = 300, + **kwargs, + ): + """Head for object and keypoint detection. + + Adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial + Applications}. + + @param n_keypoints: Number of keypoints. If not defined, inferred + from the dataset metadata (if provided). Defaults to C{None}. + @type n_keypoints: int | None + + @param n_heads: Number of output heads. Defaults to C{3}. + B{Note:} Should be same also on neck in most cases. + @type n_heads: int + + @param conf_thres: Threshold for confidence. Defaults to C{0.25}. + @type conf_thres: float + + @param iou_thres: Threshold for IoU. Defaults to C{0.45}. + @type iou_thres: float + + @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}. 
+ @type max_det: int + """ + super().__init__( + n_heads=n_heads, + conf_thres=conf_thres, + iou_thres=iou_thres, + max_det=max_det, + _task_type=LabelType.KEYPOINTS, + **kwargs, + ) + + n_keypoints = n_keypoints or self.dataset_metadata._n_keypoints + + if n_keypoints is None: + raise ValueError( + "Number of keypoints must be specified either in the constructor or " + "in the dataset metadata." + ) + + self.n_keypoints = n_keypoints + self.nk = n_keypoints * 3 + + mid_ch = max(self.in_channels[0] // 4, self.nk) + self.kpt_layers = nn.ModuleList( + nn.Sequential( + ConvModule(x, mid_ch, 3, 1, 1, activation=nn.SiLU()), + ConvModule(mid_ch, mid_ch, 3, 1, 1, activation=nn.SiLU()), + nn.Conv2d(mid_ch, self.nk, 1, 1), + ) + for x in self.in_channels + ) + + def forward( + self, inputs: list[Tensor] + ) -> tuple[list[Tensor], list[Tensor], list[Tensor], list[Tensor]]: + features, cls_score_list, reg_distri_list = super().forward(inputs) + + _, self.anchor_points, _, self.stride_tensor = anchors_for_fpn_features( + features, + self.stride, + self.grid_cell_size, + self.grid_cell_offset, + multiply_with_stride=False, + ) + + kpt_list: list[Tensor] = [] + for i in range(self.n_heads): + kpt_pred = self.kpt_layers[i](inputs[i]) + kpt_list.append(kpt_pred) + + return features, cls_score_list, reg_distri_list, kpt_list + + def wrap( + self, output: tuple[list[Tensor], list[Tensor], list[Tensor], list[Tensor]] + ) -> Packet[Tensor]: + features, cls_score_list, reg_distri_list, kpt_list = output + bs = features[0].shape[0] + if self.export: + outputs = [] + for out_cls, out_reg, out_kpts in zip( + cls_score_list, reg_distri_list, kpt_list, strict=True + ): + chunks = out_kpts.split(3, dim=1) + modified_chunks = [] + for chunk in chunks: + x = chunk[:, 0:1, :, :] + y = chunk[:, 1:2, :, :] + v = torch.sigmoid(chunk[:, 2:3, :, :]) + modified_chunk = torch.cat([x, y, v], dim=1) + modified_chunks.append(modified_chunk) + out_kpts_modified = torch.cat(modified_chunks, dim=1) + out = torch.cat([out_reg, out_cls, out_kpts_modified], dim=1) + outputs.append(out) + return {"outputs": outputs} + cls_tensor = torch.cat( + [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], dim=2 + ).permute(0, 2, 1) + reg_tensor = torch.cat( + [reg_distri_list[i].flatten(2) for i in range(len(reg_distri_list))], dim=2 + ).permute(0, 2, 1) + kpt_tensor = torch.cat( + [ + kpt_list[i].view(bs, self.nk, -1).flatten(2) + for i in range(len(kpt_list)) + ], + dim=2, + ).permute(0, 2, 1) + + if self.training: + return { + "features": features, + "class_scores": [cls_tensor], + "distributions": [reg_tensor], + "keypoints_raw": [kpt_tensor], + } + + pred_kpt = self._dist2kpts(kpt_tensor) + detections = self._process_to_bbox_and_kps( + (features, cls_tensor, reg_tensor, pred_kpt) + ) + return { + "boundingbox": [detection[:, :6] for detection in detections], + "features": features, + "class_scores": [cls_tensor], + "distributions": [reg_tensor], + "keypoints": [ + detection[:, 6:].reshape(-1, self.n_keypoints, 3) + for detection in detections + ], + "keypoints_raw": [kpt_tensor], + } + + def _dist2kpts(self, kpts): + """Decodes keypoints.""" + y = kpts.clone() + + anchor_points_transposed = self.anchor_points.transpose(0, 1) + stride_tensor = self.stride_tensor.squeeze(-1) + + stride_tensor = stride_tensor.view(1, -1, 1) + anchor_points_x = anchor_points_transposed[0].view(1, -1, 1) + anchor_points_y = anchor_points_transposed[1].view(1, -1, 1) + + y[:, :, 0::3] = (y[:, :, 0::3] * 2.0 + (anchor_points_x - 0.5)) * 
stride_tensor + y[:, :, 1::3] = (y[:, :, 1::3] * 2.0 + (anchor_points_y - 0.5)) * stride_tensor + y[:, :, 2::3] = y[:, :, 2::3].sigmoid() + + return y + + def _process_to_bbox_and_kps( + self, output: tuple[list[Tensor], Tensor, Tensor, Tensor] + ) -> list[Tensor]: + """Performs post-processing of the output and returns bboxs after NMS.""" + features, cls_score_list, reg_dist_list, keypoints = output + + pred_bboxes = dist2bbox(reg_dist_list, self.anchor_points, out_format="xyxy") + + pred_bboxes *= self.stride_tensor + output_merged = torch.cat( + [ + pred_bboxes, + torch.ones( + (features[-1].shape[0], pred_bboxes.shape[1], 1), + dtype=pred_bboxes.dtype, + device=pred_bboxes.device, + ), + cls_score_list, + keypoints, + ], + dim=-1, + ) + + return non_max_suppression( + output_merged, + n_classes=self.n_classes, + conf_thres=self.conf_thres, + iou_thres=self.iou_thres, + bbox_format="xyxy", + max_det=self.max_det, + predicts_objectness=False, + ) diff --git a/luxonis_train/nodes/enums/head_categorization.py b/luxonis_train/nodes/enums/head_categorization.py index 56f98ff3..a2854b3a 100644 --- a/luxonis_train/nodes/enums/head_categorization.py +++ b/luxonis_train/nodes/enums/head_categorization.py @@ -7,6 +7,7 @@ class ImplementedHeads(Enum): ClassificationHead = "Classification" EfficientBBoxHead = "ObjectDetectionYOLO" ImplicitKeypointBBoxHead = "KeypointDetectionYOLO" + EfficientKeypointBBoxHead = "Keypoint" SegmentationHead = "Segmentation" BiSeNetHead = "Segmentation" @@ -17,5 +18,6 @@ class ImplementedHeadsIsSoxtmaxed(Enum): ClassificationHead = False EfficientBBoxHead = None ImplicitKeypointBBoxHead = None + EfficientKeypointBBoxHead = None SegmentationHead = False BiSeNetHead = False diff --git a/luxonis_train/utils/assigners/atts_assigner.py b/luxonis_train/utils/assigners/atts_assigner.py index 26b4dc23..f4989b54 100644 --- a/luxonis_train/utils/assigners/atts_assigner.py +++ b/luxonis_train/utils/assigners/atts_assigner.py @@ -38,7 +38,7 @@ def forward( gt_bboxes: Tensor, mask_gt: Tensor, pred_bboxes: Tensor, - ) -> tuple[Tensor, Tensor, Tensor, Tensor]: + ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: """Assigner's forward method which generates final assignments. @type anchor_bboxes: Tensor @@ -53,7 +53,7 @@ def forward( @param mask_gt: Mask for valid GTs [bs, n_max_boxes, 1] @type pred_bboxes: Tensor @param pred_bboxes: Predicted bboxes of shape [bs, n_anchors, 4] - @rtype: tuple[Tensor, Tensor, Tensor, Tensor] + @rtype: tuple[Tensor, Tensor, Tensor, Tensor, Tensor] @return: Assigned labels of shape [bs, n_anchors], assigned bboxes of shape [bs, n_anchors, 4], assigned scores of shape [bs, n_anchors, n_classes] and output positive mask of shape [bs, n_anchors]. 
@@ -70,6 +70,7 @@ def forward( torch.zeros([self.bs, self.n_anchors, 4]).to(device), torch.zeros([self.bs, self.n_anchors, self.n_classes]).to(device), torch.zeros([self.bs, self.n_anchors]).to(device), + torch.zeros([self.bs, self.n_anchors]).to(device), ) gt_bboxes_flat = gt_bboxes.reshape([-1, 4]) @@ -124,6 +125,7 @@ def forward( assigned_bboxes, assigned_scores, out_mask_positive, + assigned_gt_idx, ) def _get_bbox_center(self, bbox: Tensor) -> Tensor: diff --git a/luxonis_train/utils/assigners/tal_assigner.py b/luxonis_train/utils/assigners/tal_assigner.py index 0765ad6a..08b5b461 100644 --- a/luxonis_train/utils/assigners/tal_assigner.py +++ b/luxonis_train/utils/assigners/tal_assigner.py @@ -50,7 +50,7 @@ def forward( gt_labels: Tensor, gt_bboxes: Tensor, mask_gt: Tensor, - ) -> tuple[Tensor, Tensor, Tensor, Tensor]: + ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: """Assigner's forward method which generates final assignments. @type pred_scores: Tensor @@ -65,7 +65,7 @@ def forward( @param gt_bboxes: Initial GT bboxes [bs, n_max_boxes, 4] @type mask_gt: Tensor @param mask_gt: Mask for valid GTs [bs, n_max_boxes, 1] - @rtype: tuple[Tensor, Tensor, Tensor, Tensor] + @rtype: tuple[Tensor, Tensor, Tensor, Tensor, Tensor] @return: Assigned labels of shape [bs, n_anchors], assigned bboxes of shape [bs, n_anchors, 4], assigned scores of shape [bs, n_anchors, n_classes] and output mask of shape [bs, n_anchors] @@ -80,6 +80,7 @@ def forward( torch.zeros_like(pred_bboxes).to(device), torch.zeros_like(pred_scores).to(device), torch.zeros_like(pred_scores[..., 0]).to(device), + torch.zeros_like(pred_scores[..., 0]).to(device), ) # Compute alignment metric between all bboxes (bboxes of all pyramid levels) and GT @@ -121,7 +122,13 @@ def forward( out_mask_positive = mask_pos_sum.bool() - return assigned_labels, assigned_bboxes, assigned_scores, out_mask_positive + return ( + assigned_labels, + assigned_bboxes, + assigned_scores, + out_mask_positive, + assigned_gt_idx, + ) def _get_alignment_metric( self, diff --git a/luxonis_train/utils/boxutils.py b/luxonis_train/utils/boxutils.py index 3a26cc4f..64a8b8dd 100644 --- a/luxonis_train/utils/boxutils.py +++ b/luxonis_train/utils/boxutils.py @@ -77,12 +77,20 @@ def match_to_anchor( # The boxes and keypoints need to be scaled to the size of the features # First two indices are batch index and class label, # last index is anchor index. Those are not scaled. 
- scale_length = 2 * n_keypoints + box_offset + 2 + scale_length = 3 * n_keypoints + box_offset + 2 scales = torch.ones(scale_length, device=targets.device) - scales[2 : scale_length - 1] = torch.tensor( - [scale_width, scale_height] * (n_keypoints + 2) + + # Scale box and keypoint coordinates, but not visibility + for i in range(n_keypoints): + scales[box_offset + 1 + 3 * i] = scale_width + scales[box_offset + 2 + 3 * i] = scale_height + + scales[2 : box_offset + 1] = torch.tensor( + [scale_width, scale_height, scale_width, scale_height] ) + scaled_targets = targets * scales + if targets.size(1) == 0: return targets[0], torch.zeros(1, device=targets.device) diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index b750dd9c..6c15cace 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 77% - 77% + 75% + 75% diff --git a/tests/unittests/test_utils/test_assigners/test_atts_assigner.py b/tests/unittests/test_utils/test_assigners/test_atts_assigner.py index 4512d9e5..a3801ebb 100644 --- a/tests/unittests/test_utils/test_assigners/test_atts_assigner.py +++ b/tests/unittests/test_utils/test_assigners/test_atts_assigner.py @@ -24,7 +24,7 @@ def test_forward(): mask_gt = torch.rand(bs, n_max_boxes, 1) pred_bboxes = torch.rand(bs, n_anchors, 4) - labels, bboxes, scores, mask = assigner.forward( + labels, bboxes, scores, mask, assigned_gt_idx = assigner.forward( anchor_bboxes, n_level_bboxes, gt_labels, gt_bboxes, mask_gt, pred_bboxes ) @@ -32,6 +32,7 @@ def test_forward(): assert bboxes.shape == (bs, n_anchors, 4) assert scores.shape == (bs, n_anchors, n_classes) assert mask.shape == (bs, n_anchors) + assert assigned_gt_idx.shape == (bs, n_anchors) def test_get_bbox_center(): diff --git a/tests/unittests/test_utils/test_assigners/test_tal_assigner.py b/tests/unittests/test_utils/test_assigners/test_tal_assigner.py index bb2dd912..8f291615 100644 --- a/tests/unittests/test_utils/test_assigners/test_tal_assigner.py +++ b/tests/unittests/test_utils/test_assigners/test_tal_assigner.py @@ -31,7 +31,7 @@ def test_forward(): mask_gt = torch.rand(batch_size, num_max_boxes, 1) # Call the forward method - labels, bboxes, scores, mask = assigner.forward( + labels, bboxes, scores, mask, assigned_gt_idx = assigner.forward( pred_scores, pred_bboxes, anchor_points, gt_labels, gt_bboxes, mask_gt ) @@ -60,6 +60,10 @@ def test_forward(): assert torch.equal( mask, torch.zeros_like(mask) ) # All mask values should be zero as there are no GT boxes + assert assigned_gt_idx.shape == (batch_size, num_anchors) + assert torch.equal( + assigned_gt_idx, torch.zeros_like(assigned_gt_idx) + ) # All assigned_gt_idx values should be zero as there are no GT boxes def test_get_alignment_metric(): From ef606b77cbcb8f7faffb046a4ae2faecb188697c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Wed, 17 Jul 2024 21:37:06 +0200 Subject: [PATCH 37/75] Fix Views (#47) --- luxonis_train/core/core.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index 0fe1756f..a8bec082 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -128,11 +128,11 @@ def __init__( if view == "train" else self.val_augmentations ), - view=( - self.cfg.loader.train_view - if view == "train" - else self.cfg.loader.val_view - ), + view={ + "train": self.cfg.loader.train_view, + "val": self.cfg.loader.val_view, + "test": self.cfg.loader.test_view, + }[view], 
image_source=self.cfg.loader.image_source, **self.cfg.loader.params, ) From 5e4dd76a7be5c6e008f24b25f396d30e90502eb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Fri, 26 Jul 2024 17:53:56 +0200 Subject: [PATCH 38/75] Remove `dataset_id` (#48) --- luxonis_train/utils/loaders/luxonis_loader_torch.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/luxonis_train/utils/loaders/luxonis_loader_torch.py b/luxonis_train/utils/loaders/luxonis_loader_torch.py index 094bc96a..1c4bb8b5 100644 --- a/luxonis_train/utils/loaders/luxonis_loader_torch.py +++ b/luxonis_train/utils/loaders/luxonis_loader_torch.py @@ -17,7 +17,6 @@ def __init__( self, dataset_name: str | None = None, team_id: str | None = None, - dataset_id: str | None = None, bucket_type: Literal["internal", "external"] = "internal", bucket_storage: Literal["local", "s3", "gcs", "azure"] = "local", stream: bool = False, @@ -27,7 +26,6 @@ def __init__( self.dataset = LuxonisDataset( dataset_name=dataset_name, team_id=team_id, - dataset_id=dataset_id, bucket_type=BucketType(bucket_type), bucket_storage=BucketStorage(bucket_storage), ) From c0305d6fec8ceabdb59a36689bd3172b426bf471 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Tue, 30 Jul 2024 09:33:29 +0200 Subject: [PATCH 39/75] Fix Archive Tests (#50) --- tests/unittests/test_core/test_archiver.py | 23 ---------------------- 1 file changed, 23 deletions(-) diff --git a/tests/unittests/test_core/test_archiver.py b/tests/unittests/test_core/test_archiver.py index 52449e6a..d7b4dcef 100644 --- a/tests/unittests/test_core/test_archiver.py +++ b/tests/unittests/test_core/test_archiver.py @@ -12,7 +12,6 @@ import onnx import pytest from luxonis_ml.data import LuxonisDataset -from luxonis_ml.nn_archive.config_building_blocks.base_models import head_outputs from parameterized import parameterized import luxonis_train @@ -209,7 +208,6 @@ def dataset_generator(): print("Deleting existing dataset") LuxonisDataset(ldf_name).delete_dataset() dataset = LuxonisDataset(ldf_name) - dataset.set_classes(list(labels)) if kpt_anno: keypoint_labels = [ "kp1", @@ -333,27 +331,6 @@ def test_config_io(self, head_name): assert valid_inputs and valid_outputs - @parameterized.expand(HEAD_NAMES) - def test_head_outputs(self, head_name): - """Tests if archived config head outputs are valid.""" - archive_path = os.path.join(self.tmp_path, f"nnarchive_{head_name}_onnx.tar.xz") - with tarfile.open(archive_path, mode="r") as tar: - f = tar.extractfile("config.json") - json_dict = json.load(f) - head_output = json_dict["model"]["heads"][0]["outputs"] - if head_name == "ClassificationHead": - assert head_outputs.OutputsClassification.parse_obj(head_output) - elif head_name == "EfficientBBoxHead": - assert head_outputs.OutputsYOLO.parse_obj(head_output) - elif head_name == "ImplicitKeypointBBoxHead": - assert head_outputs.OutputsKeypointDetectionYOLO.parse_obj(head_output) - elif head_name == "SegmentationHead": - assert head_outputs.OutputsSegmentation.parse_obj(head_output) - elif head_name == "BiSeNetHead": - assert head_outputs.OutputsSegmentation.parse_obj(head_output) - else: - raise NotImplementedError(f"Missing tests for {head_name} head") - @classmethod def teardown_class(cls): """Removes all files created during setup.""" From 3d5307d049863f7853073501a5c581502bd82628 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Tue, 30 Jul 2024 18:15:31 +0200 Subject: [PATCH 40/75] Fixed New `mlflow` Run (#49) --- 
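The change below re-creates the tracker from the run id captured at startup, so that logs and checkpoints uploaded after training (or from a signal handler) still attach to the same MLflow run. A minimal, library-agnostic sketch of that idea follows; `Tracker` is only a stand-in here, not the real `LuxonisTrackerPL` API.

```python
class Tracker:
    """Stand-in for LuxonisTrackerPL; a real tracker would create or attach an MLflow run."""

    def __init__(self, run_id: str | None = None):
        self.run_id = run_id or "generated-run-id"


startup_run_id = Tracker().run_id               # captured once, right after the run is created
late_tracker = Tracker(run_id=startup_run_id)   # rebuilt later, e.g. when uploading logs
assert late_tracker.run_id == startup_run_id    # post-training uploads land in the same run
```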
luxonis_train/callbacks/upload_checkpoint.py | 2 +- luxonis_train/core/core.py | 21 +++++++++++++++----- luxonis_train/core/trainer.py | 13 +++++++----- luxonis_train/utils/tracker.py | 6 +++++- 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/luxonis_train/callbacks/upload_checkpoint.py b/luxonis_train/callbacks/upload_checkpoint.py index efd7fe02..b91f7998 100644 --- a/luxonis_train/callbacks/upload_checkpoint.py +++ b/luxonis_train/callbacks/upload_checkpoint.py @@ -56,7 +56,7 @@ def on_save_checkpoint( local_path=temp_filename, remote_path=temp_filename, mlflow_instance=trainer.logger.experiment.get( # type: ignore - "mlflow", None + "mlflow", ), ) os.remove(temp_filename) diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index a8bec082..73131df9 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -58,11 +58,12 @@ def __init__( self.rank = rank_zero_only.rank - self.tracker = LuxonisTrackerPL( - rank=self.rank, - mlflow_tracking_uri=self.cfg.ENVIRON.MLFLOW_TRACKING_URI, - **self.cfg.tracker.model_dump(), - ) + self.tracker = self._create_tracker() + # NOTE: tracker.experiment has to be called first in order + # for the run_id to be initialized + # TODO: it shouldn't be a property because of the above + _ = self.tracker.experiment + self._run_id = self.tracker.run_id self.run_save_dir = os.path.join( self.cfg.tracker.save_directory, self.tracker.run_name @@ -222,3 +223,13 @@ def get_best_metric_checkpoint_path(self) -> str: def reset_logging(self) -> None: """Close file handlers to release the log file.""" reset_logging() + + def _create_tracker(self, run_id: str | None = None) -> LuxonisTrackerPL: + kwargs = self.cfg.tracker.model_dump() + if run_id is not None: + kwargs["run_id"] = run_id + return LuxonisTrackerPL( + rank=self.rank, + mlflow_tracking_uri=self.cfg.ENVIRON.MLFLOW_TRACKING_URI, + **kwargs, + ) diff --git a/luxonis_train/core/trainer.py b/luxonis_train/core/trainer.py index 8054522e..90c15059 100644 --- a/luxonis_train/core/trainer.py +++ b/luxonis_train/core/trainer.py @@ -10,6 +10,7 @@ from luxonis_train.models import LuxonisModel from luxonis_train.utils.config import Config +from luxonis_train.utils.tracker import LuxonisTrackerPL from .core import Core @@ -60,7 +61,8 @@ def graceful_exit(signum: int, _): logger.info(f"{signal.Signals(signum).name} received, stopping training...") ckpt_path = osp.join(self.run_save_dir, "resume.ckpt") self.pl_trainer.save_checkpoint(ckpt_path) - self._upload_logs() + tracker = self._create_tracker(self._run_id) + self._upload_logs(tracker) if self.cfg.tracker.is_mlflow: logger.info("Uploading checkpoint to MLFlow.") @@ -72,14 +74,15 @@ def graceful_exit(signum: int, _): fs.put_file( local_path=ckpt_path, remote_path="resume.ckpt", - mlflow_instance=self.tracker.experiment.get("mlflow", None), + mlflow_instance=tracker.experiment.get("mlflow"), ) exit(0) signal.signal(signal.SIGTERM, graceful_exit) - def _upload_logs(self) -> None: + def _upload_logs(self, tracker: LuxonisTrackerPL | None = None) -> None: + tracker = tracker or self.tracker if self.cfg.tracker.is_mlflow: logger.info("Uploading logs to MLFlow.") fs = LuxonisFileSystem( @@ -90,7 +93,7 @@ def _upload_logs(self) -> None: fs.put_file( local_path=self.log_file, remote_path="luxonis_train.log", - mlflow_instance=self.tracker.experiment.get("mlflow", None), + mlflow_instance=tracker.experiment.get("mlflow"), ) def _trainer_fit(self, *args, **kwargs): @@ -99,7 +102,7 @@ def _trainer_fit(self, *args, **kwargs): except 
Exception: logger.exception("Encountered exception during training.") finally: - self._upload_logs() + self._upload_logs(self._create_tracker(self._run_id)) def train(self, new_thread: bool = False) -> None: """Runs training. diff --git a/luxonis_train/utils/tracker.py b/luxonis_train/utils/tracker.py index df157b3b..5f64f08b 100644 --- a/luxonis_train/utils/tracker.py +++ b/luxonis_train/utils/tracker.py @@ -1,3 +1,5 @@ +from typing import Literal + from lightning.pytorch.loggers.logger import Logger from lightning.pytorch.utilities import rank_zero_only # type: ignore from luxonis_ml.tracker import LuxonisTracker @@ -7,7 +9,9 @@ class LuxonisTrackerPL(LuxonisTracker, Logger): """Implementation of LuxonisTracker that is compatible with PytorchLightning.""" @rank_zero_only - def finalize(self, status: str = "success") -> None: + def finalize( + self, status: Literal["success", "failed", "finished"] = "success" + ) -> None: """Finalizes current run.""" if self.is_tensorboard: self.experiment["tensorboard"].flush() From fc4bf11cd58036f2f7451b279fe3c2a19059073a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Fri, 2 Aug 2024 16:25:55 +0200 Subject: [PATCH 41/75] Suppressing `mlflow` Exceptions (#52) --- luxonis_train/core/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index 73131df9..3c1acf7f 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -1,5 +1,6 @@ import os import os.path as osp +from contextlib import suppress from logging import getLogger from typing import Any @@ -62,7 +63,8 @@ def __init__( # NOTE: tracker.experiment has to be called first in order # for the run_id to be initialized # TODO: it shouldn't be a property because of the above - _ = self.tracker.experiment + with suppress(Exception): + _ = self.tracker.experiment self._run_id = self.tracker.run_id self.run_save_dir = os.path.join( From a20de5f788f46dfb11aec010fe95b51266931bdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Sat, 3 Aug 2024 14:20:48 +0200 Subject: [PATCH 42/75] Hotfix for Freezing `polars` (#54) --- luxonis_train/utils/config.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 4406c285..2e8460ca 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -4,6 +4,7 @@ from luxonis_ml.utils import Environ, LuxonisConfig, LuxonisFileSystem, setup_logging from pydantic import BaseModel, ConfigDict, Field, model_validator +from typing_extensions import Self logger = logging.getLogger(__name__) @@ -62,7 +63,7 @@ class ModelConfig(CustomBaseModel): outputs: list[str] = [] @model_validator(mode="after") - def check_predefined_model(self): + def check_predefined_model(self) -> Self: from luxonis_train.utils.registry import MODELS if self.predefined_model: @@ -84,7 +85,7 @@ def check_predefined_model(self): return self @model_validator(mode="after") - def check_graph(self): + def check_graph(self) -> Self: from luxonis_train.utils.general import is_acyclic graph = {node.alias or node.name: node.inputs for node in self.nodes} @@ -103,7 +104,7 @@ def check_graph(self): return self @model_validator(mode="after") - def check_unique_names(self): + def check_unique_names(self) -> Self: for section, objects in [ ("nodes", self.nodes), ("losses", self.losses), @@ -164,7 +165,7 @@ class PreprocessingConfig(CustomBaseModel): 
augmentations: list[AugmentationConfig] = [] @model_validator(mode="after") - def check_normalize(self): + def check_normalize(self) -> Self: if self.normalize.active: self.augmentations.append( AugmentationConfig(name="Normalize", params=self.normalize.params) @@ -226,7 +227,7 @@ class TrainerConfig(CustomBaseModel): scheduler: SchedulerConfig = SchedulerConfig() @model_validator(mode="after") - def check_num_workes_platform(self): + def check_num_workes_platform(self) -> Self: if ( sys.platform == "win32" or sys.platform == "darwin" ) and self.num_workers != 0: @@ -237,7 +238,7 @@ def check_num_workes_platform(self): return self @model_validator(mode="after") - def check_validation_interval(self): + def check_validation_interval(self) -> Self: if self.validation_interval > self.epochs: logger.warning( "Setting `validation_interval` same as `epochs` otherwise no checkpoint would be generated." @@ -271,7 +272,7 @@ class ExportConfig(CustomBaseModel): upload_url: str | None = None @model_validator(mode="after") - def check_values(self): + def check_values(self) -> Self: def pad_values(values: float | list[float] | None): if values is None: return None @@ -318,6 +319,17 @@ class Config(LuxonisConfig): tuner: TunerConfig | None = None ENVIRON: Environ = Field(Environ(), exclude=True) + @model_validator(mode="after") + def validate_num_workers(self) -> Self: + if self.loader.name == "LuxonisLoaderTorch": + if self.trainer.num_workers != 0: + logger.warning( + "Setting `num_workers` to 0 because of " + "compatibility with LuxonisDataset." + ) + self.trainer.num_workers = 0 + return self + @model_validator(mode="before") @classmethod def check_environment(cls, data: Any) -> Any: From 611b93c92f8bdb899a297a2b3ce486d2bbc4c422 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Wed, 7 Aug 2024 19:31:41 +0200 Subject: [PATCH 43/75] Full Custom Tasks Support (#53) Co-authored-by: KlemenSkrlj <47853619+klemen1999@users.noreply.github.com> Co-authored-by: GitHub Actions --- configs/coco_multitask_model.yaml | 188 ++++++++++++ configs/efficient_coco_model.yaml | 114 ++++++++ configs/example_multi_input.yaml | 27 +- luxonis_train/__main__.py | 2 +- .../attached_modules/base_attached_module.py | 256 +++++++++++------ .../losses/adaptive_detection_loss.py | 31 +- .../attached_modules/losses/base_loss.py | 1 - .../losses/bce_with_logits.py | 3 + .../attached_modules/losses/cross_entropy.py | 3 + .../losses/efficient_keypoint_bbox_loss.py | 45 +-- .../losses/implicit_keypoint_bbox_loss.py | 35 +-- .../attached_modules/losses/keypoint_loss.py | 26 +- .../losses/sigmoid_focal_loss.py | 3 + .../losses/smooth_bce_with_logits.py | 4 +- .../losses/softmax_focal_loss.py | 3 + .../attached_modules/metrics/base_metric.py | 1 - .../attached_modules/metrics/common.py | 15 +- .../metrics/mean_average_precision.py | 21 +- .../mean_average_precision_keypoints.py | 34 +-- .../metrics/object_keypoint_similarity.py | 36 +-- .../visualizers/base_visualizer.py | 1 - .../visualizers/bbox_visualizer.py | 26 +- .../visualizers/classification_visualizer.py | 9 +- .../visualizers/keypoint_visualizer.py | 15 +- .../visualizers/multi_visualizer.py | 6 +- .../visualizers/segmentation_visualizer.py | 20 +- .../callbacks/luxonis_progress_bar.py | 1 + luxonis_train/core/inferer.py | 8 +- luxonis_train/models/luxonis_model.py | 34 ++- .../models/predefined_models/README.md | 46 +-- .../predefined_models/classification_model.py | 2 + .../predefined_models/detection_model.py | 2 + .../keypoint_detection_model.py | 17 +- 
.../predefined_models/segmentation_model.py | 2 + luxonis_train/nodes/README.md | 14 +- luxonis_train/nodes/base_node.py | 269 +++++++++++++++--- luxonis_train/nodes/bisenet_head.py | 4 +- luxonis_train/nodes/classification_head.py | 8 +- luxonis_train/nodes/efficient_bbox_head.py | 9 +- .../nodes/efficient_keypoint_bbox_head.py | 9 +- .../nodes/implicit_keypoint_bbox_head.py | 9 +- luxonis_train/nodes/rexnetv1.py | 5 +- luxonis_train/nodes/segmentation_head.py | 8 +- .../utils/assigners/atts_assigner.py | 7 +- luxonis_train/utils/config.py | 3 +- luxonis_train/utils/loaders/__init__.py | 6 +- .../utils/loaders/luxonis_loader_torch.py | 7 +- luxonis_train/utils/types.py | 14 +- media/coverage_badge.svg | 4 +- tests/integration/conftest.py | 30 +- tests/integration/test_multi_input.py | 76 +++-- tests/integration/test_sanity.py | 10 +- .../test_loaders/test_base_loader.py | 4 +- 53 files changed, 1029 insertions(+), 504 deletions(-) create mode 100755 configs/coco_multitask_model.yaml create mode 100644 configs/efficient_coco_model.yaml diff --git a/configs/coco_multitask_model.yaml b/configs/coco_multitask_model.yaml new file mode 100755 index 00000000..7cf8541a --- /dev/null +++ b/configs/coco_multitask_model.yaml @@ -0,0 +1,188 @@ +# An example configuration for a more complex network. + + +model: + name: coco_test + nodes: + - name: EfficientRep + params: + channels_list: [64, 128, 256, 512, 1024] + num_repeats: [1, 6, 12, 18, 6] + depth_mul: 0.33 + width_mul: 0.33 + + - name: RepPANNeck + inputs: + - EfficientRep + params: + channels_list: [256, 128, 128, 256, 256, 512] + num_repeats: [12, 12, 12, 12] + depth_mul: 0.33 + width_mul: 0.33 + + - name: ImplicitKeypointBBoxHead + task: + keypoints: keypoints-task + boundingbox: boundingbox-task + inputs: + - RepPANNeck + params: + conf_thres: 0.25 + iou_thres: 0.45 + + - name: SegmentationHead + task: segmentation-task + inputs: + - RepPANNeck + + - name: EfficientBBoxHead + task: boundingbox-task + inputs: + - RepPANNeck + params: + conf_thres: 0.75 + iou_thres: 0.45 + + losses: + - name: AdaptiveDetectionLoss + attached_to: EfficientBBoxHead + - name: BCEWithLogitsLoss + attached_to: SegmentationHead + - name: ImplicitKeypointBBoxLoss + attached_to: ImplicitKeypointBBoxHead + params: + keypoint_regression_loss_weight: 0.5 + keypoint_visibility_loss_weight: 0.7 + bbox_loss_weight: 0.05 + objectness_loss_weight: 0.2 + + metrics: + - name: ObjectKeypointSimilarity + is_main_metric: true + attached_to: ImplicitKeypointBBoxHead + - name: MeanAveragePrecisionKeypoints + attached_to: ImplicitKeypointBBoxHead + - name: MeanAveragePrecision + attached_to: EfficientBBoxHead + - name: F1Score + attached_to: SegmentationHead + params: + task: binary + - name: JaccardIndex + attached_to: SegmentationHead + params: + task: binary + + visualizers: + - name: MultiVisualizer + attached_to: ImplicitKeypointBBoxHead + params: + visualizers: + - name: KeypointVisualizer + params: + nonvisible_color: blue + - name: BBoxVisualizer + params: + colors: + person: "#FF5055" + - name: SegmentationVisualizer + attached_to: SegmentationHead + params: + colors: "#FF5055" + - name: BBoxVisualizer + attached_to: EfficientBBoxHead + +tracker: + project_name: coco_test_multitask + save_directory: output + is_tensorboard: True + is_wandb: False + wandb_entity: luxonis + is_mlflow: False + +loader: + train_view: train + val_view: val + test_view: test + + params: + dataset_name: coco_test_multitask + +trainer: + accelerator: auto + devices: auto + strategy: auto + + 
num_sanity_val_steps: 1 + profiler: null + verbose: True + batch_size: 4 + accumulate_grad_batches: 1 + epochs: &epochs 200 + num_workers: 0 + train_metrics_interval: -1 + validation_interval: 10 + num_log_images: 8 + skip_last_batch: True + log_sub_losses: True + save_top_k: 3 + + preprocessing: + train_image_size: [&height 256, &width 320] + keep_aspect_ratio: False + train_rgb: True + normalize: + active: True + augmentations: + - name: Defocus + params: + p: 0.1 + - name: Sharpen + params: + p: 0.1 + - name: Flip + - name: RandomRotate90 + - name: Mosaic4 + params: + out_width: *width + out_height: *height + + callbacks: + - name: LearningRateMonitor + params: + logging_interval: step + - name: MetadataLogger + params: + hyperparams: ["trainer.epochs", trainer.batch_size] + - name: EarlyStopping + params: + patience: 3 + monitor: val/loss + mode: min + verbose: true + - name: ExportOnTrainEnd + - name: TestOnTrainEnd + + optimizer: + name: SGD + params: + lr: 0.02 + momentum: 0.937 + nesterov: True + weight_decay: 0.0005 + + scheduler: + name: CosineAnnealingLR + params: + T_max: *epochs + eta_min: 0 + +exporter: + onnx: + opset_version: 11 + +tuner: + params: + trainer.optimizer.name_categorical: ["Adam", "SGD"] + trainer.optimizer.params.lr_float: [0.0001, 0.001] + trainer.batch_size_int: [4, 16, 4] diff --git a/configs/efficient_coco_model.yaml b/configs/efficient_coco_model.yaml new file mode 100644 index 00000000..d9f70647 --- /dev/null +++ b/configs/efficient_coco_model.yaml @@ -0,0 +1,114 @@ + +model: + name: coco_test + nodes: + - name: EfficientRep + params: + channels_list: [64, 128, 256, 512, 1024] + num_repeats: [1, 6, 12, 18, 6] + depth_mul: 0.33 + width_mul: 0.33 + + - name: RepPANNeck + inputs: + - EfficientRep + params: + channels_list: [256, 128, 128, 256, 256, 512] + num_repeats: [12, 12, 12, 12] + depth_mul: 0.33 + width_mul: 0.33 + + - name: EfficientKeypointBBoxHead + inputs: + - RepPANNeck + params: + conf_thres: 0.25 + iou_thres: 0.45 + + - name: SegmentationHead + inputs: + - RepPANNeck + + - name: EfficientBBoxHead + inputs: + - RepPANNeck + params: + conf_thres: 0.75 + iou_thres: 0.45 + + losses: + - name: AdaptiveDetectionLoss + attached_to: EfficientBBoxHead + - name: BCEWithLogitsLoss + attached_to: SegmentationHead + - name: EfficientKeypointBBoxLoss + attached_to: EfficientKeypointBBoxHead + + metrics: + - name: ObjectKeypointSimilarity + is_main_metric: true + attached_to: EfficientKeypointBBoxHead + - name: MeanAveragePrecisionKeypoints + attached_to: EfficientKeypointBBoxHead + - name: MeanAveragePrecision + attached_to: EfficientBBoxHead + - name: F1Score + attached_to: SegmentationHead + params: + task: binary + - name: JaccardIndex + attached_to: SegmentationHead + params: + task: binary + + visualizers: + - name: MultiVisualizer + attached_to: EfficientKeypointBBoxHead + params: + visualizers: + - name: KeypointVisualizer + params: + nonvisible_color: blue + - name: BBoxVisualizer + params: + colors: + person: "#FF5055" + - name: SegmentationVisualizer + attached_to: SegmentationHead + params: + colors: "#FF5055" + - name: BBoxVisualizer + attached_to: EfficientBBoxHead + +tracker: + project_name: coco_test + save_directory: output + is_tensorboard: True + +loader: + params: + dataset_name: coco_test + +trainer: + + num_sanity_val_steps: 1 + batch_size: 4 + accumulate_grad_batches: 1 + epochs: 200 + num_workers: 0 + train_metrics_interval: -1 + validation_interval: 10 + num_log_images: 8 + save_top_k: 3 + + preprocessing: + train_image_size: 
[&height 256, &width 320] + keep_aspect_ratio: False + train_rgb: True + normalize: + active: True + + callbacks: + - name: ExportOnTrainEnd + - name: TestOnTrainEnd + diff --git a/configs/example_multi_input.yaml b/configs/example_multi_input.yaml index 7d4d252b..31eaa44e 100644 --- a/configs/example_multi_input.yaml +++ b/configs/example_multi_input.yaml @@ -61,7 +61,7 @@ model: alias: loss_1 attached_to: head_1 - - name: BCEWithLogitsLoss + - name: CrossEntropyLoss alias: loss_2 attached_to: head_2 @@ -79,12 +79,29 @@ model: params: task: binary + visualizers: + - name: SegmentationVisualizer + alias: seg_vis_1 + attached_to: head_1 + params: + colors: "#FF5055" + + - name: SegmentationVisualizer + alias: seg_vis_2 + attached_to: head_2 + params: + colors: "#55AAFF" + +tracker: + project_name: multi_input_example + is_tensorboard: True + trainer: - batch_size: 8 - epochs: 3 + batch_size: 1 + epochs: 10 num_workers: 4 - validation_interval: 3 - num_log_images: -1 + validation_interval: 10 + num_log_images: 4 callbacks: - name: ExportOnTrainEnd diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index 45e02adf..eefdaa7e 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -180,7 +180,7 @@ def common( ] = None, ): if source: - exec(source.read_text()) + exec(source.read_text(), globals(), globals()) if __name__ == "__main__": diff --git a/luxonis_train/attached_modules/base_attached_module.py b/luxonis_train/attached_modules/base_attached_module.py index 6ac47820..e86cf24f 100644 --- a/luxonis_train/attached_modules/base_attached_module.py +++ b/luxonis_train/attached_modules/base_attached_module.py @@ -3,19 +3,11 @@ from typing import Generic from luxonis_ml.utils.registry import AutoRegisterMeta -from pydantic import ValidationError from torch import Tensor, nn from typing_extensions import TypeVarTuple, Unpack from luxonis_train.nodes import BaseNode -from luxonis_train.utils.general import validate_packet -from luxonis_train.utils.types import ( - BaseProtocol, - IncompatibleException, - Labels, - LabelType, - Packet, -) +from luxonis_train.utils.types import IncompatibleException, Labels, LabelType, Packet logger = logging.getLogger(__name__) @@ -33,36 +25,59 @@ class BaseAttachedModule( should be sufficient for most simple cases. More complex modules should override the `prepare` method. + When subclassing, the following methods can be overridden: + - L{prepare}: Prepares node outputs for the forward pass of the module. + Override this method if the default implementation is not sufficient. + + Additionally, the following attributes can be overridden: + - L{supported_labels}: List of label types that the module supports. + Used to determine which labels to extract from the dataset and to validate + compatibility with the node based on the node's tasks. + @type node: BaseNode - @ivar node: Reference to the node that this module is attached to. - @type protocol: type[BaseProtocol] - @ivar protocol: Schema for validating inputs to the module. - @type required_labels: list[LabelType] - @ivar required_labels: List of labels required by this model. + @param node: Reference to the node that this module is attached to. + + @type supported_labels: list[LabelType | tuple[LabelType, ...]] | None + @ivar supported_labels: List of label types that the module supports. + Elements of the list can be either a single label type or a tuple of + label types. In case of the latter, the module requires all of the + specified labels in the tuple to be present. 
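The `__main__.py` hunk above changes `exec(source.read_text())` to `exec(source.read_text(), globals(), globals())`, presumably so that anything the user-supplied source file defines (for example custom nodes or losses to be registered) stays visible after the call. A standalone illustration of the difference, unrelated to the CLI itself:

```python
code = "import math\nANSWER = math.sqrt(16)"


def load_bare():
    exec(code)  # assignments land in this function's local namespace and are lost on return


def load_into_module():
    exec(code, globals(), globals())  # assignments become module-level globals


load_bare()
print("ANSWER" in globals())  # False
load_into_module()
print(ANSWER)                 # 4.0
```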
+ + Example: + - C{[LabelType.CLASSIFICATION, LabelType.SEGMENTATION]} means that the + module requires either classification or segmentation labels. + - C{[(LabelType.BOUNDINGBOX, LabelType.KEYPOINTS), LabelType.SEGMENTATION]} + means that the module requires either both bounding box I{and} keypoint + labels I{or} segmentation labels. """ - def __init__( - self, - *, - node: BaseNode | None = None, - protocol: type[BaseProtocol] | None = None, - required_labels: list[LabelType] | None = None, - ): - """Base class for all modules that are attached to a L{LuxonisNode}. - - @type node: L{BaseNode} - @param node: Reference to the node that this module is attached to. - @type protocol: type[BaseProtocol] - @param protocol: Schema for validating inputs to the module. - @type required_labels: list[LabelType] - @param required_labels: List of labels required by this model. - """ + supported_labels: list[LabelType | tuple[LabelType, ...]] | None = None + + def __init__(self, *, node: BaseNode | None = None): super().__init__() - self.required_labels = required_labels or [] - self.protocol = protocol self._node = node self._epoch = 0 + self._required_labels: tuple[LabelType, ...] | None = None + if self._node and self.supported_labels and self.node.tasks: + node_tasks = set(self.node.tasks) + for required_labels in self.supported_labels: + if isinstance(required_labels, LabelType): + required_labels = (required_labels,) + if set(required_labels) <= node_tasks: + self._required_labels = required_labels + break + else: + raise ValueError( + f"Module {self.name} supports labels {self.supported_labels}, " + f"but is connected to node {self.node.name} which does not support any of them. " + f"{self.node.name} supports {list(self.node_tasks.keys())}." + ) + + @property + def name(self) -> str: + return self.__class__.__name__ + @property def node(self) -> BaseNode: """Reference to the node that this module is attached to. @@ -77,49 +92,122 @@ def node(self) -> BaseNode: ) return self._node - def get_label(self, labels: Labels) -> tuple[Tensor, LabelType]: - if len(self.required_labels) != 1: - if self.task in labels: - return labels[self.task] + @property + def required_labels(self) -> tuple[LabelType, ...]: + if self._required_labels is None: + raise ValueError(f"{self.name} does not require any labels.") + return self._required_labels + + @property + def node_tasks(self) -> dict[LabelType, str]: + if self.node._tasks is None: + raise ValueError("Node must have the `tasks` attribute specified.") + return self.node._tasks + + def get_label( + self, labels: Labels, label_type: LabelType | None = None + ) -> tuple[Tensor, LabelType]: + """Extracts a specific label from the labels dictionary. + + If the label type is not provided, the first label that matches the + required label type is returned. + + Example:: + >>> # supported_labels = [LabelType.SEGMENTATION] + >>> labels = {"segmentation": ..., "boundingbox": ...} + >>> get_label(labels) + (..., LabelType.SEGMENTATION) # returns the first matching label + >>> get_label(labels, LabelType.BOUNDINGBOX) + (..., LabelType.BOUNDINGBOX) # returns the bounding box label + >>> get_label(labels, LabelType.CLASSIFICATION) + IncompatibleException: Label 'classification' is missing from the dataset. + + @type labels: L{Labels} + @param labels: Labels from the dataset. + @type label_type: LabelType | None + @param label_type: Type of the label to extract. + @raises IncompatibleException: If the label is not found in the labels dictionary. 
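The constructor logic above walks `supported_labels`, treating a plain `LabelType` as a one-element tuple and a tuple as an all-or-nothing requirement, and keeps the first entry fully covered by the node's tasks. A self-contained sketch of that matching rule, using a toy enum rather than the real `LabelType`:

```python
from enum import Enum


class Label(str, Enum):  # stand-in for the luxonis LabelType
    CLASSIFICATION = "classification"
    SEGMENTATION = "segmentation"
    BOUNDINGBOX = "boundingbox"
    KEYPOINTS = "keypoints"


def resolve_required(supported, node_tasks):
    """Return the first supported label group fully covered by the node's tasks."""
    for group in supported:
        group = (group,) if isinstance(group, Label) else group
        if set(group) <= set(node_tasks):
            return group
    raise ValueError(
        f"node supports {sorted(t.value for t in node_tasks)}, none of {supported} match"
    )


# a keypoint head typically exposes both tasks, so the tuple requirement is satisfied:
print(resolve_required(
    [(Label.BOUNDINGBOX, Label.KEYPOINTS), Label.SEGMENTATION],
    {Label.BOUNDINGBOX: "boundingbox-task", Label.KEYPOINTS: "keypoints-task"},
))
# prints the matched group: bounding box together with keypoints
```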
+ @raises NotImplementedError: If the module requires multiple labels. For such cases, + the `prepare` method should be overridden. + + @rtype: tuple[Tensor, LabelType] + @return: Extracted label and its type. + """ + if label_type is not None: + task_name = self.node.get_task_name(label_type) + if task_name not in labels: + raise IncompatibleException.from_missing_task( + label_type.value, list(labels.keys()), self.name + ) + return labels[task_name] + + if len(self.required_labels) > 1: raise NotImplementedError( - f"{self.__class__.__name__} requires multiple labels, " - "the default `prepare` implementation does not support this." + f"{self.name} requires multiple labels. You must provide the " + "`label_type` argument to extract the desired label." ) for label, label_type in labels.values(): if label_type == self.required_labels[0]: return label, label_type raise IncompatibleException.from_missing_task( - self.required_labels[0].value, list(labels.keys()), self.__class__.__name__ + self.required_labels[0].value, list(labels.keys()), self.name ) - def get_input_tensors(self, inputs: Packet[Tensor]) -> list[Tensor]: - if self.protocol is not None: - return inputs[self.protocol.get_task()] - if self.node._task_type is not None: - return inputs[self.node._task_type.value] - return inputs[self.node.task] + def get_input_tensors( + self, inputs: Packet[Tensor], task_type: LabelType | str | None = None + ) -> list[Tensor]: + """Extracts the input tensors from the packet. - @property - def task(self) -> str: - """Task of the node that this module is attached to. + Example:: + >>> # supported_labels = [LabelType.SEGMENTATION] + >>> # node.tasks = {LabelType.SEGMENTATION: "segmentation-task"} + >>> inputs = [{"segmentation-task": [seg_tensor]}, {"features": [feat_tensor]}] + >>> get_input_tensors(inputs) # matches supported labels to node's tasks + [seg_tensor] + >>> get_input_tensors(inputs, "features") + [feat_tensor] + >>> get_input_tensors(inputs, LabelType.CLASSIFICATION) + ValueError: Task 'classification' is not supported by the node. + + @type inputs: L{Packet}[Tensor] + @param inputs: Output from the node this module is attached to. + @type task_type: LabelType | str | None + @param task_type: Type of the task to extract. Must be provided when the node + supports multiple tasks or if the module doesn't require any tasks. + @rtype: list[Tensor] + @return: Extracted input tensors - @rtype: str + @raises ValueError: If the task type is not supported by the node or if the task + is not present in the inputs. + + @raises NotImplementedError: If the module requires multiple labels. + For such cases, the `prepare` method should be overridden. """ - task = self.node._task - if task is None: - if self.required_labels and len(self.required_labels) == 1: - return self.required_labels[0].value - raise RuntimeError( - "Attempt to access `task` reference, but the node does not have a task. ", - f"You have to specify the task in the configuration for node {self.node.__class__.__name__}.", + if task_type is not None: + if isinstance(task_type, LabelType): + if task_type not in self.node_tasks: + raise ValueError( + f"Task {task_type.value} is not supported by the node " + f"{self.node.name}." 
+ ) + return inputs[self.node_tasks[task_type]] + else: + if task_type not in inputs: + raise ValueError(f"Task {task_type} is not present in the inputs.") + return inputs[task_type] + + if len(self.required_labels) > 1: + raise NotImplementedError( + f"{self.name} requires multiple labels, " + "you must provide the `task_type` argument to extract the desired input." ) - return task + return inputs[self.node_tasks[self.required_labels[0]]] def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]: """Prepares node outputs for the forward pass of the module. This default implementation selects the output and label based on - C{required_labels} attribute. If not set, then it returns the first + C{supported_labels} attribute. If not set, then it returns the first matching output and label. That is the first pair of outputs and labels that have the same type. For more complex modules this method should be overridden. @@ -138,10 +226,26 @@ def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]: @raises NotImplementedError: If the module requires multiple labels. @raises IncompatibleException: If the inputs are not compatible with the module. """ - if len(self.required_labels) > 1: - raise NotImplementedError( - "This module requires multiple labels, the default `prepare` " - "implementation does not support this." + if self.node._tasks is None: + raise ValueError( + f"{self.node.name} must have the `tasks` attribute specified " + f"for {self.name} to make use of the default `prepare` method." + ) + if self.supported_labels is None: + raise ValueError( + f"{self.name} must have the `supported_labels` attribute " + "specified in order to use the default `prepare` method." + ) + if len(self.supported_labels) > 1: + if len(self.node._tasks) > 1: + raise NotImplementedError( + f"{self.name} supports more than one label type" + f"and is connected to {self.node.name} node " + "which is a multi-task node. The default `prepare` " + "implementation cannot be used in this case." + ) + self.supported_labels = list( + set(self.supported_labels) & set(self.node._tasks) ) x = self.get_input_tensors(inputs) label, label_type = self.get_label(labels) @@ -151,32 +255,12 @@ def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]: x = x[0] else: logger.warning( - f"Module {self.__class__.__name__} expects a single tensor as input, " + f"Module {self.name} expects a single tensor as input, " f"but got {len(x)} tensors. Using the last tensor. " - f"If this is not the desired behavior, please override the `prepare` method of the attached module or the `wrap` method of {self.node.__class__.__name__}." + f"If this is not the desired behavior, please override the " + "`prepare` method of the attached module or the `wrap` " + f"method of {self.node.name}." ) x = x[-1] return x, label # type: ignore - - def validate(self, inputs: Packet[Tensor], labels: Labels) -> None: - """Validates that the inputs and labels are compatible with the module. - - @type inputs: L{Packet}[Tensor] - @param inputs: Output from the node, inputs to the attached module. - @type labels: L{Labels} - @param labels: Labels from the dataset. @raises L{IncompatibleException}: If the - inputs are not compatible with the module. 
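Taken together, `get_input_tensors` and `get_label` let the default `prepare` route a node's packet and the dataset labels by task name rather than by protocol. A toy walk-through of that routing for a single-task segmentation head; plain dictionaries stand in for the real `Packet` and `Labels` types and the tensor shapes are arbitrary:

```python
import torch

node_tasks = {"segmentation": "segmentation-task"}  # LabelType -> task name on the node
supported = ["segmentation"]                         # what the attached module declares

outputs = {                                          # packet produced by the head
    "features": [torch.randn(2, 64, 16, 16)],
    "segmentation-task": [torch.randn(2, 1, 128, 128)],
}
labels = {                                           # dataset labels keyed by task name
    "segmentation-task": (torch.randint(0, 2, (2, 1, 128, 128)).float(), "segmentation"),
    "boundingbox-task": (torch.zeros(0, 6), "boundingbox"),
}

# default `prepare`: match the module's single required label to the node's task name
task_name = node_tasks[supported[0]]
prediction = outputs[task_name][0]      # what get_input_tensors(...) would return
target, label_type = labels[task_name]  # what get_label(...) would return
print(prediction.shape, target.shape, label_type)
```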
- """ - if self.node.task is not None and self.node.task not in labels: - raise IncompatibleException.from_missing_task( - self.node.task, list(labels.keys()), self.__class__.__name__ - ) - - if self.protocol is not None: - try: - validate_packet(inputs, self.protocol) - except ValidationError as e: - raise IncompatibleException.from_validation_error( - e, self.__class__.__name__ - ) from e diff --git a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py index 83660463..c495e400 100644 --- a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py +++ b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py @@ -2,10 +2,8 @@ import torch import torch.nn.functional as F -from pydantic import Field from torch import Tensor, nn from torchvision.ops import box_convert -from typing_extensions import Annotated from luxonis_train.nodes import EfficientBBoxHead from luxonis_train.utils.assigners import ATSSAssigner, TaskAlignedAssigner @@ -15,25 +13,14 @@ compute_iou_loss, dist2bbox, ) -from luxonis_train.utils.types import ( - BaseProtocol, - IncompatibleException, - Labels, - LabelType, - Packet, -) +from luxonis_train.utils.types import IncompatibleException, Labels, LabelType, Packet from .base_loss import BaseLoss -class Protocol(BaseProtocol): - features: list[Tensor] - class_scores: Annotated[list[Tensor], Field(min_length=1, max_length=1)] - distributions: Annotated[list[Tensor], Field(min_length=1, max_length=1)] - - class AdaptiveDetectionLoss(BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]): node: EfficientBBoxHead + supported_labels = [LabelType.BOUNDINGBOX] class NodePacket(Packet[Tensor]): features: list[Tensor] @@ -67,13 +54,11 @@ def __init__( @type kwargs: dict @param kwargs: Additional arguments to pass to L{BaseLoss}. """ - super().__init__( - required_labels=[LabelType.BOUNDINGBOX], protocol=Protocol, **kwargs - ) + super().__init__(**kwargs) if not isinstance(self.node, EfficientBBoxHead): raise IncompatibleException( - f"Loss `{self.__class__.__name__}` is only " + f"Loss `{self.name}` is only " "compatible with nodes of type `EfficientBBoxHead`." ) self.iou_type: IoUType = iou_type @@ -97,13 +82,13 @@ def __init__( def prepare( self, outputs: Packet[Tensor], labels: Labels ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: - feats = outputs["features"] - pred_scores = outputs["class_scores"][0] - pred_distri = outputs["distributions"][0] + feats = self.get_input_tensors(outputs, "features") + pred_scores = self.get_input_tensors(outputs, "class_scores")[0] + pred_distri = self.get_input_tensors(outputs, "distributions")[0] batch_size = pred_scores.shape[0] device = pred_scores.device - target = labels[self.task][0].to(device) + target = self.get_label(labels)[0] gt_bboxes_scale = torch.tensor( [ self.original_img_size[1], diff --git a/luxonis_train/attached_modules/losses/base_loss.py b/luxonis_train/attached_modules/losses/base_loss.py index 61297f10..89ce8d8c 100644 --- a/luxonis_train/attached_modules/losses/base_loss.py +++ b/luxonis_train/attached_modules/losses/base_loss.py @@ -49,5 +49,4 @@ def run( Only the main loss is used for backpropagation. @raises IncompatibleException: If the inputs are not compatible with the module. 
""" - self.validate(inputs, labels) return self(*self.prepare(inputs, labels)) diff --git a/luxonis_train/attached_modules/losses/bce_with_logits.py b/luxonis_train/attached_modules/losses/bce_with_logits.py index 5800cbdb..442a89c3 100644 --- a/luxonis_train/attached_modules/losses/bce_with_logits.py +++ b/luxonis_train/attached_modules/losses/bce_with_logits.py @@ -1,12 +1,15 @@ from typing import Literal import torch +from luxonis_ml.data import LabelType from torch import Tensor, nn from .base_loss import BaseLoss class BCEWithLogitsLoss(BaseLoss[Tensor, Tensor]): + supported_labels = [LabelType.SEGMENTATION, LabelType.CLASSIFICATION] + def __init__( self, weight: list[float] | None = None, diff --git a/luxonis_train/attached_modules/losses/cross_entropy.py b/luxonis_train/attached_modules/losses/cross_entropy.py index f073401e..05a0f524 100644 --- a/luxonis_train/attached_modules/losses/cross_entropy.py +++ b/luxonis_train/attached_modules/losses/cross_entropy.py @@ -3,6 +3,7 @@ import torch import torch.nn as nn +from luxonis_ml.data import LabelType from torch import Tensor from .base_loss import BaseLoss @@ -15,6 +16,8 @@ class CrossEntropyLoss(BaseLoss[Tensor, Tensor]): """This criterion computes the cross entropy loss between input logits and target.""" + supported_labels = [LabelType.SEGMENTATION, LabelType.CLASSIFICATION] + def __init__( self, weight: list[float] | None = None, diff --git a/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py index 4fc2a7c0..2e6621de 100644 --- a/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py +++ b/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py @@ -2,10 +2,8 @@ import torch import torch.nn.functional as F -from pydantic import Field from torch import Tensor, nn from torchvision.ops import box_convert -from typing_extensions import Annotated from luxonis_train.attached_modules.metrics.object_keypoint_similarity import ( get_area_factor, @@ -19,28 +17,17 @@ compute_iou_loss, dist2bbox, ) -from luxonis_train.utils.types import ( - BaseProtocol, - IncompatibleException, - Labels, - LabelType, - Packet, -) +from luxonis_train.utils.types import IncompatibleException, Labels, LabelType, Packet from .base_loss import BaseLoss from .bce_with_logits import BCEWithLogitsLoss -class Protocol(BaseProtocol): - features: list[Tensor] - class_scores: Annotated[list[Tensor], Field(min_length=1, max_length=1)] - distributions: Annotated[list[Tensor], Field(min_length=1, max_length=1)] - - class EfficientKeypointBBoxLoss( BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor] ): node: EfficientKeypointBBoxHead + supported_labels = [(LabelType.BOUNDINGBOX, LabelType.KEYPOINTS)] class NodePacket(Packet[Tensor]): features: list[Tensor] @@ -87,13 +74,11 @@ def __init__( @type kwargs: dict @param kwargs: Additional arguments to pass to L{BaseLoss}. """ - super().__init__( - required_labels=[LabelType.BOUNDINGBOX], protocol=Protocol, **kwargs - ) + super().__init__(**kwargs) if not isinstance(self.node, EfficientKeypointBBoxHead): raise IncompatibleException( - f"Loss `{self.__class__.__name__}` is only " + f"Loss `{self.name}` is only " "compatible with nodes of type `EfficientKeypointBBoxHead`." 
) self.iou_type: IoUType = iou_type @@ -106,15 +91,11 @@ def __init__( self.n_heads = self.node.n_heads self.n_kps = self.node.n_keypoints - self.b_cross_entropy = BCEWithLogitsLoss( - pos_weight=torch.tensor([viz_pw]), **kwargs - ) + self.b_cross_entropy = BCEWithLogitsLoss(pos_weight=torch.tensor([viz_pw])) self.sigmas = get_sigmas( - sigmas=sigmas, n_keypoints=self.n_kps, class_name=self.__class__.__name__ - ) - self.area_factor = get_area_factor( - area_factor, class_name=self.__class__.__name__ + sigmas=sigmas, n_keypoints=self.n_kps, class_name=self.name ) + self.area_factor = get_area_factor(area_factor, class_name=self.name) self.n_warmup_epochs = n_warmup_epochs self.atts_assigner = ATSSAssigner(topk=9, n_classes=self.n_classes) @@ -131,16 +112,16 @@ def __init__( def prepare( self, outputs: Packet[Tensor], labels: Labels ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: - feats = outputs["features"] - pred_scores = outputs["class_scores"][0] - pred_distri = outputs["distributions"][0] - pred_kpts = outputs["keypoints_raw"][0] + feats = self.get_input_tensors(outputs, "features") + pred_scores = self.get_input_tensors(outputs, "class_scores")[0] + pred_distri = self.get_input_tensors(outputs, "distributions")[0] + pred_kpts = self.get_input_tensors(outputs, "keypoints_raw")[0] batch_size = pred_scores.shape[0] device = pred_scores.device - target_bbox = labels["boundingbox"][0].to(device) - target_kpts = labels["keypoints"][0].to(device) + target_kpts = self.get_label(labels, LabelType.KEYPOINTS)[0] + target_bbox = self.get_label(labels, LabelType.BOUNDINGBOX)[0] n_kpts = (target_kpts.shape[1] - 2) // 3 gt_bboxes_scale = torch.tensor( diff --git a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py index ff530b2a..a10d1f31 100644 --- a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py +++ b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py @@ -1,10 +1,8 @@ from typing import cast import torch -from pydantic import Field from torch import Tensor from torchvision.ops import box_convert -from typing_extensions import Annotated from luxonis_train.attached_modules.losses.keypoint_loss import KeypointLoss from luxonis_train.nodes import ImplicitKeypointBBoxHead @@ -13,13 +11,7 @@ match_to_anchor, process_bbox_predictions, ) -from luxonis_train.utils.types import ( - BaseProtocol, - IncompatibleException, - Labels, - LabelType, - Packet, -) +from luxonis_train.utils.types import IncompatibleException, Labels, LabelType, Packet from .base_loss import BaseLoss from .bce_with_logits import BCEWithLogitsLoss @@ -36,6 +28,7 @@ class ImplicitKeypointBBoxLoss(BaseLoss[list[Tensor], KeypointTargetType]): node: ImplicitKeypointBBoxHead + supported_labels = [(LabelType.BOUNDINGBOX, LabelType.KEYPOINTS)] def __init__( self, @@ -94,14 +87,11 @@ def __init__( @param balance: Balance for the different heads. Defaults to C{None}. """ - super().__init__( - required_labels=[LabelType.BOUNDINGBOX, LabelType.KEYPOINTS], - **kwargs, - ) + super().__init__(**kwargs) if not isinstance(self.node, ImplicitKeypointBBoxHead): raise IncompatibleException( - f"Loss `{self.__class__.__name__}` is only " + f"Loss `{self.name}` is only " "compatible with nodes of type `ImplicitKeypointBBoxHead`." ) self.n_classes = self.node.n_classes @@ -116,11 +106,6 @@ def __init__( f"Balance list must have at least {self.num_heads} elements." 
) - class Protocol(BaseProtocol): - features: Annotated[list[Tensor], Field(min_length=self.num_heads)] - - self.protocol = Protocol # type: ignore - self.min_objectness_iou = min_objectness_iou self.bbox_weight = bbox_loss_weight self.class_weight = class_loss_weight @@ -131,20 +116,16 @@ class Protocol(BaseProtocol): self.bias = bias - self.b_cross_entropy = BCEWithLogitsLoss( - pos_weight=torch.tensor([obj_pw]), **kwargs - ) + self.b_cross_entropy = BCEWithLogitsLoss(pos_weight=torch.tensor([obj_pw])) self.class_loss = SmoothBCEWithLogitsLoss( label_smoothing=label_smoothing, bce_pow=cls_pw, - **kwargs, ) self.keypoint_loss = KeypointLoss( n_keypoints=self.n_keypoints, bce_power=viz_pw, sigmas=sigmas, area_factor=area_factor, - **kwargs, ) self.positive_smooth_const = 1 - 0.5 * label_smoothing @@ -170,10 +151,10 @@ def prepare( feature_width, n_classes + box_offset + n_keypoints * 3) to get a tensor of shape (n_targets, n_classes + box_offset + n_keypoints * 3). """ - predictions = outputs["features"] + predictions = self.get_input_tensors(outputs, "features") - kpts = labels["keypoints"][0] - boxes = labels["boundingbox"][0] + kpts = self.get_label(labels, LabelType.KEYPOINTS)[0] + boxes = self.get_label(labels, LabelType.BOUNDINGBOX)[0] nkpts = (kpts.shape[1] - 2) // 3 targets = torch.zeros((len(boxes), nkpts * 3 + self.box_offset + 1)) diff --git a/luxonis_train/attached_modules/losses/keypoint_loss.py b/luxonis_train/attached_modules/losses/keypoint_loss.py index 8a5640cb..d5ca278f 100644 --- a/luxonis_train/attached_modules/losses/keypoint_loss.py +++ b/luxonis_train/attached_modules/losses/keypoint_loss.py @@ -1,7 +1,4 @@ -from typing import Annotated - import torch -from pydantic import Field from torch import Tensor from luxonis_train.attached_modules.metrics.object_keypoint_similarity import ( @@ -9,22 +6,15 @@ get_sigmas, ) from luxonis_train.utils.boxutils import process_keypoints_predictions -from luxonis_train.utils.types import ( - BaseProtocol, - Labels, - LabelType, - Packet, -) +from luxonis_train.utils.types import Labels, LabelType, Packet from .base_loss import BaseLoss from .bce_with_logits import BCEWithLogitsLoss -class Protocol(BaseProtocol): - keypoints: Annotated[list[Tensor], Field(min_length=1, max_length=1)] - - class KeypointLoss(BaseLoss[Tensor, Tensor]): + supported_labels = [LabelType.KEYPOINTS] + def __init__( self, n_keypoints: int, @@ -47,18 +37,14 @@ def __init__( default one. Defaults to C{None}. 
""" - super().__init__( - protocol=Protocol, required_labels=[LabelType.KEYPOINTS], **kwargs - ) + super().__init__(**kwargs) self.b_cross_entropy = BCEWithLogitsLoss( pos_weight=torch.tensor([bce_power]), **kwargs ) self.sigmas = get_sigmas( - sigmas=sigmas, n_keypoints=n_keypoints, class_name=self.__class__.__name__ - ) - self.area_factor = get_area_factor( - area_factor, class_name=self.__class__.__name__ + sigmas=sigmas, n_keypoints=n_keypoints, class_name=self.name ) + self.area_factor = get_area_factor(area_factor, class_name=self.name) def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Tensor, Tensor]: return torch.cat(inputs["keypoints"], dim=0), self.get_label(labels)[0] diff --git a/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py b/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py index 31e16051..f3affc74 100644 --- a/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py +++ b/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py @@ -1,5 +1,6 @@ from typing import Literal +from luxonis_ml.data import LabelType from torch import Tensor from torchvision.ops import sigmoid_focal_loss @@ -7,6 +8,8 @@ class SigmoidFocalLoss(BaseLoss[Tensor, Tensor]): + supported_labels = [LabelType.SEGMENTATION, LabelType.CLASSIFICATION] + def __init__( self, alpha: float = 0.25, diff --git a/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py b/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py index 48f827d6..2f92b230 100644 --- a/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py +++ b/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py @@ -1,6 +1,7 @@ from typing import Literal import torch +from luxonis_ml.data import LabelType from torch import Tensor from .base_loss import BaseLoss @@ -8,6 +9,8 @@ class SmoothBCEWithLogitsLoss(BaseLoss[list[Tensor], Tensor]): + supported_labels = [LabelType.SEGMENTATION, LabelType.CLASSIFICATION] + def __init__( self, label_smoothing: float = 0.0, @@ -40,7 +43,6 @@ def __init__( self.negative_smooth_const = 1.0 - 0.5 * label_smoothing self.positive_smooth_const = 0.5 * label_smoothing self.criterion = BCEWithLogitsLoss( - node=self.node, pos_weight=torch.tensor( [bce_pow], ), diff --git a/luxonis_train/attached_modules/losses/softmax_focal_loss.py b/luxonis_train/attached_modules/losses/softmax_focal_loss.py index 57b288f3..14f32e54 100644 --- a/luxonis_train/attached_modules/losses/softmax_focal_loss.py +++ b/luxonis_train/attached_modules/losses/softmax_focal_loss.py @@ -3,6 +3,7 @@ from typing import Literal import torch +from luxonis_ml.data import LabelType from torch import Tensor from luxonis_train.attached_modules.losses import BaseLoss @@ -11,6 +12,8 @@ class SoftmaxFocalLoss(BaseLoss[Tensor, Tensor]): + supported_labels = [LabelType.SEGMENTATION, LabelType.CLASSIFICATION] + def __init__( self, alpha: float | list[float] = 0.25, diff --git a/luxonis_train/attached_modules/metrics/base_metric.py b/luxonis_train/attached_modules/metrics/base_metric.py index f2334163..b2e456c9 100644 --- a/luxonis_train/attached_modules/metrics/base_metric.py +++ b/luxonis_train/attached_modules/metrics/base_metric.py @@ -56,5 +56,4 @@ def run_update(self, outputs: Packet[Tensor], labels: Labels) -> None: @param labels: The labels of the model. @raises L{IncompatibleException}: If the inputs are not compatible with the module. 
""" - self.validate(outputs, labels) self.update(*self.prepare(outputs, labels)) diff --git a/luxonis_train/attached_modules/metrics/common.py b/luxonis_train/attached_modules/metrics/common.py index 8d181840..340ad444 100644 --- a/luxonis_train/attached_modules/metrics/common.py +++ b/luxonis_train/attached_modules/metrics/common.py @@ -1,6 +1,7 @@ import logging import torchmetrics +from luxonis_ml.data import LabelType from torch import Tensor from .base_metric import BaseMetric @@ -10,11 +11,7 @@ class TorchMetricWrapper(BaseMetric): def __init__(self, **kwargs): - super().__init__( - node=kwargs.pop("node", None), - protocol=kwargs.pop("protocol", None), - required_labels=kwargs.pop("required_labels", None), - ) + super().__init__(node=kwargs.pop("node", None)) task = kwargs.get("task") if task is None: @@ -23,8 +20,7 @@ def __init__(self, **kwargs): else: task = "binary" logger.warning( - f"Task type not specified for {self.__class__.__name__}, " - f"assuming {task}." + f"Task type not specified for {self.name}, assuming '{task}'." ) kwargs["task"] = task self._task = task @@ -61,20 +57,25 @@ def reset(self) -> None: class Accuracy(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] Metric = torchmetrics.Accuracy class F1Score(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] Metric = torchmetrics.F1Score class JaccardIndex(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] Metric = torchmetrics.JaccardIndex class Precision(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] Metric = torchmetrics.Precision class Recall(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] Metric = torchmetrics.Recall diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision.py b/luxonis_train/attached_modules/metrics/mean_average_precision.py index c3eaad7e..ffdf5e22 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision.py @@ -2,12 +2,7 @@ from torch import Tensor from torchvision.ops import box_convert -from luxonis_train.utils.types import ( - BBoxProtocol, - Labels, - LabelType, - Packet, -) +from luxonis_train.utils.types import Labels, LabelType, Packet from .base_metric import BaseMetric @@ -20,12 +15,10 @@ class MeanAveragePrecision(BaseMetric): }. 
""" + supported_labels = [LabelType.BOUNDINGBOX] + def __init__(self, **kwargs): - super().__init__( - protocol=BBoxProtocol, - required_labels=[LabelType.BOUNDINGBOX], - **kwargs, - ) + super().__init__(**kwargs) self.metric = detection.MeanAveragePrecision() def update( @@ -38,9 +31,7 @@ def update( def prepare( self, outputs: Packet[Tensor], labels: Labels ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]: - label = labels["boundingbox"][ - 0 - ] # TODO: Think of a better way to deal with multi-task heads + box_label = self.get_label(labels)[0] output_nms = self.get_input_tensors(outputs) image_size = self.node.original_in_shape[1:] @@ -56,7 +47,7 @@ def prepare( } ) - curr_label = label[label[:, 0] == i] + curr_label = box_label[box_label[:, 0] == i] curr_bboxs = box_convert(curr_label[:, 2:], "xywh", "xyxy") curr_bboxs[:, 0::2] *= image_size[1] curr_bboxs[:, 1::2] *= image_size[0] diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py index 27df8102..0d558b43 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py @@ -12,27 +12,19 @@ get_area_factor, get_sigmas, ) -from luxonis_train.utils.types import ( - BBoxProtocol, - KeypointProtocol, - Labels, - LabelType, - Packet, -) +from luxonis_train.utils.types import Labels, LabelType, Packet from .base_metric import BaseMetric -class Protocol(KeypointProtocol, BBoxProtocol): - ... - - class MeanAveragePrecisionKeypoints(BaseMetric): """Mean Average Precision metric for keypoints. Uses C{OKS} as IoU measure. """ + supported_labels = [(LabelType.BOUNDINGBOX, LabelType.KEYPOINTS)] + is_differentiable: bool = False higher_is_better: bool = True full_state_update: bool = True @@ -77,16 +69,12 @@ def __init__( @type kwargs: Any @param kwargs: Additional arguments to pass to L{BaseMetric}. 
""" - super().__init__( - protocol=Protocol, - required_labels=[LabelType.BOUNDINGBOX, LabelType.KEYPOINTS], - **kwargs, - ) + super().__init__(**kwargs) self.n_keypoints = self.node.n_keypoints - self.sigmas = get_sigmas(sigmas, self.n_keypoints, self.__class__.__name__) - self.area_factor = get_area_factor(area_factor, self.__class__.__name__) + self.sigmas = get_sigmas(sigmas, self.n_keypoints, self.name) + self.area_factor = get_area_factor(area_factor, self.name) self.max_dets = max_dets allowed_box_formats = ("xyxy", "xywh", "cxcywh") @@ -108,8 +96,10 @@ def __init__( self.add_state("groundtruth_keypoints", default=[], dist_reduce_fx=None) def prepare(self, outputs: Packet[Tensor], labels: Labels): - kpts = labels["keypoints"][0] - boxes = labels["boundingbox"][0] + assert self.node.tasks is not None + kpts = self.get_label(labels, LabelType.KEYPOINTS)[0] + boxes = self.get_label(labels, LabelType.BOUNDINGBOX)[0] + nkpts = (kpts.shape[1] - 2) // 3 label = torch.zeros((len(boxes), nkpts * 3 + 6)) label[:, :2] = boxes[:, :2] @@ -122,8 +112,8 @@ def prepare(self, outputs: Packet[Tensor], labels: Labels): label_list_kpt_map = [] image_size = self.node.original_in_shape[1:] - output_kpts: list[Tensor] = outputs["keypoints"] - output_bboxes: list[Tensor] = outputs["boundingbox"] + output_kpts = self.get_input_tensors(outputs, LabelType.KEYPOINTS) + output_bboxes = self.get_input_tensors(outputs, LabelType.BOUNDINGBOX) for i in range(len(output_kpts)): output_list_kpt_map.append( { diff --git a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py index cfbae11f..4cbd1cac 100644 --- a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py +++ b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py @@ -5,12 +5,7 @@ from torch import Tensor from torchvision.ops import box_convert -from luxonis_train.utils.types import ( - KeypointProtocol, - Labels, - LabelType, - Packet, -) +from luxonis_train.utils.types import Labels, LabelType, Packet from .base_metric import BaseMetric @@ -30,6 +25,8 @@ class ObjectKeypointSimilarity( groundtruth_keypoints: list[Tensor] groundtruth_scales: list[Tensor] + supported_labels = [LabelType.KEYPOINTS] + def __init__( self, n_keypoints: int | None = None, @@ -52,19 +49,16 @@ def __init__( @param use_cocoeval_oks: Whether to use same OKS formula as in COCOeval or use the one from definition. Defaults to C{True}. """ - super().__init__( - required_labels=[LabelType.KEYPOINTS], protocol=KeypointProtocol, **kwargs - ) + super().__init__(**kwargs) if n_keypoints is None and self.node is None: raise ValueError( - f"Either `n_keypoints` or `node` must be provided " - f"to {self.__class__.__name__}." + f"Either `n_keypoints` or `node` must be provided to {self.name}." 
) self.n_keypoints = n_keypoints or self.node.n_keypoints - self.sigmas = get_sigmas(sigmas, self.n_keypoints, self.__class__.__name__) - self.area_factor = get_area_factor(area_factor, self.__class__.__name__) + self.sigmas = get_sigmas(sigmas, self.n_keypoints, self.name) + self.area_factor = get_area_factor(area_factor, self.name) self.use_cocoeval_oks = use_cocoeval_oks self.add_state("pred_keypoints", default=[], dist_reduce_fx=None) @@ -74,8 +68,9 @@ def __init__( def prepare( self, outputs: Packet[Tensor], labels: Labels ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]: - kpts_labels = labels["keypoints"][0] - bbox_labels = labels["boundingbox"][0] + assert self.node.tasks is not None + kpts_labels = self.get_label(labels, LabelType.KEYPOINTS)[0] + bbox_labels = self.get_label(labels, LabelType.BOUNDINGBOX)[0] num_keypoints = (kpts_labels.shape[1] - 2) // 3 label = torch.zeros((len(bbox_labels), num_keypoints * 3 + 6)) label[:, :2] = bbox_labels[:, :2] @@ -88,7 +83,9 @@ def prepare( label_list_oks = [] image_size = self.node.original_in_shape[1:] - for i, pred_kpt in enumerate(outputs["keypoints"]): + for i, pred_kpt in enumerate( + self.get_input_tensors(outputs, LabelType.KEYPOINTS) + ): output_list_oks.append({"keypoints": pred_kpt}) curr_label = label[label[:, 0] == i].to(pred_kpt.device) @@ -273,11 +270,14 @@ def get_sigmas( def get_area_factor(area_factor: float | None, class_name: str | None) -> float: """Set the default area factor if not defined.""" + factor = 0.53 if area_factor is None: - warn_msg = "Default area_factor of 0.53 is being used bbox area scaling." + warn_msg = ( + f"Default area_factor of {factor} is being used for bbox area scaling." + ) if class_name: warn_msg = f"[{class_name}] {warn_msg}" logger.warning(warn_msg) - return 0.53 + return factor else: return area_factor diff --git a/luxonis_train/attached_modules/visualizers/base_visualizer.py b/luxonis_train/attached_modules/visualizers/base_visualizer.py index 050c9f4a..5fa6db62 100644 --- a/luxonis_train/attached_modules/visualizers/base_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/base_visualizer.py @@ -62,5 +62,4 @@ def run( inputs: Packet[Tensor], labels: Labels, ) -> Tensor | tuple[Tensor, Tensor] | tuple[Tensor, list[Tensor]]: - self.validate(inputs, labels) return self(label_canvas, prediction_canvas, *self.prepare(inputs, labels)) diff --git a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py index 14dd1ab9..df3ac933 100644 --- a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py @@ -3,18 +3,15 @@ import torch from torch import Tensor -from luxonis_train.utils.types import BBoxProtocol, LabelType +from luxonis_train.utils.types import LabelType from .base_visualizer import BaseVisualizer -from .utils import ( - Color, - draw_bounding_box_labels, - draw_bounding_boxes, - get_color, -) +from .utils import Color, draw_bounding_box_labels, draw_bounding_boxes, get_color class BBoxVisualizer(BaseVisualizer[list[Tensor], Tensor]): + supported_labels = [LabelType.BOUNDINGBOX] + def __init__( self, labels: dict[int, str] | list[str] | None = None, @@ -49,19 +46,18 @@ def __init__( @type font_size: int | None @param font_size: The font size to use for the labels. Defaults to C{None}. 
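Both the OKS metric and `MeanAveragePrecisionKeypoints` rely on the helpers above for the per-keypoint sigmas and the default `area_factor` of 0.53 applied to bounding-box areas. For reference, a standalone implementation of the COCO-style OKS these helpers feed into; this mirrors the standard COCOeval formula rather than the exact code in this patch:

```python
import torch


def coco_oks(pred: torch.Tensor, gt: torch.Tensor, vis: torch.Tensor,
             sigmas: torch.Tensor, area: float) -> torch.Tensor:
    """Object Keypoint Similarity for a single instance.

    pred, gt: (K, 2) keypoint coordinates; vis: (K,) visibility flags;
    sigmas: (K,) per-keypoint constants; area: object area (here bbox area * area_factor).
    """
    d2 = ((pred - gt) ** 2).sum(-1)        # squared distance per keypoint
    k2 = (2 * sigmas) ** 2
    e = d2 / (2 * area * k2 + 1e-9)
    visible = vis > 0
    return torch.exp(-e)[visible].mean() if visible.any() else torch.tensor(0.0)


sigmas = torch.full((4,), 0.05)
gt = torch.tensor([[10.0, 10.0], [20.0, 10.0], [10.0, 20.0], [20.0, 20.0]])
pred = gt + 0.5
print(coco_oks(pred, gt, torch.ones(4), sigmas, area=0.53 * 100.0))
```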
""" - super().__init__( - required_labels=[LabelType.BOUNDINGBOX], protocol=BBoxProtocol, **kwargs - ) + super().__init__(**kwargs) if isinstance(labels, list): labels = {i: label for i, label in enumerate(labels)} - self.labels = labels or { + self.bbox_labels = labels or { i: label for i, label in enumerate(self.node.class_names) } + if colors is None: - colors = {label: get_color(i) for i, label in self.labels.items()} + colors = {label: get_color(i) for i, label in self.bbox_labels.items()} if isinstance(colors, list): - colors = {self.labels[i]: color for i, color in enumerate(colors)} + colors = {self.bbox_labels[i]: color for i, color in enumerate(colors)} self.colors = colors self.fill = fill self.width = width @@ -180,7 +176,7 @@ def forward( label_canvas, targets, color_dict=self.colors, - label_dict=self.labels, + label_dict=self.bbox_labels, draw_labels=self.draw_labels, fill=self.fill, font=self.font, @@ -190,7 +186,7 @@ def forward( predictions_viz = self.draw_predictions( prediction_canvas, predictions, - label_dict=self.labels, + label_dict=self.bbox_labels, color_dict=self.colors, draw_labels=self.draw_labels, fill=self.fill, diff --git a/luxonis_train/attached_modules/visualizers/classification_visualizer.py b/luxonis_train/attached_modules/visualizers/classification_visualizer.py index e5920d21..20a5710e 100644 --- a/luxonis_train/attached_modules/visualizers/classification_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/classification_visualizer.py @@ -2,17 +2,16 @@ import matplotlib.pyplot as plt import numpy as np import torch +from luxonis_ml.data import LabelType from torch import Tensor from .base_visualizer import BaseVisualizer -from .utils import ( - figure_to_torch, - numpy_to_torch_img, - torch_img_to_numpy, -) +from .utils import figure_to_torch, numpy_to_torch_img, torch_img_to_numpy class ClassificationVisualizer(BaseVisualizer[Tensor, Tensor]): + supported_labels = [LabelType.CLASSIFICATION] + def __init__( self, include_plot: bool = True, diff --git a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py index 18d45ece..287d5e1c 100644 --- a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py @@ -1,21 +1,16 @@ from copy import deepcopy import torch +from luxonis_ml.data import LabelType from torch import Tensor -from luxonis_train.utils.types import ( - LabelType, -) - from .base_visualizer import BaseVisualizer -from .utils import ( - Color, - draw_keypoint_labels, - draw_keypoints, -) +from .utils import Color, draw_keypoint_labels, draw_keypoints class KeypointVisualizer(BaseVisualizer[list[Tensor], Tensor]): + supported_labels = [LabelType.KEYPOINTS] + def __init__( self, visibility_threshold: float = 0.5, @@ -40,7 +35,7 @@ def __init__( @param nonvisible_color: Color of nonvisible keypoints. If C{None}, nonvisible keypoints are not drawn. Defaults to C{None}. 
""" - super().__init__(required_labels=[LabelType.KEYPOINTS], **kwargs) + super().__init__(**kwargs) self.visibility_threshold = visibility_threshold self.connectivity = connectivity self.visible_color = visible_color diff --git a/luxonis_train/attached_modules/visualizers/multi_visualizer.py b/luxonis_train/attached_modules/visualizers/multi_visualizer.py index 99b64bf0..c7925ecc 100644 --- a/luxonis_train/attached_modules/visualizers/multi_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/multi_visualizer.py @@ -1,11 +1,7 @@ from torch import Tensor from luxonis_train.utils.registry import VISUALIZERS -from luxonis_train.utils.types import ( - Kwargs, - Labels, - Packet, -) +from luxonis_train.utils.types import Kwargs, Labels, Packet from .base_visualizer import BaseVisualizer diff --git a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py index f5348873..b65fd43a 100644 --- a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py @@ -3,21 +3,18 @@ import torch from torch import Tensor -from luxonis_train.utils.types import Labels, LabelType, Packet, SegmentationProtocol +from luxonis_train.utils.types import LabelType from .base_visualizer import BaseVisualizer -from .utils import ( - Color, - draw_segmentation_labels, - get_color, - seg_output_to_bool, -) +from .utils import Color, draw_segmentation_labels, get_color, seg_output_to_bool logger = logging.getLogger(__name__) log_disable = False class SegmentationVisualizer(BaseVisualizer[Tensor, Tensor]): + supported_labels = [LabelType.SEGMENTATION] + def __init__( self, colors: Color | list[Color] = "#5050FF", @@ -32,11 +29,7 @@ def __init__( @type alpha: float @param alpha: Alpha value of the segmentation masks. Defaults to C{0.6}. 
""" - super().__init__( - protocol=SegmentationProtocol, - required_labels=[LabelType.SEGMENTATION], - **kwargs, - ) + super().__init__(**kwargs) if not isinstance(colors, list): colors = [colors] @@ -44,9 +37,6 @@ def __init__( self.background_class = background_class self.alpha = alpha - def prepare(self, output: Packet[Tensor], label: Labels) -> tuple[Tensor, Tensor]: - return output[self.node.task][0], label[self.task][0] - @staticmethod def draw_predictions( canvas: Tensor, diff --git a/luxonis_train/callbacks/luxonis_progress_bar.py b/luxonis_train/callbacks/luxonis_progress_bar.py index 16d173e7..51c9541d 100644 --- a/luxonis_train/callbacks/luxonis_progress_bar.py +++ b/luxonis_train/callbacks/luxonis_progress_bar.py @@ -28,6 +28,7 @@ def get_metrics( ) -> dict[str, int | str | float | dict[str, float]]: # NOTE: there might be a cleaner way of doing this items = super().get_metrics(trainer, pl_module) + items.pop("v_num", None) if trainer.training and pl_module.training_step_outputs: items["Loss"] = pl_module.training_step_outputs[-1]["loss"].item() return items diff --git a/luxonis_train/core/inferer.py b/luxonis_train/core/inferer.py index f7a6def3..80a89d35 100644 --- a/luxonis_train/core/inferer.py +++ b/luxonis_train/core/inferer.py @@ -3,9 +3,7 @@ import cv2 -from luxonis_train.attached_modules.visualizers import ( - get_unnormalized_images, -) +from luxonis_train.attached_modules.visualizers import get_unnormalized_images from luxonis_train.utils.config import Config from .trainer import Trainer @@ -17,7 +15,7 @@ def __init__( cfg: str | dict[str, Any] | Config | None = None, opts: list[str] | tuple[str, ...] | None = None, view: Literal["train", "test", "val"] = "val", - save_dir: Path | None = None, + save_dir: str | Path | None = None, ): opts = list(opts or []) opts += ["trainer.batch_size", "1"] @@ -28,7 +26,7 @@ def __init__( self.loader = self.pytorch_loaders["test"] else: self.loader = self.pytorch_loaders["val"] - self.save_dir = save_dir + self.save_dir = Path(save_dir) if save_dir is not None else None if self.save_dir is not None: self.save_dir.mkdir(exist_ok=True, parents=True) diff --git a/luxonis_train/models/luxonis_model.py b/luxonis_train/models/luxonis_model.py index bebca871..f541b2da 100644 --- a/luxonis_train/models/luxonis_model.py +++ b/luxonis_train/models/luxonis_model.py @@ -5,10 +5,7 @@ import lightning.pytorch as pl import torch -from lightning.pytorch.callbacks import ( - ModelCheckpoint, - RichModelSummary, -) +from lightning.pytorch.callbacks import ModelCheckpoint, RichModelSummary from lightning.pytorch.utilities import rank_zero_only # type: ignore from torch import Size, Tensor, nn @@ -31,11 +28,7 @@ ) from luxonis_train.nodes import BaseNode from luxonis_train.utils.config import AttachedModuleConfig, Config -from luxonis_train.utils.general import ( - DatasetMetadata, - to_shape_packet, - traverse_graph, -) +from luxonis_train.utils.general import DatasetMetadata, to_shape_packet, traverse_graph from luxonis_train.utils.registry import CALLBACKS, OPTIMIZERS, SCHEDULERS, Registry from luxonis_train.utils.tracker import LuxonisTrackerPL from luxonis_train.utils.types import Kwargs, Labels, Packet @@ -148,7 +141,7 @@ def __init__( for node_cfg in self.cfg.model.nodes: node_name = node_cfg.name - Node = BaseNode.REGISTRY.get(node_name) + Node: type[BaseNode] = BaseNode.REGISTRY.get(node_name) node_name = node_cfg.alias or node_name if node_cfg.freezing.active: epochs = self.cfg.trainer.epochs @@ -159,7 +152,26 @@ def __init__( else: 
unfreeze_after = int(node_cfg.freezing.unfreeze_after * epochs) frozen_nodes.append((node_name, unfreeze_after)) - nodes[node_name] = (Node, {**node_cfg.params, "task": node_cfg.task}) + + if node_cfg.task is not None: + if Node.tasks is None: + raise ValueError( + f"Cannot define tasks for node {node_name}." + "This node doesn't specify any tasks." + ) + if isinstance(node_cfg.task, str): + assert Node.tasks + if len(Node.tasks) > 1: + raise ValueError( + f"Node {node_name} specifies multiple tasks, " + "but only one task is specified in the config. " + "Specify the tasks as a dictionary instead." + ) + + node_cfg.task = {next(iter(Node.tasks)): node_cfg.task} + else: + node_cfg.task = {**Node._process_tasks(Node.tasks), **node_cfg.task} + nodes[node_name] = (Node, {**node_cfg.params, "_tasks": node_cfg.task}) # Handle inputs for this node if node_cfg.input_sources: diff --git a/luxonis_train/models/predefined_models/README.md b/luxonis_train/models/predefined_models/README.md index ddf0b46d..bdf49178 100644 --- a/luxonis_train/models/predefined_models/README.md +++ b/luxonis_train/models/predefined_models/README.md @@ -42,6 +42,7 @@ See an example configuration file using this predefined model [here](../../../co | Key | Type | Default value | Description | | ----------------- | --------------------------------- | ------------- | ------------------------------------------ | | task | Literal\["binary", "multiclass"\] | "binary" | Type of the task of the model. | +| task_name | str \| None | None | Custom task name for the head. | | backbone | str | "MicroNet" | Name of the node to be used as a backbone. | | backbone_params | dict | {} | Additional parameters to the backbone. | | head_params | dict | {} | Additional parameters to the head. | @@ -65,14 +66,15 @@ See an example configuration file using this predefined model [here](../../../co **Params** -| Key | Type | Default value | Description | -| ----------------- | ---- | ------------- | ----------------------------------------- | -| use_neck | bool | True | Whether to include the neck in the model. | -| backbone_params | dict | {} | Additional parameters to the backbone. | -| neck_params | dict | {} | Additional parameters to the neck. | -| head_params | dict | {} | Additional parameters to the head. | -| loss_params | dict | {} | Additional parameters to the loss. | -| visualizer_params | dict | {} | Additional parameters to the visualizer. | +| Key | Type | Default value | Description | +| ----------------- | ----------- | ------------- | ----------------------------------------- | +| task_name | str \| None | None | Custom task name for the head. | +| use_neck | bool | True | Whether to include the neck in the model. | +| backbone_params | dict | {} | Additional parameters to the backbone. | +| neck_params | dict | {} | Additional parameters to the neck. | +| head_params | dict | {} | Additional parameters to the head. | +| loss_params | dict | {} | Additional parameters to the loss. | +| visualizer_params | dict | {} | Additional parameters to the visualizer. 
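The hunk above normalizes a node config's `task` field: a bare string is only legal for single-task nodes, where it renames the node's only task, while a dictionary is merged over the node's default task names. A small standalone sketch of that normalization, with plain string keys standing in for the real `LabelType`/config classes:

```python
def normalize_task(node_defaults: dict[str, str], task) -> dict[str, str]:
    """node_defaults: label type -> default task name; task: str | dict | None from the config."""
    if task is None:
        return dict(node_defaults)
    if isinstance(task, str):
        if len(node_defaults) > 1:
            raise ValueError("multi-task node: specify the tasks as a dictionary instead")
        return {next(iter(node_defaults)): task}
    return {**node_defaults, **task}


seg_head = {"segmentation": "segmentation"}
kpt_head = {"boundingbox": "boundingbox", "keypoints": "keypoints"}

print(normalize_task(seg_head, "segmentation-task"))
# {'segmentation': 'segmentation-task'}
print(normalize_task(kpt_head, {"keypoints": "keypoints-task"}))
# {'boundingbox': 'boundingbox', 'keypoints': 'keypoints-task'}
```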
| ## KeypointDetectionModel @@ -84,8 +86,10 @@ See an example configuration file using this predefined model [here](../../../co | ------------------------------------------------------------------------------------------------------- | ---------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------- | | [EfficientRep](../../nodes/README.md#efficientrep) | kpt_detection_backbone | Backbone of the model. | | [RepPANNeck](../../nodes/README.md#reppanneck) | kpt_detection_neck | Neck of the model. | -| [ImplicitKeypointBBoxHead](../../nodes/README.md#implicitkeypointbboxhead) | kpt_detection_head | Head of the model. | -| [ImplicitKeypointBBoxLoss](../../attached_modules/losses/README.md#implicitkeypointbboxloss) | kpt_detection_loss | Loss of the model. | +| [ImplicitKeypointBBoxHead](../../nodes/README.md#implicitkeypointbboxhead) | kpt_detection_head | Possible head of the model, changes depending on the value of `head_type` argument. | +| [EfficientKeypointBBoxHead](../../nodes/README.md#efficientkeypointbboxhead) | kpt_detection_head | Possible head of the model, changes depending on the value of `head_type` argument | +| [ImplicitKeypointBBoxLoss](../../attached_modules/losses/README.md#implicitkeypointbboxloss) | kpt_detection_loss | Loss of the model if the `head_type` is set to "ImplicitKeypointBBoxHead" | +| [EfficientKeypointBBoxLoss](../../attached_modules/losses/README.md#efficientkeypointbboxloss) | kpt_detection_loss | Loss of the model if `head_type` is set to "EfficientKeypointBBoxHead". | | [ObjectKeypointSimilarity](../../attached_modules/metrics/README.md#objectkeypointsimilarity) | kpt_detection_oks | Main metric of the model. | | [MeanAveragePrecisionKeypoints](../../attached_modules/metrics/README.md#meanaverageprecisionkeypoints) | kpt_detection_map | Secondary metric of the model. | | [BBoxVisualizer](../../attached_modules/visualizers/README.md#bboxvisualizer) | | Visualizer for bounding boxes. Combined with keypoint visualizer in [MultiVisualizer](../../attached_modules/visualizers/README.md#multivisualizer). | @@ -93,15 +97,18 @@ See an example configuration file using this predefined model [here](../../../co **Params** -| Key | Type | Default value | Description | -| ---------------------- | ---- | ------------- | ------------------------------------------------- | -| use_neck | bool | True | Whether to include the neck in the model. | -| backbone_params | dict | {} | Additional parameters to the backbone. | -| neck_params | dict | {} | Additional parameters to the neck. | -| head_params | dict | {} | Additional parameters to the head. | -| loss_params | dict | {} | Additional parameters to the loss. | -| kpt_visualizer_params | dict | {} | Additional parameters to the keypoint visualizer. | -| bbox_visualizer_params | dict | {} | Additional parameters to the bbox visualizer. | +| Key | Type | Default value | Description | +| ---------------------- | ----------- | --------------------------------------------------------- | ------------------------------------------------- | +| use_neck | bool | True | Whether to include the neck in the model. | +| backbone_params | dict | {} | Additional parameters to the backbone. | +| neck_params | dict | {} | Additional parameters to the neck. | +| head_params | dict | {} | Additional parameters to the head. | +| head_type | str | "ImplicitKeypointBBoxHead" \| "EfficientKeypointBBoxHead" | Type of the head. 
| +| loss_params | dict | {} | Additional parameters to the loss. | +| kpt_visualizer_params | dict | {} | Additional parameters to the keypoint visualizer. | +| bbox_visualizer_params | dict | {} | Additional parameters to the bbox visualizer. | +| bbox_task_name | str \| None | None | Custom task name for the detection head. | +| kpt_task_name | str \| None | None | Custom task name for the keypoint head. | ## ClassificationModel @@ -125,6 +132,7 @@ See an example configuration file using this predefined model [here](../../../co | Key | Type | Default value | Description | | ----------------- | ------------------------------------- | ------------- | ------------------------------------------ | | task | Literal\["multiclass", "multilabel"\] | "multiclass" | Type of the task of the model. | +| task_name | str \| None | None | Custom task name for the head. | | backbone | str | "MicroNet" | Name of the node to be used as a backbone. | | backbone_params | dict | {} | Additional parameters to the backbone. | | head_params | dict | {} | Additional parameters to the head. | diff --git a/luxonis_train/models/predefined_models/classification_model.py b/luxonis_train/models/predefined_models/classification_model.py index 33e56242..c9d782eb 100644 --- a/luxonis_train/models/predefined_models/classification_model.py +++ b/luxonis_train/models/predefined_models/classification_model.py @@ -20,6 +20,7 @@ class ClassificationModel(BasePredefinedModel): head_params: Kwargs = field(default_factory=dict) loss_params: Kwargs = field(default_factory=dict) visualizer_params: Kwargs = field(default_factory=dict) + task_name: str | None = None @property def nodes(self) -> list[ModelNodeConfig]: @@ -36,6 +37,7 @@ def nodes(self) -> list[ModelNodeConfig]: inputs=["classification_backbone"], freezing=self.head_params.pop("freezing", {}), params=self.head_params, + task=self.task_name, ), ] diff --git a/luxonis_train/models/predefined_models/detection_model.py b/luxonis_train/models/predefined_models/detection_model.py index 41a7dfdc..e9db4462 100644 --- a/luxonis_train/models/predefined_models/detection_model.py +++ b/luxonis_train/models/predefined_models/detection_model.py @@ -19,6 +19,7 @@ class DetectionModel(BasePredefinedModel): head_params: Kwargs = field(default_factory=dict) loss_params: Kwargs = field(default_factory=dict) visualizer_params: Kwargs = field(default_factory=dict) + task_name: str | None = None @property def nodes(self) -> list[ModelNodeConfig]: @@ -48,6 +49,7 @@ def nodes(self) -> list[ModelNodeConfig]: freezing=self.head_params.pop("freezing", {}), inputs=["detection_neck"] if self.use_neck else ["detection_backbone"], params=self.head_params, + task=self.task_name, ) ) return nodes diff --git a/luxonis_train/models/predefined_models/keypoint_detection_model.py b/luxonis_train/models/predefined_models/keypoint_detection_model.py index 96bef885..588911c6 100644 --- a/luxonis_train/models/predefined_models/keypoint_detection_model.py +++ b/luxonis_train/models/predefined_models/keypoint_detection_model.py @@ -1,4 +1,5 @@ from dataclasses import dataclass, field +from typing import Literal from luxonis_train.utils.config import ( AttachedModuleConfig, @@ -18,8 +19,13 @@ class KeypointDetectionModel(BasePredefinedModel): neck_params: Kwargs = field(default_factory=dict) head_params: Kwargs = field(default_factory=dict) loss_params: Kwargs = field(default_factory=dict) + head_type: Literal[ + "ImplicitKeypointBBoxHead", "EfficientKeypointBBoxHead" + ] = "ImplicitKeypointBBoxHead" 
kpt_visualizer_params: Kwargs = field(default_factory=dict) bbox_visualizer_params: Kwargs = field(default_factory=dict) + bbox_task_name: str | None = None + kpt_task_name: str | None = None @property def nodes(self) -> list[ModelNodeConfig]: @@ -42,15 +48,22 @@ def nodes(self) -> list[ModelNodeConfig]: ) ) + task = {} + if self.bbox_task_name is not None: + task["bbox"] = self.bbox_task_name + if self.kpt_task_name is not None: + task["keypoints"] = self.kpt_task_name + nodes.append( ModelNodeConfig( - name="ImplicitKeypointBBoxHead", + name=self.head_type, alias="kpt_detection_head", inputs=["kpt_detection_neck"] if self.use_neck else ["kpt_detection_backbone"], freezing=self.head_params.pop("freezing", {}), params=self.head_params, + task=task, ) ) return nodes @@ -59,7 +72,7 @@ def nodes(self) -> list[ModelNodeConfig]: def losses(self) -> list[LossModuleConfig]: return [ LossModuleConfig( - name="ImplicitKeypointBBoxLoss", + name=self.head_type.replace("Head", "Loss"), attached_to="kpt_detection_head", params=self.loss_params, weight=1.0, diff --git a/luxonis_train/models/predefined_models/segmentation_model.py b/luxonis_train/models/predefined_models/segmentation_model.py index 9bc936a7..b5e81f76 100644 --- a/luxonis_train/models/predefined_models/segmentation_model.py +++ b/luxonis_train/models/predefined_models/segmentation_model.py @@ -20,6 +20,7 @@ class SegmentationModel(BasePredefinedModel): head_params: Kwargs = field(default_factory=dict) loss_params: Kwargs = field(default_factory=dict) visualizer_params: Kwargs = field(default_factory=dict) + task_name: str | None = None @property def nodes(self) -> list[ModelNodeConfig]: @@ -36,6 +37,7 @@ def nodes(self) -> list[ModelNodeConfig]: inputs=["segmentation_backbone"], freezing=self.head_params.pop("freezing", {}), params=self.head_params, + task=self.task_name, ), ] diff --git a/luxonis_train/nodes/README.md b/luxonis_train/nodes/README.md index 6a29d237..2f147e23 100644 --- a/luxonis_train/nodes/README.md +++ b/luxonis_train/nodes/README.md @@ -24,10 +24,16 @@ arbitrarily as long as the two nodes are compatible with each other. Every node takes these parameters: -| Key | Type | Default value | Description | -| ------------ | ----------- | ------------- | ------------------------------------------------------------------------------------------------------------------------- | -| attach_index | int \| None | None | Index of previous output that the head attaches to. Each node has a sensible default. Usually should not be manually set. | -| n_classes | int \| None | None | Number of classes in the dataset. Inferred from the dataset if not provided. | +| Key | Type | Default value | Description | +| --------- | ----------- | ------------- | ---------------------------------------------------------------------------- | +| n_classes | int \| None | None | Number of classes in the dataset. Inferred from the dataset if not provided. | + +In addition, the following class attributes can be overriden: + +| Key | Type | Default value | Description | +| ------------ | ------------------------------------------------------------------- | ------------- | --------------------------------------------------------------------------------------------------------------------------------------------- | +| attach_index | int \| "all" \| Tuple\[int, int\] \| Tuple\[int, int, int\] \| None | None | Index of previous output that the head attaches to. Each node has a sensible default. Usually should not be manually set in most cases. 
| +| tasks | List\[LabelType\] \| Dict\[LabelType, str\] \| None | None | Tasks supported by the node. Should be overriden for head nodes. Either a list of tasks or a dictionary mapping tasks to their default names. | Additional parameters for specific nodes are listed below. diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py index 8ee03591..34322be5 100644 --- a/luxonis_train/nodes/base_node.py +++ b/luxonis_train/nodes/base_node.py @@ -50,6 +50,47 @@ class BaseNode( The L{run} method combines the C{unwrap}, C{forward} and C{wrap} methods together with input validation. + When subclassing, the following methods should be implemented: + - L{forward}: Forward pass of the module. + - L{unwrap}: Optional. Unwraps the inputs from the input packet. + The default implementation expects a single input with `features` key. + - L{wrap}: Optional. Wraps the output of the forward pass + into a `Packet[Tensor]`. The default implementation expects wraps the output + of the forward pass into a packet with either "features" or the task name as the key. + + Additionally, the following class attributes can be defined: + - L{input_protocols}: List of input protocols used to validate inputs to the node. + - L{attach_index}: Index of previous output that this node attaches to. + - L{tasks}: Dictionary of tasks that the node supports. + + Example:: + class MyNode(BaseNode): + # equivalent to `tasks = {LabelType.CLASSIFICATION: "classification"}` + tasks = [LabelType.CLASSIFICATION] + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.nn = nn.Sequential( + nn.Linear(10, 10), + nn.ReLU(), + nn.Linear(10, 10), + ) + + # Roughly equivalent to the default implementation + def unwrap(self, inputs: list[Packet[Tensor]]) -> Tensor: + assert len(inputs) == 1 + assert "features" in inputs[0] + return inputs[0]["features"] + + def forward(self, inputs: Tensor) -> Tensor: + return self.nn(inputs) + + # Roughly equivalent to the default implementation + def wrap(output: Tensor) -> Packet[Tensor]: + # The key of the main node output have to be the same as the + # default task name for it to be automatically recognized + # by the attached modules. + return {"classification": [output]} @type input_shapes: list[Packet[Size]] | None @param input_shapes: List of input shapes for the module. @@ -62,16 +103,6 @@ class BaseNode( @param dataset_metadata: Metadata of the dataset. Some nodes won't function if not provided. - @type attach_index: AttachIndexType - @param attach_index: Index of previous output that this node attaches to. - Can be a single integer to specify a single output, a tuple of - two or three integers to specify a range of outputs or `"all"` to - specify all outputs. Defaults to "all". Python indexing conventions apply. - - @type in_protocols: list[type[BaseModel]] - @param in_protocols: List of input protocols used to validate inputs to the node. - Defaults to [FeaturesProtocol]. - @type n_classes: int | None @param n_classes: Number of classes in the dataset. Provide only in case `dataset_metadata` is not provided. Defaults to None. @@ -79,24 +110,52 @@ class BaseNode( @type in_sizes: Size | list[Size] | None @param in_sizes: List of input sizes for the node. Provide only in case the `input_shapes` were not provided. + + @type _tasks: dict[LabelType, str] | None + @param _tasks: Dictionary of tasks that the node supports. Overrides the + class L{tasks} attribute. Shouldn't be provided by the user in most cases. 
+ + @type input_protocols: list[type[BaseModel]] + @ivar input_protocols: List of input protocols used to validate inputs to the node. + Defaults to [L{FeaturesProtocol}]. + + @type attach_index: AttachIndexType + @ivar attach_index: Index of previous output that this node attaches to. + Can be a single integer to specify a single output, a tuple of + two or three integers to specify a range of outputs or `"all"` to + specify all outputs. Defaults to "all". Python indexing conventions apply. + + @type tasks: list[LabelType] | dict[LabelType, str] | None + @ivar tasks: Dictionary of tasks that the node supports. Should be defined + by the user as a class attribute. The key is the task type and the value + is the name of the task. For example: + C{{LabelType.CLASSIFICATION: "classification"}}. + Only needs to be defined for head nodes. """ + input_protocols: list[type[BaseModel]] = [FeaturesProtocol] + attach_index: AttachIndexType + tasks: list[LabelType] | dict[LabelType, str] | None = None + def __init__( self, *, input_shapes: list[Packet[Size]] | None = None, original_in_shape: Size | None = None, dataset_metadata: DatasetMetadata | None = None, - attach_index: AttachIndexType | None = None, - in_protocols: list[type[BaseModel]] | None = None, n_classes: int | None = None, in_sizes: Size | list[Size] | None = None, - task: str | None = None, - _task_type: LabelType | None = None, + _tasks: dict[LabelType, str] | None = None, ): super().__init__() - if attach_index is None: + self._tasks = None + if _tasks is not None: + self._tasks = _tasks + elif self.tasks is not None: + self._tasks = self._process_tasks(self.tasks) + + if getattr(self, "attach_index", None) is None: parameters = inspect.signature(self.forward).parameters inputs_forward_type = parameters.get( "inputs", parameters.get("input", parameters.get("x", None)) @@ -108,14 +167,6 @@ def __init__( self.attach_index = -1 else: self.attach_index = "all" - else: - self.attach_index = attach_index - - self.in_protocols = in_protocols or [FeaturesProtocol] - self._task_type = _task_type - if task is None and self._task_type is not None: - task = self._task_type.value - self._task = task self._input_shapes = input_shapes self._original_in_shape = original_in_shape @@ -128,28 +179,121 @@ def __init__( self._epoch = 0 self._in_sizes = in_sizes - def _non_set_error(self, name: str) -> ValueError: - return ValueError( - f"{self.__class__.__name__} is trying to access `{name}`, " - "but it was not set during initialization. " - ) + @staticmethod + def _process_tasks( + tasks: dict[LabelType, str] | list[LabelType], + ) -> dict[LabelType, str]: + if isinstance(tasks, dict): + return tasks + if isinstance(tasks, list): + return {task: task.value for task in tasks} + + def get_task_name(self, task: LabelType) -> str: + """Gets the name of a task for a particular C{LabelType}. + + @type task: LabelType + @param task: Task to get the name for. + @rtype: str + @return: Name of the task. + @raises ValueError: If the task is not supported by the node. + """ + if not self._tasks: + raise ValueError(f"Node {self.name} does not have any tasks defined.") + + if task not in self._tasks: + raise ValueError( + f"Node {self.name} does not support the {task.value} task." 
+ ) + return self._tasks[task] + + @property + def name(self) -> str: + return self.__class__.__name__ @property def task(self) -> str: """Getter for the task.""" - if self._task is None: - raise self._non_set_error("task") - return self._task + if not self._tasks: + raise ValueError(f"{self.name} does not have any tasks defined.") + + if len(self._tasks) > 1: + raise ValueError( + f"Node {self.name} has multiple tasks defined. " + "Use `get_task_name` method instead." + ) + return next(iter(self._tasks.values())) + + def get_n_classes(self, task: LabelType) -> int: + """Gets the number of classes for a particular task. + + @type task: LabelType + @param task: Task to get the number of classes for. + @rtype: int + @return: Number of classes for the task. + """ + return self.dataset_metadata.n_classes(self.get_task_name(task)) + + def get_class_names(self, task: LabelType) -> list[str]: + """Gets the class names for a particular task. + + @type task: LabelType + @param task: Task to get the class names for. + @rtype: list[str] + @return: Class names for the task. + """ + return self.dataset_metadata.class_names(self.get_task_name(task)) @property def n_classes(self) -> int: """Getter for the number of classes.""" - return self.dataset_metadata.n_classes(self.task) + if not self._tasks: + raise ValueError( + f"{self.name} does not have any tasks defined, " + "`BaseNode.n_classes` property cannot be used. " + "Either override the `tasks` class attribute, " + "pass the `n_classes` attribute to the constructor or call " + "the `BaseNode.dataset_metadata.n_classes` method manually." + ) + elif len(self._tasks) == 1: + return self.dataset_metadata.n_classes(self.task) + else: + n_classes = [ + self.dataset_metadata.n_classes(self.get_task_name(task)) + for task in self._tasks + ] + if len(set(n_classes)) == 1: + return n_classes[0] + raise ValueError( + "Node defines multiple tasks but they have different number of classes. " + "This is likely an error, as the number of classes should be the same." + "If it is intended, use `BaseNode.get_n_classes` instead." + ) @property def class_names(self) -> list[str]: """Getter for the class names.""" - return self.dataset_metadata.class_names(self.task) + if not self._tasks: + raise ValueError( + f"{self.name} does not have any tasks defined, " + "`BaseNode.class_names` property cannot be used. " + "Either override the `tasks` class attribute, " + "pass the `n_classes` attribute to the constructor or call " + "the `BaseNode.dataset_metadata.class_names` method manually." + ) + elif len(self._tasks) == 1: + return self.dataset_metadata.class_names(self.task) + else: + class_names = [ + self.dataset_metadata.class_names(self.get_task_name(task)) + for task in self._tasks + ] + if all(set(names) == set(class_names[0]) for names in class_names): + return class_names[0] + raise ValueError( + "Node defines multiple tasks but they have different class names. " + "This is likely an error, as the class names should be the same. " + "If it is intended, use `BaseNode.get_class_names` instead." + ) @property def input_shapes(self) -> list[Packet[Size]]: @@ -209,7 +353,7 @@ def in_sizes(self) -> Size | list[Size]: features = self.input_shapes[0].get("features") if features is None: raise IncompatibleException( - f"Feature field is missing in {self.__class__.__name__}. " + f"Feature field is missing in {self.name}. " "The default implementation of `in_sizes` cannot be used." 
) shapes = self.get_attached(self.input_shapes[0]["features"]) @@ -288,6 +432,11 @@ def unwrap(self, inputs: list[Packet[Tensor]]) -> ForwardInputT: @rtype: ForwardInputT @return: Prepared inputs, ready to be passed to the L{forward} method. """ + if len(inputs) > 1: + raise IncompatibleException( + f"Node {self.name} expects a single input, but got {len(inputs)} inputs instead." + "If the node expects multiple inputs, the `unwrap` method should be overridden." + ) return self.get_attached(inputs[0]["features"]) # type: ignore @abstractmethod @@ -305,7 +454,23 @@ def wrap(self, output: ForwardOutputT) -> Packet[Tensor]: """Wraps the output of the forward pass into a `Packet[Tensor]`. The default implementation expects a single tensor or a list of tensors - and wraps them into a Packet with `features` key. + and wraps them into a Packet with either the node task as a key + or "features" key if task is not defined. + + Example:: + + >>> class FooNode(BaseNode): + ... tasks = [LabelType.CLASSIFICATION] + ... + ... class BarNode(BaseNode): + ... pass + ... + >>> node = FooNode() + >>> node.wrap(torch.rand(1, 10)) + {"classification": [Tensor(1, 10)]} + >>> node = BarNode() + >>> node.wrap([torch.rand(1, 10), torch.rand(1, 10)]) + {"features": [Tensor(1, 10), Tensor(1, 10)]} @type output: ForwardOutputT @param output: Output of the forward pass. @@ -323,12 +488,16 @@ def wrap(self, output: ForwardOutputT) -> Packet[Tensor]: raise IncompatibleException( "Default `wrap` expects a single tensor or a list of tensors." ) - return {self._task or "features": outputs} + try: + task = self.task + except ValueError: + task = "features" + return {task: outputs} def run(self, inputs: list[Packet[Tensor]]) -> Packet[Tensor]: """Combines the forward pass with the wrapping and unwrapping of the inputs. - Additionally validates the inputs against `in_protocols`. + Additionally validates the inputs against `input_protocols`. @type inputs: list[Packet[Tensor]] @param inputs: Inputs to the module. @@ -341,24 +510,28 @@ def run(self, inputs: list[Packet[Tensor]]) -> Packet[Tensor]: """ unwrapped = self.unwrap(self.validate(inputs)) outputs = self(unwrapped) - return self.wrap(outputs) + wrapped = self.wrap(outputs) + str_tasks = [task.value for task in self._tasks] if self._tasks else [] + for key in list(wrapped.keys()): + if key in str_tasks: + value = wrapped.pop(key) + wrapped[self.get_task_name(LabelType(key))] = value + return wrapped def validate(self, data: list[Packet[Tensor]]) -> list[Packet[Tensor]]: - """Validates the inputs against `in_protocols`.""" - if len(data) != len(self.in_protocols): + """Validates the inputs against `input_protocols`.""" + if len(data) != len(self.input_protocols): raise IncompatibleException( - f"Node {self.__class__.__name__} expects {len(self.in_protocols)} inputs, " + f"Node {self.name} expects {len(self.input_protocols)} inputs, " f"but got {len(data)} inputs instead." 
) try: return [ validate_packet(d, protocol) - for d, protocol in zip(data, self.in_protocols) + for d, protocol in zip(data, self.input_protocols) ] except ValidationError as e: - raise IncompatibleException.from_validation_error( - e, self.__class__.__name__ - ) from e + raise IncompatibleException.from_validation_error(e, self.name) from e T = TypeVar("T", Tensor, Size) @@ -418,3 +591,9 @@ def _get_nth_size(self, idx: int) -> int | list[int]: return sizes[idx] case list(sizes): return [size[idx] for size in sizes] + + def _non_set_error(self, name: str) -> ValueError: + return ValueError( + f"{self.name} is trying to access `{name}`, " + "but it was not set during initialization. " + ) diff --git a/luxonis_train/nodes/bisenet_head.py b/luxonis_train/nodes/bisenet_head.py index 8bac3573..54128cad 100644 --- a/luxonis_train/nodes/bisenet_head.py +++ b/luxonis_train/nodes/bisenet_head.py @@ -18,6 +18,8 @@ class BiSeNetHead(BaseNode[Tensor, Tensor]): in_height: int in_channels: int + tasks: list[LabelType] = [LabelType.SEGMENTATION] + def __init__( self, intermediate_channels: int = 64, @@ -30,7 +32,7 @@ def __init__( @param intermediate_channels: How many intermediate channels to use. Defaults to C{64}. """ - super().__init__(task=LabelType.SEGMENTATION, **kwargs) + super().__init__(**kwargs) original_height = self.original_in_shape[1] upscale_factor = 2 ** infer_upscale_factor(self.in_height, original_height) diff --git a/luxonis_train/nodes/classification_head.py b/luxonis_train/nodes/classification_head.py index ceadbc60..d33faeb5 100644 --- a/luxonis_train/nodes/classification_head.py +++ b/luxonis_train/nodes/classification_head.py @@ -1,12 +1,13 @@ from torch import Tensor, nn -from luxonis_train.utils.types import LabelType, Packet +from luxonis_train.utils.types import LabelType from .base_node import BaseNode class ClassificationHead(BaseNode[Tensor, Tensor]): in_channels: int + tasks: list[LabelType] = [LabelType.CLASSIFICATION] def __init__( self, @@ -19,7 +20,7 @@ def __init__( @param dropout_rate: Dropout rate before last layer, range C{[0, 1]}. Defaults to C{0.2}. """ - super().__init__(_task_type=LabelType.CLASSIFICATION, **kwargs) + super().__init__(**kwargs) self.head = nn.Sequential( nn.AdaptiveAvgPool2d(1), @@ -30,6 +31,3 @@ def __init__( def forward(self, inputs: Tensor) -> Tensor: return self.head(inputs) - - def wrap(self, output: Tensor) -> Packet[Tensor]: - return {"classification": [output]} diff --git a/luxonis_train/nodes/efficient_bbox_head.py b/luxonis_train/nodes/efficient_bbox_head.py index 23728af1..37f23e8b 100644 --- a/luxonis_train/nodes/efficient_bbox_head.py +++ b/luxonis_train/nodes/efficient_bbox_head.py @@ -24,6 +24,7 @@ class EfficientBBoxHead( BaseNode[list[Tensor], tuple[list[Tensor], list[Tensor], list[Tensor]]] ): in_channels: list[int] + tasks: list[LabelType] = [LabelType.BOUNDINGBOX] def __init__( self, @@ -50,9 +51,7 @@ def __init__( @type max_det: int @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}. 
""" - super().__init__( - _task_type=kwargs.pop("_task_type", LabelType.BOUNDINGBOX), **kwargs - ) + super().__init__(**kwargs) self.n_heads = n_heads @@ -99,7 +98,7 @@ def wrap( conf, _ = out_cls.max(1, keepdim=True) out = torch.cat([out_reg, conf, out_cls], dim=1) outputs.append(out) - return {"boundingbox": outputs} + return {self.task: outputs} cls_tensor = torch.cat( [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], dim=2 @@ -118,7 +117,7 @@ def wrap( else: boxes = self._process_to_bbox((features, cls_tensor, reg_tensor)) return { - "boundingbox": boxes, + self.task: boxes, "features": features, "class_scores": [cls_tensor], "distributions": [reg_tensor], diff --git a/luxonis_train/nodes/efficient_keypoint_bbox_head.py b/luxonis_train/nodes/efficient_keypoint_bbox_head.py index dabb62c5..ae527f8f 100644 --- a/luxonis_train/nodes/efficient_keypoint_bbox_head.py +++ b/luxonis_train/nodes/efficient_keypoint_bbox_head.py @@ -15,6 +15,8 @@ class EfficientKeypointBBoxHead(EfficientBBoxHead): + tasks: list[LabelType] = [LabelType.KEYPOINTS, LabelType.BOUNDINGBOX] + def __init__( self, n_keypoints: int | None = None, @@ -51,7 +53,6 @@ def __init__( conf_thres=conf_thres, iou_thres=iou_thres, max_det=max_det, - _task_type=LabelType.KEYPOINTS, **kwargs, ) @@ -146,13 +147,13 @@ def wrap( ) return { "boundingbox": [detection[:, :6] for detection in detections], - "features": features, - "class_scores": [cls_tensor], - "distributions": [reg_tensor], "keypoints": [ detection[:, 6:].reshape(-1, self.n_keypoints, 3) for detection in detections ], + "features": features, + "class_scores": [cls_tensor], + "distributions": [reg_tensor], "keypoints_raw": [kpt_tensor], } diff --git a/luxonis_train/nodes/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/implicit_keypoint_bbox_head.py index dde27ed5..5b18bf37 100644 --- a/luxonis_train/nodes/implicit_keypoint_bbox_head.py +++ b/luxonis_train/nodes/implicit_keypoint_bbox_head.py @@ -5,10 +5,7 @@ import torch from torch import Tensor, nn -from luxonis_train.nodes.blocks import ( - KeypointBlock, - LearnableMulAddConv, -) +from luxonis_train.nodes.blocks import KeypointBlock, LearnableMulAddConv from luxonis_train.utils.boxutils import ( non_max_suppression, process_bbox_predictions, @@ -22,6 +19,8 @@ class ImplicitKeypointBBoxHead(BaseNode): + tasks: list[LabelType] = [LabelType.KEYPOINTS, LabelType.BOUNDINGBOX] + def __init__( self, n_keypoints: int | None = None, @@ -57,7 +56,7 @@ def __init__( @type max_det: int @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}. 
""" - super().__init__(_task_type=LabelType.KEYPOINTS, **kwargs) + super().__init__(**kwargs) if anchors is None: logger.info("No anchors provided, generating them automatically.") diff --git a/luxonis_train/nodes/rexnetv1.py b/luxonis_train/nodes/rexnetv1.py index 4999d6a1..181ad325 100644 --- a/luxonis_train/nodes/rexnetv1.py +++ b/luxonis_train/nodes/rexnetv1.py @@ -4,13 +4,10 @@ @license: U{MIT} """ - import torch from torch import Tensor, nn -from luxonis_train.nodes.blocks import ( - ConvModule, -) +from luxonis_train.nodes.blocks import ConvModule from luxonis_train.utils.general import make_divisible from .base_node import BaseNode diff --git a/luxonis_train/nodes/segmentation_head.py b/luxonis_train/nodes/segmentation_head.py index 67461eb0..cddfe8db 100644 --- a/luxonis_train/nodes/segmentation_head.py +++ b/luxonis_train/nodes/segmentation_head.py @@ -9,7 +9,7 @@ from luxonis_train.nodes.blocks import UpBlock from luxonis_train.utils.general import infer_upscale_factor -from luxonis_train.utils.types import LabelType, Packet +from luxonis_train.utils.types import LabelType from .base_node import BaseNode @@ -17,6 +17,7 @@ class SegmentationHead(BaseNode[Tensor, Tensor]): in_height: int in_channels: int + tasks: list[LabelType] = [LabelType.SEGMENTATION] def __init__(self, **kwargs): """Basic segmentation FCN head. @@ -26,7 +27,7 @@ def __init__(self, **kwargs): @type kwargs: Any @param kwargs: Additional arguments to pass to L{BaseNode}. """ - super().__init__(_task_type=LabelType.SEGMENTATION, **kwargs) + super().__init__(**kwargs) original_height = self.original_in_shape[1] num_up = infer_upscale_factor(self.in_height, original_height, strict=False) @@ -44,8 +45,5 @@ def __init__(self, **kwargs): nn.Conv2d(in_channels, self.n_classes, kernel_size=1), ) - def wrap(self, output: Tensor) -> Packet[Tensor]: - return {"segmentation": [output]} - def forward(self, inputs: Tensor) -> Tensor: return self.head(inputs) diff --git a/luxonis_train/utils/assigners/atts_assigner.py b/luxonis_train/utils/assigners/atts_assigner.py index f4989b54..9a0466da 100644 --- a/luxonis_train/utils/assigners/atts_assigner.py +++ b/luxonis_train/utils/assigners/atts_assigner.py @@ -2,12 +2,7 @@ import torch.nn.functional as F from torch import Tensor, nn -from .utils import ( - batch_iou, - bbox_iou, - candidates_in_gt, - fix_collisions, -) +from .utils import batch_iou, bbox_iou, candidates_in_gt, fix_collisions class ATSSAssigner(nn.Module): diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 2e8460ca..7ce08cf5 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -2,6 +2,7 @@ import sys from typing import Annotated, Any, Literal +from luxonis_ml.data import LabelType from luxonis_ml.utils import Environ, LuxonisConfig, LuxonisFileSystem, setup_logging from pydantic import BaseModel, ConfigDict, Field, model_validator from typing_extensions import Self @@ -40,7 +41,7 @@ class ModelNodeConfig(CustomBaseModel): input_sources: list[str] = [] # From data loader params: dict[str, Any] = {} freezing: FreezingConfig = FreezingConfig() - task: str | None = None + task: str | dict[LabelType, str] | None = None class PredefinedModelConfig(CustomBaseModel): diff --git a/luxonis_train/utils/loaders/__init__.py b/luxonis_train/utils/loaders/__init__.py index d25e3856..eaa08ff7 100644 --- a/luxonis_train/utils/loaders/__init__.py +++ b/luxonis_train/utils/loaders/__init__.py @@ -1,8 +1,4 @@ -from .base_loader import ( - BaseLoaderTorch, - 
LuxonisLoaderTorchOutput, - collate_fn, -) +from .base_loader import BaseLoaderTorch, LuxonisLoaderTorchOutput, collate_fn from .luxonis_loader_torch import LuxonisLoaderTorch __all__ = [ diff --git a/luxonis_train/utils/loaders/luxonis_loader_torch.py b/luxonis_train/utils/loaders/luxonis_loader_torch.py index 1c4bb8b5..15b61dd0 100644 --- a/luxonis_train/utils/loaders/luxonis_loader_torch.py +++ b/luxonis_train/utils/loaders/luxonis_loader_torch.py @@ -1,12 +1,7 @@ from typing import Literal import numpy as np -from luxonis_ml.data import ( - BucketStorage, - BucketType, - LuxonisDataset, - LuxonisLoader, -) +from luxonis_ml.data import BucketStorage, BucketType, LuxonisDataset, LuxonisLoader from torch import Size, Tensor from .base_loader import BaseLoaderTorch, LuxonisLoaderTorchOutput diff --git a/luxonis_train/utils/types.py b/luxonis_train/utils/types.py index 5bebc7e4..375ab565 100644 --- a/luxonis_train/utils/types.py +++ b/luxonis_train/utils/types.py @@ -37,7 +37,7 @@ def from_validation_error(cls, val_error: ValidationError, class_name: str): @classmethod def from_missing_task(cls, task: str, present_tasks: list[str], class_name: str): return cls( - f"{class_name} requires {task} label, but it was not found in " + f"{class_name} requires '{task}' label, but it was not found in " f"the label dictionary. Available labels: {present_tasks}." ) @@ -56,17 +56,5 @@ def get_task(cls) -> str: ) -class SegmentationProtocol(BaseProtocol): - segmentation: Annotated[list[Tensor], Field(min_length=1)] - - -class KeypointProtocol(BaseProtocol): - keypoints: Annotated[list[Tensor], Field(min_length=1)] - - -class BBoxProtocol(BaseProtocol): - boundingbox: Annotated[list[Tensor], Field(min_length=1)] - - class FeaturesProtocol(BaseProtocol): features: Annotated[list[Tensor], Field(min_length=1)] diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 6c15cace..12876e69 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 75% - 75% + 78% + 78% diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 73909431..7995bfa7 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -4,7 +4,7 @@ import gdown import pytest import torchvision -from luxonis_ml.data import LuxonisDataset +from luxonis_ml.data import LabelType, LuxonisDataset from luxonis_ml.data.parsers import LuxonisParser from luxonis_ml.utils import environ @@ -37,6 +37,34 @@ def create_coco_dataset(): parser.parse(random_split=True) +@pytest.fixture(scope="session", autouse=True) +def create_coco_multitask_dataset(): + url = "https://drive.google.com/uc?id=1XlvFK7aRmt8op6-hHkWVKIJQeDtOwoRT" + output_folder = "../data/" + output_zip = os.path.join(output_folder, "COCO_people_subset.zip") + + if not os.path.exists(output_folder): + os.makedirs(output_folder) + + if not os.path.exists(output_zip) and not os.path.exists( + os.path.join(output_folder, "COCO_people_subset") + ): + gdown.download(url, output_zip, quiet=False) + + parser = LuxonisParser( + output_zip, + dataset_name="coco_test_multitask", + delete_existing=True, + task_mapping={ + LabelType.KEYPOINTS: "keypoints-task", + LabelType.SEGMENTATION: "segmentation-task", + LabelType.CLASSIFICATION: "classification-task", + LabelType.BOUNDINGBOX: "boundingbox-task", + }, + ) + parser.parse(random_split=True) + + def _create_cifar10(dataset_name: str, task_names: list[str]) -> None: dataset = create_dataset(dataset_name) output_folder = "../data/" diff --git 
a/tests/integration/test_multi_input.py b/tests/integration/test_multi_input.py index 8f1eef23..575f653d 100644 --- a/tests/integration/test_multi_input.py +++ b/tests/integration/test_multi_input.py @@ -4,13 +4,12 @@ import pytest import torch -from torch import Tensor -from torch.nn.parameter import Parameter +from torch import Tensor, nn from luxonis_train.core import Exporter, Inferer, Trainer from luxonis_train.nodes import BaseNode from luxonis_train.utils.loaders import BaseLoaderTorch -from luxonis_train.utils.types import FeaturesProtocol, LabelType +from luxonis_train.utils.types import FeaturesProtocol, LabelType, Packet class CustomMultiInputLoader(BaseLoaderTorch): @@ -26,7 +25,7 @@ def input_shape(self): "pointcloud": torch.Size([1000, 3]), } - def __getitem__(self, idx): + def __getitem__(self, _): # Fake data left = torch.rand(3, 224, 224, dtype=torch.float32) right = torch.rand(3, 224, 224, dtype=torch.float32) @@ -41,6 +40,7 @@ def __getitem__(self, idx): # Fake labels segmap = torch.zeros(1, 224, 224, dtype=torch.float32) + segmap[0, 100:150, 100:150] = 1 labels = { "segmentation": (segmap, LabelType.SEGMENTATION), } @@ -57,9 +57,9 @@ def get_classes(self) -> dict[LabelType, list[str]]: class MultiInputTestBaseNode(BaseNode): def __init__(self, **kwargs): super().__init__(**kwargs) - self.scalar = Parameter(torch.tensor(1.0), requires_grad=True) + self.scalar = nn.Parameter(torch.tensor(1.0), requires_grad=True) - def forward(self, inputs): + def forward(self, inputs: list[Tensor]): return [self.scalar * inp for inp in inputs] def unwrap(self, inputs: list[dict[str, list[Tensor]]]): @@ -67,60 +67,56 @@ def unwrap(self, inputs: list[dict[str, list[Tensor]]]): class FullBackbone(MultiInputTestBaseNode): - def __init__(self, **kwargs): - in_protocols = [FeaturesProtocol] * 4 - super().__init__(**kwargs) - self.in_protocols = in_protocols + input_protocols = [FeaturesProtocol] * 4 class RGBDBackbone(MultiInputTestBaseNode): - def __init__(self, **kwargs): - in_protocols = [FeaturesProtocol] * 3 - super().__init__(**kwargs) - self.in_protocols = in_protocols + input_protocols = [FeaturesProtocol] * 3 class PointcloudBackbone(MultiInputTestBaseNode): - def __init__(self, **kwargs): - in_protocols = [FeaturesProtocol] - super().__init__(**kwargs) - self.in_protocols = in_protocols + input_protocols = [FeaturesProtocol] class FusionNeck(MultiInputTestBaseNode): - def __init__(self, **kwargs): - in_protocols = [ - FeaturesProtocol, - FeaturesProtocol, - FeaturesProtocol, - ] - super().__init__(**kwargs) - self.in_protocols = in_protocols + input_protocols = [FeaturesProtocol] * 3 class FusionNeck2(MultiInputTestBaseNode): - def __init__(self, **kwargs): - in_protocols = [FeaturesProtocol, FeaturesProtocol, FeaturesProtocol] - super().__init__(**kwargs) - self.in_protocols = in_protocols + input_protocols = [FeaturesProtocol] * 3 class CustomSegHead1(MultiInputTestBaseNode): + tasks = {LabelType.SEGMENTATION: "segmentation"} + input_protocols = [FeaturesProtocol] + def __init__(self, **kwargs): - in_protocols = [FeaturesProtocol] - super().__init__(**kwargs, _task_type=LabelType.SEGMENTATION) - self.in_protocols = in_protocols + super().__init__(**kwargs) + self.conv = nn.Conv2d(1, 1, 3, padding=1) + + def unwrap(self, inputs: list[Packet[Tensor]]) -> Tensor: + assert len(inputs) == 1 + return inputs[0]["features"][-1] + + def forward(self, inputs: Tensor): + return [self.conv(inputs)] class CustomSegHead2(MultiInputTestBaseNode): + tasks = {LabelType.SEGMENTATION: "segmentation"} 
+ input_protocols = [FeaturesProtocol] * 3 + def __init__(self, **kwargs): - in_protocols = [ - FeaturesProtocol, - FeaturesProtocol, - FeaturesProtocol, - ] - super().__init__(**kwargs, _task_type=LabelType.SEGMENTATION) - self.in_protocols = in_protocols + super().__init__(**kwargs) + self.conv = nn.Conv2d(1, 1, 3, padding=1) + + def unwrap(self, inputs: list[Packet[Tensor]]): + return [packet["features"][-1] for packet in inputs] + + def forward(self, inputs: list[Tensor]): + fn1, _, disp = inputs + x = fn1 + disp + return [self.conv(x)] @pytest.fixture(scope="function", autouse=True) diff --git a/tests/integration/test_sanity.py b/tests/integration/test_sanity.py index efb3ded7..52ee2f0b 100644 --- a/tests/integration/test_sanity.py +++ b/tests/integration/test_sanity.py @@ -5,10 +5,12 @@ import pytest +TEST_OUTPUT = Path("tests/test-output") + @pytest.fixture(scope="function", autouse=True) def clear_output(): - shutil.rmtree("output", ignore_errors=True) + shutil.rmtree(TEST_OUTPUT, ignore_errors=True) @pytest.mark.parametrize( @@ -24,13 +26,15 @@ def test_sanity(config_file): "[]", "trainer.batch_size", "1", + "tracker.save_directory", + str(TEST_OUTPUT), ] result = subprocess.run( ["luxonis_train", "train", "--config", f"configs/{config_file}", *opts], ) assert result.returncode == 0 - opts += ["model.weights", str(list(Path("output").rglob("*.ckpt"))[0])] + opts += ["model.weights", str(list(TEST_OUTPUT.rglob("*.ckpt"))[0])] opts += ["exporter.onnx.opset_version", "11"] result = subprocess.run( @@ -84,6 +88,8 @@ def test_tuner(): "4", "trainer.batch_size", "1", + "tracker.save_directory", + str(TEST_OUTPUT), ], ) assert result.returncode == 0 diff --git a/tests/unittests/test_utils/test_loaders/test_base_loader.py b/tests/unittests/test_utils/test_loaders/test_base_loader.py index 112b321a..0209c192 100644 --- a/tests/unittests/test_utils/test_loaders/test_base_loader.py +++ b/tests/unittests/test_utils/test_loaders/test_base_loader.py @@ -1,9 +1,7 @@ import pytest import torch -from luxonis_train.utils.loaders import ( - collate_fn, -) +from luxonis_train.utils.loaders import collate_fn from luxonis_train.utils.types import LabelType From c116982f1c898ca5fb858664f83f5c8e8e32a650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Tue, 13 Aug 2024 01:38:54 +0200 Subject: [PATCH 44/75] Miscellaneous Fixes and Features (#55) --- .gitignore | 1 + configs/classification_model.yaml | 0 configs/coco_model.yaml | 0 configs/coco_multitask_model.yaml | 188 --------------- configs/detection_model.yaml | 0 configs/example_export.yaml | 0 configs/example_tuning.yaml | 0 configs/keypoint_bbox_model.yaml | 0 configs/resnet_multitask_model.yaml | 110 --------- configs/segmentation_model.yaml | 0 .../attached_modules/base_attached_module.py | 4 + .../losses/implicit_keypoint_bbox_loss.py | 2 +- .../losses/smooth_bce_with_logits.py | 4 +- .../visualizers/segmentation_visualizer.py | 21 +- luxonis_train/callbacks/test_on_train_end.py | 1 + luxonis_train/core/archiver.py | 1 + luxonis_train/core/core.py | 2 - luxonis_train/core/exporter.py | 1 + luxonis_train/core/trainer.py | 2 +- luxonis_train/core/tuner.py | 4 +- luxonis_train/models/luxonis_model.py | 25 +- luxonis_train/nodes/base_node.py | 31 ++- .../nodes/efficient_keypoint_bbox_head.py | 16 +- .../nodes/implicit_keypoint_bbox_head.py | 12 - luxonis_train/utils/boxutils.py | 2 +- luxonis_train/utils/config.py | 8 +- luxonis_train/utils/general.py | 60 ++--- luxonis_train/utils/loaders/base_loader.py | 4 +- 
.../utils/loaders/luxonis_loader_torch.py | 7 +- tests/configs/parking_lot_config.yaml | 224 ++++++++++++++++++ tests/integration/conftest.py | 217 ++++++++++++----- ..._multi_input.py => multi_input_modules.py} | 39 --- tests/integration/test_sanity.py | 126 +++++----- 33 files changed, 555 insertions(+), 557 deletions(-) mode change 100755 => 100644 configs/classification_model.yaml mode change 100755 => 100644 configs/coco_model.yaml delete mode 100755 configs/coco_multitask_model.yaml mode change 100755 => 100644 configs/detection_model.yaml mode change 100755 => 100644 configs/example_export.yaml mode change 100755 => 100644 configs/example_tuning.yaml mode change 100755 => 100644 configs/keypoint_bbox_model.yaml delete mode 100644 configs/resnet_multitask_model.yaml mode change 100755 => 100644 configs/segmentation_model.yaml create mode 100644 tests/configs/parking_lot_config.yaml rename tests/integration/{test_multi_input.py => multi_input_modules.py} (76%) diff --git a/.gitignore b/.gitignore index 1204d2e2..53c9f325 100644 --- a/.gitignore +++ b/.gitignore @@ -147,3 +147,4 @@ models_venv/* # vscode settings .vscode +tests/data diff --git a/configs/classification_model.yaml b/configs/classification_model.yaml old mode 100755 new mode 100644 diff --git a/configs/coco_model.yaml b/configs/coco_model.yaml old mode 100755 new mode 100644 diff --git a/configs/coco_multitask_model.yaml b/configs/coco_multitask_model.yaml deleted file mode 100755 index 7cf8541a..00000000 --- a/configs/coco_multitask_model.yaml +++ /dev/null @@ -1,188 +0,0 @@ -# An example configuration for a more complex network. - - -model: - name: coco_test - nodes: - - name: EfficientRep - params: - channels_list: [64, 128, 256, 512, 1024] - num_repeats: [1, 6, 12, 18, 6] - depth_mul: 0.33 - width_mul: 0.33 - - - name: RepPANNeck - inputs: - - EfficientRep - params: - channels_list: [256, 128, 128, 256, 256, 512] - num_repeats: [12, 12, 12, 12] - depth_mul: 0.33 - width_mul: 0.33 - - - name: ImplicitKeypointBBoxHead - task: - keypoints: keypoints-task - boundingbox: boundingbox-task - inputs: - - RepPANNeck - params: - conf_thres: 0.25 - iou_thres: 0.45 - - - name: SegmentationHead - task: segmentation-task - inputs: - - RepPANNeck - - - name: EfficientBBoxHead - task: boundingbox-task - inputs: - - RepPANNeck - params: - conf_thres: 0.75 - iou_thres: 0.45 - - losses: - - name: AdaptiveDetectionLoss - attached_to: EfficientBBoxHead - - name: BCEWithLogitsLoss - attached_to: SegmentationHead - - name: ImplicitKeypointBBoxLoss - attached_to: ImplicitKeypointBBoxHead - params: - keypoint_regression_loss_weight: 0.5 - keypoint_visibility_loss_weight: 0.7 - bbox_loss_weight: 0.05 - objectness_loss_weight: 0.2 - - metrics: - - name: ObjectKeypointSimilarity - is_main_metric: true - attached_to: ImplicitKeypointBBoxHead - - name: MeanAveragePrecisionKeypoints - attached_to: ImplicitKeypointBBoxHead - - name: MeanAveragePrecision - attached_to: EfficientBBoxHead - - name: F1Score - attached_to: SegmentationHead - params: - task: binary - - name: JaccardIndex - attached_to: SegmentationHead - params: - task: binary - - visualizers: - - name: MultiVisualizer - attached_to: ImplicitKeypointBBoxHead - params: - visualizers: - - name: KeypointVisualizer - params: - nonvisible_color: blue - - name: BBoxVisualizer - params: - colors: - person: "#FF5055" - - name: SegmentationVisualizer - attached_to: SegmentationHead - params: - colors: "#FF5055" - - name: BBoxVisualizer - attached_to: EfficientBBoxHead - -tracker: - project_name: 
coco_test_multitask - save_directory: output - is_tensorboard: True - is_wandb: False - wandb_entity: luxonis - is_mlflow: False - -loader: - train_view: train - val_view: val - test_view: test - - params: - dataset_name: coco_test_multitask - -trainer: - accelerator: auto - devices: auto - strategy: auto - - num_sanity_val_steps: 1 - profiler: null - verbose: True - batch_size: 4 - accumulate_grad_batches: 1 - epochs: &epochs 200 - num_workers: 0 - train_metrics_interval: -1 - validation_interval: 10 - num_log_images: 8 - skip_last_batch: True - log_sub_losses: True - save_top_k: 3 - - preprocessing: - train_image_size: [&height 256, &width 320] - keep_aspect_ratio: False - train_rgb: True - normalize: - active: True - augmentations: - - name: Defocus - params: - p: 0.1 - - name: Sharpen - params: - p: 0.1 - - name: Flip - - name: RandomRotate90 - - name: Mosaic4 - params: - out_width: *width - out_height: *height - - callbacks: - - name: LearningRateMonitor - params: - logging_interval: step - - name: MetadataLogger - params: - hyperparams: ["trainer.epochs", trainer.batch_size] - - name: EarlyStopping - params: - patience: 3 - monitor: val/loss - mode: min - verbose: true - - name: ExportOnTrainEnd - - name: TestOnTrainEnd - - optimizer: - name: SGD - params: - lr: 0.02 - momentum: 0.937 - nesterov: True - weight_decay: 0.0005 - - scheduler: - name: CosineAnnealingLR - params: - T_max: *epochs - eta_min: 0 - -exporter: - onnx: - opset_version: 11 - -tuner: - params: - trainer.optimizer.name_categorical: ["Adam", "SGD"] - trainer.optimizer.params.lr_float: [0.0001, 0.001] - trainer.batch_size_int: [4, 16, 4] diff --git a/configs/detection_model.yaml b/configs/detection_model.yaml old mode 100755 new mode 100644 diff --git a/configs/example_export.yaml b/configs/example_export.yaml old mode 100755 new mode 100644 diff --git a/configs/example_tuning.yaml b/configs/example_tuning.yaml old mode 100755 new mode 100644 diff --git a/configs/keypoint_bbox_model.yaml b/configs/keypoint_bbox_model.yaml old mode 100755 new mode 100644 diff --git a/configs/resnet_multitask_model.yaml b/configs/resnet_multitask_model.yaml deleted file mode 100644 index 844c83d4..00000000 --- a/configs/resnet_multitask_model.yaml +++ /dev/null @@ -1,110 +0,0 @@ - -model: - name: resnet50_classification - nodes: - - name: ResNet - params: - variant: "50" - download_weights: True - - - name: ClassificationHead - alias: ClassificationHead_1 - task: classification_1 - inputs: - - ResNet - - - name: ClassificationHead - alias: ClassificationHead_2 - task: classification_2 - inputs: - - ResNet - - - name: ClassificationHead - alias: ClassificationHead_3 - task: classification_3 - inputs: - - ResNet - - losses: - - name: CrossEntropyLoss - alias: CrossEntropyLoss_1 - attached_to: ClassificationHead_1 - - - name: CrossEntropyLoss - alias: CrossEntropyLoss_2 - attached_to: ClassificationHead_2 - - - name: CrossEntropyLoss - alias: CrossEntropyLoss_3 - attached_to: ClassificationHead_3 - - metrics: - - name: Accuracy - is_main_metric: true - alias: Accuracy_1 - attached_to: ClassificationHead_1 - - - name: Accuracy - alias: Accuracy_2 - attached_to: ClassificationHead_2 - - - name: Accuracy - alias: Accuracy_3 - attached_to: ClassificationHead_3 - - visualizers: - - name: ClassificationVisualizer - alias: ClassificationVisualizer_1 - attached_to: ClassificationHead_1 - params: - font_scale: 0.5 - color: [255, 0, 0] - thickness: 2 - include_plot: True - - - name: ClassificationVisualizer - alias: ClassificationVisualizer_2 - 
attached_to: ClassificationHead_2 - params: - font_scale: 0.5 - color: [255, 0, 0] - thickness: 2 - include_plot: True - - - name: ClassificationVisualizer - alias: ClassificationVisualizer_3 - attached_to: ClassificationHead_3 - params: - font_scale: 0.5 - color: [255, 0, 0] - thickness: 2 - include_plot: True - -loader: - params: - dataset_name: cifar10_task_test - -trainer: - batch_size: 4 - epochs: &epochs 200 - num_workers: 4 - validation_interval: 10 - num_log_images: 8 - - preprocessing: - train_image_size: [&height 224, &width 224] - keep_aspect_ratio: False - normalize: - active: True - - callbacks: - - name: ExportOnTrainEnd - - name: TestOnTrainEnd - - optimizer: - name: SGD - params: - lr: 0.02 - - scheduler: - name: ConstantLR diff --git a/configs/segmentation_model.yaml b/configs/segmentation_model.yaml old mode 100755 new mode 100644 diff --git a/luxonis_train/attached_modules/base_attached_module.py b/luxonis_train/attached_modules/base_attached_module.py index e86cf24f..17a4c277 100644 --- a/luxonis_train/attached_modules/base_attached_module.py +++ b/luxonis_train/attached_modules/base_attached_module.py @@ -133,6 +133,10 @@ def get_label( @rtype: tuple[Tensor, LabelType] @return: Extracted label and its type. """ + if label_type is None: + if len(self.required_labels) == 1: + label_type = self.required_labels[0] + if label_type is not None: task_name = self.node.get_task_name(label_type) if task_name not in labels: diff --git a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py index a10d1f31..d174c555 100644 --- a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py +++ b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py @@ -167,7 +167,7 @@ def prepare( targets[:, self.box_offset + 2 :: 3] = kpts[:, 3::3] # insert kp y coordinates targets[:, self.box_offset + 3 :: 3] = kpts[:, 4::3] # insert kp visibility - n_targets = len(targets) + n_targets = targets.shape[0] class_targets: list[Tensor] = [] box_targets: list[Tensor] = [] diff --git a/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py b/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py index 2f92b230..ac976428 100644 --- a/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py +++ b/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py @@ -67,5 +67,7 @@ def forward(self, predictions: list[Tensor], target: Tensor) -> Tensor: self.negative_smooth_const, device=prediction.device, ) - smoothed_target[torch.arange(len(target)), target] = self.positive_smooth_const + smoothed_target[ + torch.arange(target.shape[0]), target + ] = self.positive_smooth_const return self.criterion.forward(prediction, smoothed_target) diff --git a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py index b65fd43a..85b93ce1 100644 --- a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py @@ -18,7 +18,8 @@ class SegmentationVisualizer(BaseVisualizer[Tensor, Tensor]): def __init__( self, colors: Color | list[Color] = "#5050FF", - background_class: int | None = None, + background_class: int | None = 0, + background_color: Color = "#000000", alpha: float = 0.6, **kwargs, ): @@ -26,6 +27,12 @@ def __init__( @type colors: L{Color} | list[L{Color}] @param colors: Color of the segmentation masks. 
Defaults to C{"#5050FF"}. + @type background_class: int | None + @param background_class: Index of the background class. Defaults to C{0}. + If set, the background class will be drawn with the `background_color`. + @type background_color: L{Color} | None + @param background_color: Color of the background class. + Defaults to C{"#000000"}. @type alpha: float @param alpha: Alpha value of the segmentation masks. Defaults to C{0.6}. """ @@ -35,6 +42,7 @@ def __init__( self.colors = colors self.background_class = background_class + self.background_color = background_color self.alpha = alpha @staticmethod @@ -43,10 +51,11 @@ def draw_predictions( predictions: Tensor, colors: list[Color] | None = None, background_class: int | None = None, + background_color: Color = "#000000", **kwargs, ) -> Tensor: colors = SegmentationVisualizer._adjust_colors( - predictions, colors, background_class + predictions, colors, background_class, background_color ) viz = torch.zeros_like(canvas) for i in range(len(canvas)): @@ -63,10 +72,11 @@ def draw_targets( targets: Tensor, colors: list[Color] | None = None, background_class: int | None = None, + background_color: Color = "#000000", **kwargs, ) -> Tensor: colors = SegmentationVisualizer._adjust_colors( - targets, colors, background_class + targets, colors, background_class, background_color ) viz = torch.zeros_like(canvas) for i in range(len(viz)): @@ -108,6 +118,7 @@ def forward( colors=self.colors, alpha=self.alpha, background_class=self.background_class, + background_color=self.background_color, **kwargs, ) predictions_vis = self.draw_predictions( @@ -116,6 +127,7 @@ def forward( colors=self.colors, alpha=self.alpha, background_class=self.background_class, + background_color=self.background_color, **kwargs, ) return targets_vis, predictions_vis @@ -125,6 +137,7 @@ def _adjust_colors( data: Tensor, colors: list[Color] | None = None, background_class: int | None = None, + background_color: Color = "#000000", ) -> list[Color]: global log_disable n_classes = data.size(1) @@ -142,5 +155,5 @@ def _adjust_colors( log_disable = True colors = [get_color(i) for i in range(data.size(1))] if background_class is not None: - colors[background_class] = "#000000" + colors[background_class] = background_color return colors diff --git a/luxonis_train/callbacks/test_on_train_end.py b/luxonis_train/callbacks/test_on_train_end.py index bf7db341..3c799ec9 100644 --- a/luxonis_train/callbacks/test_on_train_end.py +++ b/luxonis_train/callbacks/test_on_train_end.py @@ -19,6 +19,7 @@ def on_train_end( if isinstance(callback, ModelCheckpoint) } + assert pl_module._core is not None trainer.test(pl_module, pl_module._core.pytorch_loaders["test"]) # Restore the paths diff --git a/luxonis_train/core/archiver.py b/luxonis_train/core/archiver.py index 9e2b7c5a..a376b9a7 100644 --- a/luxonis_train/core/archiver.py +++ b/luxonis_train/core/archiver.py @@ -46,6 +46,7 @@ def __init__( dataset_metadata=self.dataset_metadata, save_dir=self.run_save_dir, input_shape=self.loaders["train"].input_shape, + _core=self, ) self.model_name = self.cfg.model.name diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index 3c1acf7f..b3c57935 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -52,8 +52,6 @@ def __init__( else: self.cfg = Config.get_config(cfg, opts) - opts = opts or [] - if self.cfg.use_rich_text: rich.traceback.install(suppress=[pl, torch], show_locals=False) diff --git a/luxonis_train/core/exporter.py b/luxonis_train/core/exporter.py index 
f73e7ec8..7d941cb3 100644 --- a/luxonis_train/core/exporter.py +++ b/luxonis_train/core/exporter.py @@ -84,6 +84,7 @@ def __init__( save_dir=self.run_save_dir, input_shape=self.input_shape, dataset_metadata=self.dataset_metadata, + _core=self, ) def _get_modelconverter_config(self, onnx_path: str) -> dict[str, Any]: diff --git a/luxonis_train/core/trainer.py b/luxonis_train/core/trainer.py index 90c15059..9cdd9dfe 100644 --- a/luxonis_train/core/trainer.py +++ b/luxonis_train/core/trainer.py @@ -54,8 +54,8 @@ def __init__( dataset_metadata=self.dataset_metadata, save_dir=self.run_save_dir, input_shape=self.loaders["train"].input_shape, + _core=self, ) - self.lightning_module._core = self def graceful_exit(signum: int, _): logger.info(f"{signal.Signals(signum).name} received, stopping training...") diff --git a/luxonis_train/core/tuner.py b/luxonis_train/core/tuner.py index 13d56ca4..67ec953b 100644 --- a/luxonis_train/core/tuner.py +++ b/luxonis_train/core/tuner.py @@ -22,7 +22,7 @@ class Tuner(Core): def __init__( self, cfg: str | dict[str, Any] | Config | None = None, - opts: list[str] | tuple[str, ...] | None = None, + opts: list[str] | tuple[str, ...] | dict[str, Any] | None = None, ): """Main API which is used to perform hyperparameter tunning. @@ -145,8 +145,8 @@ def _objective(self, trial: optuna.trial.Trial) -> float: dataset_metadata=self.dataset_metadata, save_dir=run_save_dir, input_shape=self.loaders["train"].input_shape, + _core=self, ) - lightning_module._core = self callbacks: list[pl.Callback] = ( [LuxonisProgressBar()] if self.cfg.use_rich_text else [] ) diff --git a/luxonis_train/models/luxonis_model.py b/luxonis_train/models/luxonis_model.py index f541b2da..296c85ee 100644 --- a/luxonis_train/models/luxonis_model.py +++ b/luxonis_train/models/luxonis_model.py @@ -7,6 +7,7 @@ import torch from lightning.pytorch.callbacks import ModelCheckpoint, RichModelSummary from lightning.pytorch.utilities import rank_zero_only # type: ignore +from luxonis_ml.data import LuxonisDataset from torch import Size, Tensor, nn import luxonis_train @@ -16,6 +17,7 @@ BaseMetric, BaseVisualizer, ) +from luxonis_train.attached_modules.metrics.common import TorchMetricWrapper from luxonis_train.attached_modules.visualizers import ( combine_visualizations, get_unnormalized_images, @@ -86,7 +88,6 @@ class LuxonisModel(pl.LightningModule): """ _trainer: pl.Trainer - _core: "luxonis_train.core.Core" logger: LuxonisTrackerPL def __init__( @@ -95,6 +96,8 @@ def __init__( save_dir: str, input_shape: dict[str, Size], dataset_metadata: DatasetMetadata | None = None, + *, + _core: "luxonis_train.core.Core | None" = None, **kwargs, ): """Constructs an instance of `LuxonisModel` from `Config`. 
@@ -115,6 +118,7 @@ def __init__( super().__init__(**kwargs) self._export: bool = False + self._core = _core self.cfg = cfg self.original_in_shape = input_shape @@ -793,7 +797,24 @@ def _init_attached_module( Module = registry.get(cfg.name) module_name = cfg.alias or cfg.name node_name = cfg.attached_to - module = Module(**cfg.params, node=self.nodes[node_name]) + node: BaseNode = self.nodes[node_name] # type: ignore + if issubclass(Module, TorchMetricWrapper): + if "task" not in cfg.params and self._core is not None: + loader = self._core.loaders["train"] + dataset = getattr(loader, "dataset", None) + if isinstance(dataset, LuxonisDataset): + n_classes = len(dataset.get_classes()[1][node.task]) + if n_classes == 1: + cfg.params["task"] = "binary" + else: + cfg.params["task"] = "multiclass" + logger.warning( + f"Parameter 'task' not specified for `TorchMetric` based '{module_name}' metric. " + f"Assuming task type based on the number of classes: {cfg.params['task']}. " + "If this is incorrect, please specify the 'task' parameter in the config." + ) + + module = Module(**cfg.params, node=node) storage[node_name][module_name] = module # type: ignore return module_name, node_name diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py index 34322be5..3b549e0a 100644 --- a/luxonis_train/nodes/base_node.py +++ b/luxonis_train/nodes/base_node.py @@ -144,6 +144,7 @@ def __init__( original_in_shape: Size | None = None, dataset_metadata: DatasetMetadata | None = None, n_classes: int | None = None, + n_keypoints: int | None = None, in_sizes: Size | list[Size] | None = None, _tasks: dict[LabelType, str] | None = None, ): @@ -170,11 +171,9 @@ def __init__( self._input_shapes = input_shapes self._original_in_shape = original_in_shape - if n_classes is not None: - if dataset_metadata is not None: - raise ValueError("Cannot set both `dataset_metadata` and `n_classes`.") - dataset_metadata = DatasetMetadata(n_classes=n_classes) self._dataset_metadata = dataset_metadata + self._n_classes = n_classes + self._n_keypoints = n_keypoints self._export = False self._epoch = 0 self._in_sizes = in_sizes @@ -243,9 +242,33 @@ def get_class_names(self, task: LabelType) -> list[str]: """ return self.dataset_metadata.class_names(self.get_task_name(task)) + @property + def n_keypoints(self) -> int: + """Getter for the number of keypoints.""" + if self._n_keypoints is not None: + return self._n_keypoints + + if self._tasks: + if LabelType.KEYPOINTS not in self._tasks: + raise (ValueError(f"{self.name} does not support keypoints.")) + return self.dataset_metadata.n_keypoints( + self.get_task_name(LabelType.KEYPOINTS) + ) + + raise ValueError( + f"{self.name} does not have any tasks defined, " + "`BaseNode.n_keypoints` property cannot be used. " + "Either override the `tasks` class attribute, " + "pass the `n_keypoints` attribute to the constructor or call " + "the `BaseNode.dataset_metadata.get_n_keypoints` method manually." 
+ ) + @property def n_classes(self) -> int: """Getter for the number of classes.""" + if self._n_classes is not None: + return self._n_classes + if not self._tasks: raise ValueError( f"{self.name} does not have any tasks defined, " diff --git a/luxonis_train/nodes/efficient_keypoint_bbox_head.py b/luxonis_train/nodes/efficient_keypoint_bbox_head.py index ae527f8f..03d29296 100644 --- a/luxonis_train/nodes/efficient_keypoint_bbox_head.py +++ b/luxonis_train/nodes/efficient_keypoint_bbox_head.py @@ -19,7 +19,6 @@ class EfficientKeypointBBoxHead(EfficientBBoxHead): def __init__( self, - n_keypoints: int | None = None, n_heads: Literal[2, 3, 4] = 3, conf_thres: float = 0.25, iou_thres: float = 0.45, @@ -31,10 +30,6 @@ def __init__( Adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial Applications}. - @param n_keypoints: Number of keypoints. If not defined, inferred - from the dataset metadata (if provided). Defaults to C{None}. - @type n_keypoints: int | None - @param n_heads: Number of output heads. Defaults to C{3}. B{Note:} Should be same also on neck in most cases. @type n_heads: int @@ -56,16 +51,7 @@ def __init__( **kwargs, ) - n_keypoints = n_keypoints or self.dataset_metadata._n_keypoints - - if n_keypoints is None: - raise ValueError( - "Number of keypoints must be specified either in the constructor or " - "in the dataset metadata." - ) - - self.n_keypoints = n_keypoints - self.nk = n_keypoints * 3 + self.nk = self.n_keypoints * 3 mid_ch = max(self.in_channels[0] // 4, self.nk) self.kpt_layers = nn.ModuleList( diff --git a/luxonis_train/nodes/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/implicit_keypoint_bbox_head.py index 5b18bf37..79e3fb79 100644 --- a/luxonis_train/nodes/implicit_keypoint_bbox_head.py +++ b/luxonis_train/nodes/implicit_keypoint_bbox_head.py @@ -23,7 +23,6 @@ class ImplicitKeypointBBoxHead(BaseNode): def __init__( self, - n_keypoints: int | None = None, num_heads: int = 3, anchors: list[list[float]] | None = None, init_coco_biases: bool = True, @@ -39,9 +38,6 @@ def __init__( TODO: more technical documentation - @type n_keypoints: int | None - @param n_keypoints: Number of keypoints. If not defined, inferred - from the dataset metadata (if provided). Defaults to C{None}. @type num_heads: int @param num_heads: Number of output heads. Defaults to C{3}. B{Note:} Should be same also on neck in most cases. @@ -67,14 +63,6 @@ def __init__( self.iou_thres = iou_thres self.max_det = max_det - n_keypoints = n_keypoints or self.dataset_metadata._n_keypoints - - if n_keypoints is None: - raise ValueError( - "Number of keypoints must be specified either in the constructor or " - "in the dataset metadata." 
- ) - self.n_keypoints = n_keypoints self.num_heads = num_heads self.box_offset = 5 diff --git a/luxonis_train/utils/boxutils.py b/luxonis_train/utils/boxutils.py index 64a8b8dd..3a206c75 100644 --- a/luxonis_train/utils/boxutils.py +++ b/luxonis_train/utils/boxutils.py @@ -710,6 +710,6 @@ def compute_iou_loss( raise ValueError(f"Unknown reduction type `{reduction}`") else: loss_iou = torch.tensor(0.0).to(pred_bboxes.device) - iou = torch.zeros([len(target_bboxes)]).to(pred_bboxes.device) + iou = torch.zeros([target_bboxes.shape[0]]).to(pred_bboxes.device) return loss_iou, iou.detach().clamp(0) diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 7ce08cf5..13149d1b 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -114,9 +114,15 @@ def check_unique_names(self) -> Self: ]: names = set() for obj in objects: + obj: AttachedModuleConfig name = obj.alias or obj.name if name in names: - raise ValueError(f"Duplicate name `{name}` in `{section}` section.") + if obj.alias is None: + obj.alias = f"{name}_{obj.attached_to}" + if obj.alias in names: + raise ValueError( + f"Duplicate name `{name}` in `{section}` section." + ) names.add(name) return self diff --git a/luxonis_train/utils/general.py b/luxonis_train/utils/general.py index 099beb66..96e6b766 100644 --- a/luxonis_train/utils/general.py +++ b/luxonis_train/utils/general.py @@ -9,7 +9,7 @@ from luxonis_train.utils.boxutils import anchors_from_dataset from luxonis_train.utils.loaders import BaseLoaderTorch -from luxonis_train.utils.types import LabelType, Packet +from luxonis_train.utils.types import Packet # TODO: could be moved to luxonis-ml? @@ -20,11 +20,8 @@ class DatasetMetadata: def __init__( self, *, - classes: dict[LabelType, list[str]] | None = None, - n_classes: int | None = None, - n_keypoints: int | None = None, - keypoint_names: list[str] | None = None, - connectivity: list[tuple[int, int]] | None = None, + classes: dict[str, list[str]] | None = None, + n_keypoints: dict[str, int] | None = None, loader: DataLoader | None = None, ): """An object containing metadata about the dataset. Used to infer the number of @@ -45,21 +42,12 @@ def __init__( @type loader: DataLoader | None @param loader: Dataset loader. """ - if classes is None and n_classes is not None: - classes = { - LabelType(lbl): [str(i) for i in range(n_classes)] - for lbl in LabelType.__members__ - } - self._classes = classes - self._keypoint_names = keypoint_names - self._connectivity = connectivity - self._n_keypoints = n_keypoints - if self._n_keypoints is None and self._keypoint_names is not None: - self._n_keypoints = len(self._keypoint_names) + self._classes = classes or {} + self._n_keypoints = n_keypoints or {} self._loader = loader @property - def classes(self) -> dict[LabelType, list[str]]: + def classes(self) -> dict[str, list[str]]: """Dictionary mapping label types to lists of class names. @type: dict[LabelType, list[str]] @@ -96,6 +84,18 @@ def n_classes(self, task: str | None) -> int: ) return n_classes + def n_keypoints(self, task: str | None) -> int: + if task is not None: + if task not in self._n_keypoints: + raise ValueError(f"Task '{task}' is not present in the dataset.") + return self._n_keypoints[task] + if len(self._n_keypoints) > 1: + raise ValueError( + "The dataset specifies multiple keypoint tasks, " + "please specify the 'task' argument to get the number of keypoints." 
+ ) + return next(iter(self._n_keypoints.values())) + def class_names(self, task: str | None) -> list[str]: """Gets the class names for the specified task. @@ -160,29 +160,9 @@ def from_loader(cls, loader: BaseLoaderTorch) -> "DatasetMetadata": @return: Instance of L{DatasetMetadata} created from the provided dataset. """ classes = loader.get_classes() - skeletons = loader.get_skeletons() - - keypoint_names = None - connectivity = None - - if skeletons is not None: - if len(skeletons) == 1: - task_name = next(iter(skeletons)) - class_name = next(iter(skeletons[task_name])) - keypoint_names = skeletons[task_name][class_name]["labels"] - connectivity = skeletons[task_name][class_name]["edges"] - - elif len(skeletons) > 1: - raise NotImplementedError( - "The dataset defines multiclass keypoint detection. " - "This is not yet supported." - ) + n_keypoints = loader.get_n_keypoints() - return cls( - classes=classes, - keypoint_names=keypoint_names, - connectivity=connectivity, - ) + return cls(classes=classes, n_keypoints=n_keypoints) def make_divisible(x: int | float, divisor: int) -> int: diff --git a/luxonis_train/utils/loaders/base_loader.py b/luxonis_train/utils/loaders/base_loader.py index c4f22428..5f3bba59 100644 --- a/luxonis_train/utils/loaders/base_loader.py +++ b/luxonis_train/utils/loaders/base_loader.py @@ -97,7 +97,7 @@ def __getitem__(self, idx: int) -> LuxonisLoaderTorchOutput: ... @abstractmethod - def get_classes(self) -> dict[LabelType, list[str]]: + def get_classes(self) -> dict[str, list[str]]: """Gets classes according to computer vision task. @rtype: dict[LabelType, list[str]] @@ -105,7 +105,7 @@ def get_classes(self) -> dict[LabelType, list[str]]: """ pass - def get_skeletons(self) -> dict[str, dict] | None: + def get_n_keypoints(self) -> dict[str, int] | None: """Returns the dictionary defining the semantic skeleton for each class using keypoints. 
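The loader and metadata changes above replace the skeleton dictionaries with plain per-task mappings: `get_classes()` now returns `dict[str, list[str]]`, `get_n_keypoints()` returns `dict[str, int]`, and `DatasetMetadata.from_loader` passes both straight through. A minimal sketch of how the reworked metadata is meant to be queried, assuming the class is imported from `luxonis_train.utils.general`; the task names and counts below are made up for illustration and do not come from the patch:

from luxonis_train.utils.general import DatasetMetadata

# Hypothetical per-task metadata; in practice these dicts come from the loader
# via DatasetMetadata.from_loader(loader).
metadata = DatasetMetadata(
    classes={
        "car-keypoints": ["car"],
        "color_segmentation": ["background", "red", "blue"],
    },
    n_keypoints={"car-keypoints": 5},
)

metadata.n_classes("color_segmentation")  # expected: 3
metadata.n_keypoints("car-keypoints")     # expected: 5
# With a single keypoint task defined, the task argument may be omitted:
metadata.n_keypoints(None)                # expected: 5
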
diff --git a/luxonis_train/utils/loaders/luxonis_loader_torch.py b/luxonis_train/utils/loaders/luxonis_loader_torch.py index 15b61dd0..8545dad2 100644 --- a/luxonis_train/utils/loaders/luxonis_loader_torch.py +++ b/luxonis_train/utils/loaders/luxonis_loader_torch.py @@ -10,7 +10,7 @@ class LuxonisLoaderTorch(BaseLoaderTorch): def __init__( self, - dataset_name: str | None = None, + dataset_name: str, team_id: str | None = None, bucket_type: Literal["internal", "external"] = "internal", bucket_storage: Literal["local", "s3", "gcs", "azure"] = "local", @@ -54,5 +54,6 @@ def get_classes(self) -> dict[str, list[str]]: _, classes = self.dataset.get_classes() return {task: classes[task] for task in classes} - def get_skeletons(self) -> dict[str, dict] | None: - return self.dataset.get_skeletons() + def get_n_keypoints(self) -> dict[str, int]: + skeletons = self.dataset.get_skeletons() + return {task: len(skeletons[task][0]) for task in skeletons} diff --git a/tests/configs/parking_lot_config.yaml b/tests/configs/parking_lot_config.yaml new file mode 100644 index 00000000..53760045 --- /dev/null +++ b/tests/configs/parking_lot_config.yaml @@ -0,0 +1,224 @@ + +model: + name: parking_lot_model + nodes: + + - name: ReXNetV1_lite + alias: rexnet-detection-backbone + + - name: EfficientRep + alias: efficient-detection-backbone + params: + channels_list: [64, 128, 256, 512, 1024] + num_repeats: [1, 6, 12, 18, 6] + depth_mul: 0.33 + width_mul: 0.33 + + - name: RepPANNeck + alias: efficient-detection-neck + inputs: + - efficient-detection-backbone + params: + channels_list: [256, 128, 128, 256, 256, 512] + num_repeats: [12, 12, 12, 12] + depth_mul: 0.33 + width_mul: 0.33 + + - name: MicroNet + alias: color-segmentation-backbone + + - name: MobileOne + alias: brand-segmentation-backbone + + - name: MobileNetV2 + alias: vehicle-type-segmentation-backbone + + - name: ContextSpatial + alias: context-brand-segmentation-backbone + + - name: EfficientBBoxHead + alias: bbox-head + inputs: + - efficient-detection-neck + + - name: ImplicitKeypointBBoxHead + alias: car-detection-head + inputs: + - rexnet-detection-backbone + task: + keypoints: car-keypoints + boundingbox: car-boundingbox + params: + conf_thres: 0.25 + iou_thres: 0.45 + + - name: EfficientKeypointBBoxHead + alias: motorbike-detection-head + task: + keypoints: motorbike-keypoints + boundingbox: motorbike-boundingbox + inputs: + - efficient-detection-neck + params: + conf_thres: 0.25 + iou_thres: 0.45 + + - name: BiSeNetHead + alias: context-brand-segmentation-head + task: brand_segmentation + inputs: + - context-brand-segmentation-backbone + + - name: SegmentationHead + alias: color-segmentation-head + task: color_segmentation + inputs: + - color-segmentation-backbone + + - name: SegmentationHead + alias: any-vehicle-segmentation-head + task: vehicle_segmentation + inputs: + - vehicle-type-segmentation-backbone + + - name: BiSeNetHead + alias: brand-segmentation-head + task: brand_segmentation + inputs: + - brand-segmentation-backbone + + - name: BiSeNetHead + alias: vehicle-type-segmentation-head + task: vehicle_type_segmentation + inputs: + - vehicle-type-segmentation-backbone + + losses: + - name: AdaptiveDetectionLoss + attached_to: bbox-head + - name: BCEWithLogitsLoss + attached_to: any-vehicle-segmentation-head + - name: CrossEntropyLoss + attached_to: vehicle-type-segmentation-head + - name: CrossEntropyLoss + attached_to: context-brand-segmentation-head + - name: CrossEntropyLoss + attached_to: color-segmentation-head + - name: 
SoftmaxFocalLoss + attached_to: brand-segmentation-head + - name: ImplicitKeypointBBoxLoss + attached_to: car-detection-head + - name: EfficientKeypointBBoxLoss + attached_to: motorbike-detection-head + + metrics: + - name: ObjectKeypointSimilarity + attached_to: car-detection-head + - name: MeanAveragePrecisionKeypoints + attached_to: motorbike-detection-head + - name: MeanAveragePrecision + attached_to: bbox-head + is_main_metric: true + - name: F1Score + attached_to: any-vehicle-segmentation-head + - name: JaccardIndex + attached_to: color-segmentation-head + - name: Accuracy + attached_to: vehicle-type-segmentation-head + - name: Precision + attached_to: brand-segmentation-head + - name: Recall + attached_to: context-brand-segmentation-head + + visualizers: + - name: MultiVisualizer + alias: multi-visualizer-car + attached_to: car-detection-head + params: + visualizers: + - name: KeypointVisualizer + params: + nonvisible_color: blue + - name: BBoxVisualizer + + - name: MultiVisualizer + alias: multi-visualizer-motorbike + attached_to: motorbike-detection-head + params: + visualizers: + - name: KeypointVisualizer + params: + nonvisible_color: blue + - name: BBoxVisualizer + + - name: SegmentationVisualizer + alias: color-segmentation-visualizer + attached_to: color-segmentation-head + - name: SegmentationVisualizer + alias: vehicle-type-segmentation-visualizer + attached_to: vehicle-type-segmentation-head + - name: SegmentationVisualizer + alias: vehicle-segmentation-visualizer + attached_to: any-vehicle-segmentation-head + - name: SegmentationVisualizer + alias: context-brand-segmentation-visualizer + attached_to: context-brand-segmentation-head + - name: SegmentationVisualizer + alias: brand-segmentation-visualizer + attached_to: brand-segmentation-head + - name: BBoxVisualizer + alias: bbox-visualizer + attached_to: bbox-head + +tracker: + project_name: Parking_Lot + is_tensorboard: True + +loader: + train_view: val + params: + dataset_name: D1ParkingLot + +trainer: + accelerator: auto + devices: auto + strategy: auto + + num_sanity_val_steps: 1 + profiler: null + verbose: True + batch_size: 2 + accumulate_grad_batches: 1 + epochs: 200 + num_workers: 8 + train_metrics_interval: -1 + validation_interval: 10 + num_log_images: 8 + skip_last_batch: True + log_sub_losses: True + save_top_k: 3 + + preprocessing: + train_image_size: [256, 320] + keep_aspect_ratio: False + train_rgb: True + normalize: + active: True + augmentations: + - name: Defocus + params: + p: 0.1 + - name: Sharpen + params: + p: 0.1 + + callbacks: + - name: LearningRateMonitor + params: + logging_interval: step + - name: MetadataLogger + params: + hyperparams: ["trainer.epochs", trainer.batch_size] + - name: ExportOnTrainEnd + - name: TestOnTrainEnd + diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 7995bfa7..0de426a7 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,21 +1,161 @@ +import json import os +from collections import defaultdict from pathlib import Path +import cv2 import gdown import pytest import torchvision -from luxonis_ml.data import LabelType, LuxonisDataset +from luxonis_ml.data import LuxonisDataset from luxonis_ml.data.parsers import LuxonisParser -from luxonis_ml.utils import environ - -Path(environ.LUXONISML_BASE_PATH).mkdir(exist_ok=True) - +from luxonis_ml.data.utils.data_utils import rgb_to_bool_masks +from luxonis_ml.utils import LuxonisFileSystem, environ + +WORK_DIR = Path("tests", "data") + +environ.LUXONISML_BASE_PATH = 
WORK_DIR / "luxonisml" + + +@pytest.fixture +def parking_lot_dataset() -> LuxonisDataset: + url = "gs://luxonis-test-bucket/luxonis-ml-test-data/D1_ParkingSlotTest" + base_path = LuxonisFileSystem.download(url, WORK_DIR) + mask_brand_path = base_path / "mask_brand" + mask_color_path = base_path / "mask_color" + kpt_mask_path = base_path / "keypoints_mask_vehicle" + + def generator(): + filenames: dict[int, Path] = {} + for base_path in [kpt_mask_path, mask_brand_path, mask_color_path]: + for sequence_path in list(sorted(base_path.glob("sequence.*"))): + frame_data = sequence_path / "step0.frame_data.json" + with open(frame_data) as f: + data = json.load(f)["captures"][0] + frame_data = data["annotations"] + sequence_num = int(sequence_path.suffix[1:]) + filename = data["filename"] + if filename is not None: + filename = sequence_path / filename + filenames[sequence_num] = filename + else: + filename = filenames[sequence_num] + W, H = data["dimension"] + + annotations = { + anno["@type"].split(".")[-1]: anno for anno in frame_data + } -def create_dataset(name: str) -> LuxonisDataset: - if LuxonisDataset.exists(name): - dataset = LuxonisDataset(name) - dataset.delete_dataset() - return LuxonisDataset(name) + bbox_classes = {} + bboxes = {} + + for bbox_annotation in annotations.get( + "BoundingBox2DAnnotation", defaultdict(list) + )["values"]: + class_ = bbox_annotation["labelName"].split("-")[-1].lower() + if class_ == "motorbiek": + class_ = "motorbike" + x, y = bbox_annotation["origin"] + w, h = bbox_annotation["dimension"] + instance_id = bbox_annotation["instanceId"] + bbox_classes[instance_id] = class_ + bboxes[instance_id] = [x / W, y / H, w / W, h / H] + yield { + "file": filename, + "annotation": { + "type": "boundingbox", + "class": class_, + "x": x / W, + "y": y / H, + "w": w / W, + "h": h / H, + "instance_id": instance_id, + }, + } + + for kpt_annotation in annotations.get( + "KeypointAnnotation", defaultdict(list) + )["values"]: + keypoints = kpt_annotation["keypoints"] + instance_id = kpt_annotation["instanceId"] + class_ = bbox_classes[instance_id] + bbox = bboxes[instance_id] + kpts = [] + + if class_ == "motorbike": + keypoints = keypoints[:3] + else: + keypoints = keypoints[3:] + + for kp in keypoints: + x, y = kp["location"] + kpts.append([x / W, y / H, kp["state"]]) + + yield { + "file": filename, + "annotation": { + "type": "detection", + "class": class_, + "task": class_, + "keypoints": kpts, + "instance_id": instance_id, + "boundingbox": { + "x": bbox[0], + "y": bbox[1], + "w": bbox[2], + "h": bbox[3], + }, + }, + } + + vehicle_type_segmentation = annotations[ + "SemanticSegmentationAnnotation" + ] + mask = cv2.cvtColor( + cv2.imread( + str(sequence_path / vehicle_type_segmentation["filename"]) + ), + cv2.COLOR_BGR2RGB, + ) + classes = { + inst["labelName"]: inst["pixelValue"][:3] + for inst in vehicle_type_segmentation["instances"] + } + if base_path == kpt_mask_path: + task = "vehicle_type_segmentation" + elif base_path == mask_brand_path: + task = "brand_segmentation" + else: + task = "color_segmentation" + for class_, mask_ in rgb_to_bool_masks( + mask, classes, add_background_class=True + ): + yield { + "file": filename, + "annotation": { + "type": "mask", + "class": class_, + "task": task, + "mask": mask_, + }, + } + if base_path == mask_color_path: + yield { + "file": filename, + "annotation": { + "type": "mask", + "class": "vehicle", + "task": "vehicle_segmentation", + "mask": mask.astype(bool)[..., 0] + | mask.astype(bool)[..., 1] + | mask.astype(bool)[..., 
2], + }, + } + + dataset = LuxonisDataset("__D1ParkingSLot-test", delete_existing=True) + dataset.add(generator()) + dataset.make_splits() + return dataset @pytest.fixture(scope="session", autouse=True) @@ -38,35 +178,8 @@ def create_coco_dataset(): @pytest.fixture(scope="session", autouse=True) -def create_coco_multitask_dataset(): - url = "https://drive.google.com/uc?id=1XlvFK7aRmt8op6-hHkWVKIJQeDtOwoRT" - output_folder = "../data/" - output_zip = os.path.join(output_folder, "COCO_people_subset.zip") - - if not os.path.exists(output_folder): - os.makedirs(output_folder) - - if not os.path.exists(output_zip) and not os.path.exists( - os.path.join(output_folder, "COCO_people_subset") - ): - gdown.download(url, output_zip, quiet=False) - - parser = LuxonisParser( - output_zip, - dataset_name="coco_test_multitask", - delete_existing=True, - task_mapping={ - LabelType.KEYPOINTS: "keypoints-task", - LabelType.SEGMENTATION: "segmentation-task", - LabelType.CLASSIFICATION: "classification-task", - LabelType.BOUNDINGBOX: "boundingbox-task", - }, - ) - parser.parse(random_split=True) - - -def _create_cifar10(dataset_name: str, task_names: list[str]) -> None: - dataset = create_dataset(dataset_name) +def create_cifar10_dataset(): + dataset = LuxonisDataset("cifar10_test", delete_existing=True) output_folder = "../data/" if not os.path.exists(output_folder): os.makedirs(output_folder) @@ -92,25 +205,13 @@ def CIFAR10_subset_generator(): break path = os.path.join(output_folder, f"cifar_{i}.png") image.save(path) - for task_name in task_names: - yield { - "file": path, - "annotation": { - "type": "classification", - "task": task_name, - "class": classes[label], - }, - } + yield { + "file": path, + "annotation": { + "type": "classification", + "class": classes[label], + }, + } dataset.add(CIFAR10_subset_generator()) dataset.make_splits() - - -@pytest.fixture(scope="session", autouse=True) -def create_cifar10_dataset(): - _create_cifar10("cifar10_test", ["classification"]) - - -@pytest.fixture(scope="session", autouse=True) -def create_cifar10_task_dataset(): - _create_cifar10("cifar10_task_test", [f"classification_{i}" for i in [1, 2, 3]]) diff --git a/tests/integration/test_multi_input.py b/tests/integration/multi_input_modules.py similarity index 76% rename from tests/integration/test_multi_input.py rename to tests/integration/multi_input_modules.py index 575f653d..bac43091 100644 --- a/tests/integration/test_multi_input.py +++ b/tests/integration/multi_input_modules.py @@ -1,12 +1,6 @@ -import os -import shutil -from pathlib import Path - -import pytest import torch from torch import Tensor, nn -from luxonis_train.core import Exporter, Inferer, Trainer from luxonis_train.nodes import BaseNode from luxonis_train.utils.loaders import BaseLoaderTorch from luxonis_train.utils.types import FeaturesProtocol, LabelType, Packet @@ -117,36 +111,3 @@ def forward(self, inputs: list[Tensor]): fn1, _, disp = inputs x = fn1 + disp return [self.conv(x)] - - -@pytest.fixture(scope="function", autouse=True) -def clear_output(): - shutil.rmtree("output", ignore_errors=True) - - -@pytest.mark.parametrize( - "config_file", [path for path in os.listdir("configs") if "multi_input" in path] -) -def test_sanity(config_file): - # Test training - trainer = Trainer(f"configs/{config_file}") - trainer.train() - # Test evaluation - trainer.test(view="val") - - # Test export - Exporter(f"configs/{config_file}").export("test_export_multi_input.onnx") - # Cleanup after exporter - assert 
os.path.exists("test_export_multi_input.onnx") - os.remove("test_export_multi_input.onnx") - - # Test inference - Inferer( - f"configs/{config_file}", - opts=None, - view="train", - save_dir=Path("infer_save_dir"), - ).infer() - # Cleanup after inferer - assert os.path.exists("infer_save_dir") - shutil.rmtree("infer_save_dir") diff --git a/tests/integration/test_sanity.py b/tests/integration/test_sanity.py index 52ee2f0b..f040c032 100644 --- a/tests/integration/test_sanity.py +++ b/tests/integration/test_sanity.py @@ -1,95 +1,79 @@ -import os import shutil -import subprocess from pathlib import Path import pytest +from luxonis_ml.data import LuxonisDataset +from multi_input_modules import * -TEST_OUTPUT = Path("tests/test-output") +from luxonis_train.core import Exporter, Inferer, Trainer, Tuner +from luxonis_train.utils.config import Config + +TEST_OUTPUT = Path("tests/integration/_test-output") +INFER_PATH = Path("tests/integration/_infer_save_dir") +ONNX_PATH = Path("tests/integration/_model.onnx") +STUDY_PATH = Path("study_local.db") + +OPTS = { + "trainer.epochs": 1, + "trainer.batch_size": 1, + "trainer.validation_interval": 1, + "trainer.callbacks": "[]", + "tracker.save_directory": str(TEST_OUTPUT), + "tuner.n_trials": 4, +} @pytest.fixture(scope="function", autouse=True) def clear_output(): + Config.clear_instance() + yield shutil.rmtree(TEST_OUTPUT, ignore_errors=True) + STUDY_PATH.unlink(missing_ok=True) + ONNX_PATH.unlink(missing_ok=True) + shutil.rmtree(INFER_PATH, ignore_errors=True) @pytest.mark.parametrize( - "config_file", [path for path in os.listdir("configs") if "model" in path] + "config_file", [str(path) for path in Path("configs").glob("*model*")] ) -def test_sanity(config_file): - opts = [ - "trainer.epochs", - "1", - "trainer.validation_interval", - "1", - "trainer.callbacks", - "[]", - "trainer.batch_size", - "1", - "tracker.save_directory", - str(TEST_OUTPUT), - ] - result = subprocess.run( - ["luxonis_train", "train", "--config", f"configs/{config_file}", *opts], +def test_simple_models(config_file: str): + trainer = Trainer( + config_file, + opts=OPTS, ) - assert result.returncode == 0 + trainer.train() + trainer.test() - opts += ["model.weights", str(list(TEST_OUTPUT.rglob("*.ckpt"))[0])] - opts += ["exporter.onnx.opset_version", "11"] + Exporter(config_file).export("test_export.onnx") - result = subprocess.run( - ["luxonis_train", "export", "--config", f"configs/{config_file}", *opts], - ) - assert result.returncode == 0 +def test_multi_input(): + config_file = "configs/example_multi_input.yaml" + trainer = Trainer(config_file, opts=OPTS) + trainer.train() + trainer.test(view="val") - result = subprocess.run( - ["luxonis_train", "eval", "--config", f"configs/{config_file}", *opts], - ) + assert not ONNX_PATH.exists() + Exporter(config_file).export(str(ONNX_PATH)) + assert ONNX_PATH.exists() + + assert not INFER_PATH.exists() + Inferer(config_file, view="val", save_dir=INFER_PATH).infer() + assert INFER_PATH.exists() - assert result.returncode == 0 - - save_dir = Path("sanity_infer_save_dir") - shutil.rmtree(save_dir, ignore_errors=True) - - result = subprocess.run( - [ - "luxonis_train", - "infer", - "--save-dir", - str(save_dir), - "--config", - f"configs/{config_file}", - *opts, - ], - ) - assert result.returncode == 0 - assert save_dir.exists() - assert len(list(save_dir.rglob("*.png"))) > 0 - shutil.rmtree(save_dir, ignore_errors=True) +def test_custom_tasks(parking_lot_dataset: LuxonisDataset): + config_file = "tests/configs/parking_lot_config.yaml" + 
Trainer( + config_file, + opts=OPTS + | { + "loader.params.dataset_name": parking_lot_dataset.dataset_name, + }, + ).train() def test_tuner(): - Path("study_local.db").unlink(missing_ok=True) - result = subprocess.run( - [ - "luxonis_train", - "tune", - "--config", - "configs/example_tuning.yaml", - "trainer.epochs", - "1", - "trainer.validation_interval", - "1", - "trainer.callbacks", - "[]", - "tuner.n_trials", - "4", - "trainer.batch_size", - "1", - "tracker.save_directory", - str(TEST_OUTPUT), - ], - ) - assert result.returncode == 0 + tuner = Tuner("configs/example_tuning.yaml", opts=OPTS) + tuner.tune() + assert STUDY_PATH.exists() From 04d008b3cd0a27be7120735caeab06ab2879da5b Mon Sep 17 00:00:00 2001 From: conorsim <60359299+conorsim@users.noreply.github.com> Date: Wed, 14 Aug 2024 20:51:35 -0600 Subject: [PATCH 45/75] Fix: Update NN Archive Generation (#56) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: jkbmrz Co-authored-by: Martin Kozlovský --- luxonis_train/core/archiver.py | 71 +++++++++++-------- .../nodes/enums/head_categorization.py | 12 ++-- luxonis_train/utils/general.py | 7 +- 3 files changed, 52 insertions(+), 38 deletions(-) diff --git a/luxonis_train/core/archiver.py b/luxonis_train/core/archiver.py index a376b9a7..24b10c1d 100644 --- a/luxonis_train/core/archiver.py +++ b/luxonis_train/core/archiver.py @@ -1,7 +1,7 @@ import os from logging import getLogger from pathlib import Path -from typing import Any +from typing import Any, List import onnx from luxonis_ml.nn_archive.archive_generator import ArchiveGenerator @@ -89,6 +89,7 @@ def _mult(lst: list[float | int]) -> list[float]: name=input_name, dtype=inputs_dict[input_name]["dtype"], shape=inputs_dict[input_name]["shape"], + layout=inputs_dict[input_name]["layout"], preprocessing=preprocessing, ) @@ -163,7 +164,13 @@ def _get_onnx_inputs(self, executable_path: str): shape.append(d.dim_value) else: raise ValueError("Unsupported input dimension identifier type") - inputs_dict[input.name] = {"dtype": dtype, "shape": shape} + if shape[1] == 3: + layout = "NCHW" + elif shape[3] == 3: + layout = "NHWC" + else: + raise ValueError("Unknown input layout") + inputs_dict[input.name] = {"dtype": dtype, "shape": shape, "layout": layout} return inputs_dict def _add_input( @@ -171,6 +178,7 @@ def _add_input( name: str, dtype: str, shape: list, + layout: str, preprocessing: dict, input_type: str = "image", ) -> None: @@ -184,6 +192,8 @@ def _add_input( @param shape: Shape of the input data as a list of integers (e.g. [H,W], [H,W,C], [BS,H,W,C], ...). @type preprocessing: dict @param preprocessing: Preprocessing steps applied to the input data. + @type layout: str + @param layout: Lettercode interpretation of the input data dimensions (e.g., 'NCHW'). @type input_type: str @param input_type: Type of input data (e.g., 'image'). 
""" @@ -194,6 +204,7 @@ def _add_input( "dtype": dtype, "input_type": input_type, "shape": shape, + "layout": layout, "preprocessing": preprocessing, } ) @@ -240,19 +251,21 @@ def _add_output(self, name: str, dtype: str) -> None: self.outputs.append({"name": name, "dtype": dtype}) - def _get_classes(self, head_family): - if head_family.startswith("Classification"): - return self.dataset_metadata._classes["class"] - elif head_family.startswith("Object"): - return self.dataset_metadata._classes["boundingbox"] - elif head_family.startswith("Segmentation"): - return self.dataset_metadata._classes["segmentation"] - elif head_family.startswith("Keypoint"): - return self.dataset_metadata._classes["keypoints"] - else: - raise ValueError( - f"No classes found for the specified head family ({head_family})" - ) + def _get_classes(self, node_name: str, node_task: str | None) -> List[str]: + if not node_task: + match node_name: + case "ClassificationHead": + node_task = "classification" + case "EfficientBBoxHead": + node_task = "boundingbox" + case "SegmentationHead" | "BiSeNetHead": + node_task = "segmentation" + case "ImplicitKeypointBBoxHead" | "EfficientKeypointBBoxHead": + node_task = "keypoints" + case _: + raise ValueError("Node does not map to a default task.") + + return self.dataset_metadata._classes.get(node_task, []) def _get_head_specific_parameters( self, head_name, head_alias, executable_path @@ -301,27 +314,25 @@ def _get_head_specific_parameters( raise ValueError("Unknown head name") return parameters - def _get_head_outputs(self, head_name) -> dict: + def _get_head_outputs(self, head_name) -> List[str]: """Get model outputs in a head-specific format. @type head_name: str @param head_name: Name of the head (e.g. 'EfficientBBoxHead'). """ - head_outputs = {} if head_name == "ClassificationHead": - head_outputs["predictions"] = self.outputs[0]["name"] + return [self.outputs[0]["name"]] elif head_name == "EfficientBBoxHead": - head_outputs["yolo_outputs"] = [output["name"] for output in self.outputs] + return [output["name"] for output in self.outputs] elif head_name in ["SegmentationHead", "BiSeNetHead"]: - head_outputs["predictions"] = self.outputs[0]["name"] + return [self.outputs[0]["name"]] elif head_name == "ImplicitKeypointBBoxHead": - head_outputs["predictions"] = self.outputs[0]["name"] + return [self.outputs[0]["name"]] elif head_name == "EfficientKeypointBBoxHead": - head_outputs["predictions"] = self.outputs[0]["name"] + return [self.outputs[0]["name"]] else: raise ValueError("Unknown head name") - return head_outputs def _get_heads(self, executable_path): """Get model heads. 
@@ -337,16 +348,18 @@ def _get_heads(self, executable_path): # node_inputs = node.inputs if node_alias in self.lightning_module.outputs: if node_name in ImplementedHeads.__members__: - head_family = getattr(ImplementedHeads, node_name).value - classes = self._get_classes(head_family) + parser = getattr(ImplementedHeads, node_name).value + classes = self._get_classes(node_name, node.task) head_outputs = self._get_head_outputs(node_name) head_dict = { - "family": head_family, + "parser": parser, + "metadata": { + "classes": classes, + "n_classes": len(classes), + }, "outputs": head_outputs, - "classes": classes, - "n_classes": len(classes), } - head_dict.update( + head_dict["metadata"].update( self._get_head_specific_parameters( node_name, node_alias, executable_path ) diff --git a/luxonis_train/nodes/enums/head_categorization.py b/luxonis_train/nodes/enums/head_categorization.py index a2854b3a..d36c9647 100644 --- a/luxonis_train/nodes/enums/head_categorization.py +++ b/luxonis_train/nodes/enums/head_categorization.py @@ -4,12 +4,12 @@ class ImplementedHeads(Enum): """Task categorization for the implemented heads.""" - ClassificationHead = "Classification" - EfficientBBoxHead = "ObjectDetectionYOLO" - ImplicitKeypointBBoxHead = "KeypointDetectionYOLO" - EfficientKeypointBBoxHead = "Keypoint" - SegmentationHead = "Segmentation" - BiSeNetHead = "Segmentation" + ClassificationHead = "ClassificationParser" + EfficientBBoxHead = "YoloDetectionNetwork" + ImplicitKeypointBBoxHead = "YoloDetectionNetwork" + EfficientKeypointBBoxHead = "YoloDetectionNetwork" + SegmentationHead = "SegmentationParser" + BiSeNetHead = "SegmentationParser" class ImplementedHeadsIsSoxtmaxed(Enum): diff --git a/luxonis_train/utils/general.py b/luxonis_train/utils/general.py index 96e6b766..6f7e027a 100644 --- a/luxonis_train/utils/general.py +++ b/luxonis_train/utils/general.py @@ -28,8 +28,8 @@ def __init__( classes, number of keypoints, I{etc.} instead of passing them as arguments to the model. - @type classes: dict[LabelType, list[str]] | None - @param classes: Dictionary mapping label types to lists of class names. If not + @type classes: dict[str, list[str]] | None + @param classes: Dictionary mapping task names to lists of class names. If not provided, will be inferred from the dataset loader. @type n_classes: int | None @param n_classes: Number of classes for each label type. @@ -42,6 +42,7 @@ def __init__( @type loader: DataLoader | None @param loader: Dataset loader. """ + self._classes = classes or {} self._n_keypoints = n_keypoints or {} self._loader = loader @@ -50,7 +51,7 @@ def __init__( def classes(self) -> dict[str, list[str]]: """Dictionary mapping label types to lists of class names. - @type: dict[LabelType, list[str]] + @type: dict[str, list[str]] @raises ValueError: If classes were not provided during initialization. 
""" if self._classes is None: From 4fc61115f01bd489cc0c805b68f99724c7f93298 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin <116955183+JSabadin@users.noreply.github.com> Date: Thu, 15 Aug 2024 11:53:46 +0200 Subject: [PATCH 46/75] Adaptive Detection Loss Speed-Up (#51) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Martin Kozlovský --- .../losses/adaptive_detection_loss.py | 74 ++++++++++--------- 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py index c495e400..49e35848 100644 --- a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py +++ b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py @@ -79,6 +79,12 @@ def __init__( self.class_loss_weight = class_loss_weight self.iou_loss_weight = iou_loss_weight + self.anchors = None + self.anchor_points = None + self.n_anchors_list = None + self.stride_tensor = None + self.gt_bboxes_scale = None + def prepare( self, outputs: Packet[Tensor], labels: Labels ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: @@ -88,33 +94,33 @@ def prepare( batch_size = pred_scores.shape[0] device = pred_scores.device - target = self.get_label(labels)[0] - gt_bboxes_scale = torch.tensor( - [ - self.original_img_size[1], - self.original_img_size[0], - self.original_img_size[1], - self.original_img_size[0], - ], - device=device, - ) - ( - anchors, - anchor_points, - n_anchors_list, - stride_tensor, - ) = anchors_for_fpn_features( - feats, - self.stride, - self.grid_cell_size, - self.grid_cell_offset, - multiply_with_stride=True, - ) - - anchor_points_strided = anchor_points / stride_tensor - pred_bboxes = dist2bbox(pred_distri, anchor_points_strided) + target = labels[self.task][0].to(device) + if self.gt_bboxes_scale is None: + self.gt_bboxes_scale = torch.tensor( + [ + self.original_img_size[1], + self.original_img_size[0], + self.original_img_size[1], + self.original_img_size[0], + ], + device=device, + ) + ( + self.anchors, + self.anchor_points, + self.n_anchors_list, + self.stride_tensor, + ) = anchors_for_fpn_features( + feats, + self.stride, + self.grid_cell_size, + self.grid_cell_offset, + multiply_with_stride=True, + ) + self.anchor_points_strided = self.anchor_points / self.stride_tensor - target = self._preprocess_target(target, batch_size, gt_bboxes_scale) + target = self._preprocess_target(target, batch_size) + pred_bboxes = dist2bbox(pred_distri, self.anchor_points_strided) gt_labels = target[:, :, :1] gt_xyxy = target[:, :, 1:] @@ -128,12 +134,12 @@ def prepare( mask_positive, _, ) = self.atts_assigner( - anchors, - n_anchors_list, + self.anchors, + self.n_anchors_list, gt_labels, gt_xyxy, mask_gt, - pred_bboxes.detach() * stride_tensor, + pred_bboxes.detach() * self.stride_tensor, ) else: # TODO: log change of assigner (once common Logger) @@ -145,8 +151,8 @@ def prepare( _, ) = self.tal_assigner( pred_scores.detach(), - pred_bboxes.detach() * stride_tensor, - anchor_points, + pred_bboxes.detach() * self.stride_tensor, + self.anchor_points, gt_labels, gt_xyxy, mask_gt, @@ -155,7 +161,7 @@ def prepare( return ( pred_bboxes, pred_scores, - assigned_bboxes / stride_tensor, + assigned_bboxes / self.stride_tensor, assigned_labels, assigned_scores, mask_positive, @@ -192,7 +198,7 @@ def forward( return loss, sub_losses - def _preprocess_target(self, target: Tensor, batch_size: int, scale_tensor: Tensor): + def 
_preprocess_target(self, target: Tensor, batch_size: int): """Preprocess target in shape [batch_size, N, 5] where N is maximum number of instances in one image.""" sample_ids, counts = cast( @@ -204,7 +210,7 @@ def _preprocess_target(self, target: Tensor, batch_size: int, scale_tensor: Tens for id, count in zip(sample_ids, counts): out_target[id, :count] = target[target[:, 0] == id][:, 1:] - scaled_target = out_target[:, :, 1:5] * scale_tensor + scaled_target = out_target[:, :, 1:5] * self.gt_bboxes_scale out_target[..., 1:] = box_convert(scaled_target, "xywh", "xyxy") return out_target From 6c7045f2bef81dc3e6f85b0fe93089d0e1287da1 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin <116955183+JSabadin@users.noreply.github.com> Date: Thu, 15 Aug 2024 12:20:18 +0200 Subject: [PATCH 47/75] Remove `num_workers` Hotfix (#58) --- luxonis_train/utils/config.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 13149d1b..283bffba 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -326,17 +326,6 @@ class Config(LuxonisConfig): tuner: TunerConfig | None = None ENVIRON: Environ = Field(Environ(), exclude=True) - @model_validator(mode="after") - def validate_num_workers(self) -> Self: - if self.loader.name == "LuxonisLoaderTorch": - if self.trainer.num_workers != 0: - logger.warning( - "Setting `num_workers` to 0 because of " - "compatibility with LuxonisDataset." - ) - self.trainer.num_workers = 0 - return self - @model_validator(mode="before") @classmethod def check_environment(cls, data: Any) -> Any: From 4af2fab2aa4766d77f78db2b4cb046ec6c441ebf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Fri, 16 Aug 2024 07:19:29 +0200 Subject: [PATCH 48/75] Fixed Retrieving Labels in AdaptiveDetectionLoss (#59) --- .../attached_modules/losses/adaptive_detection_loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py index 49e35848..6a28bff9 100644 --- a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py +++ b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py @@ -94,7 +94,7 @@ def prepare( batch_size = pred_scores.shape[0] device = pred_scores.device - target = labels[self.task][0].to(device) + target = self.get_label(labels)[0] if self.gt_bboxes_scale is None: self.gt_bboxes_scale = torch.tensor( [ From 9b17a703f1f1e5625ec99f759a0c166ab7abf3ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Mon, 19 Aug 2024 15:28:27 +0200 Subject: [PATCH 49/75] Unified Cores and Cleanup (#57) --- .github/workflows/tests.yaml | 9 + .gitignore | 5 + configs/README.md | 23 +- configs/classification_model.yaml | 3 - configs/detection_model.yaml | 3 - configs/efficient_coco_model.yaml | 2 +- configs/example_export.yaml | 3 - configs/example_multi_input.yaml | 2 - configs/example_tuning.yaml | 7 +- configs/keypoint_bbox_model.yaml | 3 - configs/segmentation_model.yaml | 3 - luxonis_train/__main__.py | 32 +- luxonis_train/callbacks/README.md | 2 - .../callbacks/archive_on_train_end.py | 70 +-- .../callbacks/export_on_train_end.py | 48 +- .../callbacks/luxonis_progress_bar.py | 86 ++- luxonis_train/callbacks/metadata_logger.py | 10 +- luxonis_train/callbacks/test_on_train_end.py | 7 +- luxonis_train/callbacks/upload_checkpoint.py | 22 +- luxonis_train/core/__init__.py | 9 +- luxonis_train/core/archiver.py 
| 396 ------------ luxonis_train/core/core.py | 594 +++++++++++++++--- luxonis_train/core/exporter.py | 217 ------- luxonis_train/core/inferer.py | 56 -- luxonis_train/core/trainer.py | 185 ------ luxonis_train/core/tuner.py | 267 -------- luxonis_train/core/utils/archive_utils.py | 219 +++++++ luxonis_train/core/utils/export_utils.py | 119 ++++ luxonis_train/core/utils/infer_utils.py | 30 + luxonis_train/core/utils/train_utils.py | 25 + luxonis_train/core/utils/tune_utils.py | 77 +++ luxonis_train/models/__init__.py | 4 +- ...{luxonis_model.py => luxonis_lightning.py} | 64 +- luxonis_train/nodes/blocks/blocks.py | 22 +- luxonis_train/nodes/efficient_bbox_head.py | 2 +- luxonis_train/utils/config.py | 78 +-- luxonis_train/utils/general.py | 20 +- luxonis_train/utils/loaders/base_loader.py | 2 +- .../utils/loaders/luxonis_loader_torch.py | 2 +- luxonis_train/utils/tracker.py | 26 +- tests/integration/conftest.py | 17 +- tests/integration/multi_input_modules.py | 2 +- tests/integration/test_sanity.py | 56 +- tests/unittests/test_core/__init__.py | 0 tests/unittests/test_core/test_archiver.py | 339 ---------- 45 files changed, 1277 insertions(+), 1891 deletions(-) delete mode 100644 luxonis_train/core/archiver.py delete mode 100644 luxonis_train/core/exporter.py delete mode 100644 luxonis_train/core/inferer.py delete mode 100644 luxonis_train/core/trainer.py delete mode 100644 luxonis_train/core/tuner.py create mode 100644 luxonis_train/core/utils/archive_utils.py create mode 100644 luxonis_train/core/utils/export_utils.py create mode 100644 luxonis_train/core/utils/infer_utils.py create mode 100644 luxonis_train/core/utils/train_utils.py create mode 100644 luxonis_train/core/utils/tune_utils.py rename luxonis_train/models/{luxonis_model.py => luxonis_lightning.py} (95%) delete mode 100644 tests/unittests/test_core/__init__.py delete mode 100644 tests/unittests/test_core/test_archiver.py diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index af77c60f..112741e3 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -45,6 +45,15 @@ jobs: if: matrix.os == 'macOS-latest' run: pip install -e .[dev] + - name: Authenticate to Google Cloud + id: google-auth + uses: google-github-actions/auth@v2 + with: + credentials_json: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }} + create_credentials_file: true + export_environment_variables: true + token_format: access_token + - name: Run tests with coverage [Ubuntu] if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' run: pytest tests --cov=luxonis_train --cov-report xml --junit-xml pytest.xml diff --git a/.gitignore b/.gitignore index 53c9f325..1f2a2381 100644 --- a/.gitignore +++ b/.gitignore @@ -148,3 +148,8 @@ models_venv/* # vscode settings .vscode tests/data +mlartifacts +mlruns +wandb +tests/_data +tests/integration/_test-output diff --git a/configs/README.md b/configs/README.md index 8e3b4935..bf37317c 100644 --- a/configs/README.md +++ b/configs/README.md @@ -29,16 +29,15 @@ You can create your own config or use/edit one of the examples. 
## Top-level Options -| Key | Type | Default value | Description | -| ------------- | --------------------- | ------------- | --------------------------------------------- | -| use_rich_text | bool | True | whether to use rich text for console printing | -| model | [Model](#model) | | model section | -| dataset | [dataset](#dataset) | | dataset section | -| train | [train](#train) | | train section | -| tracker | [tracker](#tracker) | | tracker section | -| trainer | [trainer](#trainer) | | trainer section | -| exporter | [exporter](#exporter) | | exporter section | -| tuner | [tuner](#tuner) | | tuner section | +| Key | Type | Default value | Description | +| -------- | --------------------- | ------------- | ---------------- | +| model | [Model](#model) | | model section | +| dataset | [dataset](#dataset) | | dataset section | +| train | [train](#train) | | train section | +| tracker | [tracker](#tracker) | | tracker section | +| trainer | [trainer](#trainer) | | trainer section | +| exporter | [exporter](#exporter) | | exporter section | +| tuner | [tuner](#tuner) | | tuner section | ## Model @@ -214,9 +213,9 @@ Here you can define configuration for exporting. | ---------------------- | --------------------------------- | --------------- | ----------------------------------------------------------------------------------------------- | | export_save_directory | str | "output_export" | Where to save the exported files. | | input_shape | list\[int\] \| None | None | Input shape of the model. If not provided, inferred from the dataset. | -| export_model_name | str | "model" | Name of the exported model. | -| data_type | Literal\["INT8", "FP16", "FP32"\] | "FP16" | Data type of the exported model. | +| data_type | Literal\["INT8", "FP16", "FP32"\] | "FP16" | Data type of the exported model. Only used for conversion to BLOB. | | reverse_input_channels | bool | True | Whether to reverse the image channels in the exported model. Relevant for `.blob` export | +| upload | bool | True | Whether to upload the files created during export to the current tracker. | | scale_values | list\[float\] \| None | None | What scale values to use for input normalization. If not provided, inferred from augmentations. | | mean_values | list\[float\] \| None | None | What mean values to use for input normalizations. If not provided, inferred from augmentations. | | upload_directory | str \| None | None | Where to upload the exported models. 
| diff --git a/configs/classification_model.yaml b/configs/classification_model.yaml index 5d2eb1f2..be5a5006 100644 --- a/configs/classification_model.yaml +++ b/configs/classification_model.yaml @@ -1,8 +1,5 @@ # Example configuration for training a predefined segmentation model - -use_rich_text: True - model: name: cifar10_classification predefined_model: diff --git a/configs/detection_model.yaml b/configs/detection_model.yaml index 899e317d..45c3431e 100644 --- a/configs/detection_model.yaml +++ b/configs/detection_model.yaml @@ -1,8 +1,5 @@ # Example configuration for training a predefined detection model - -use_rich_text: True - model: name: coco_detection predefined_model: diff --git a/configs/efficient_coco_model.yaml b/configs/efficient_coco_model.yaml index d9f70647..64aa48e0 100644 --- a/configs/efficient_coco_model.yaml +++ b/configs/efficient_coco_model.yaml @@ -95,7 +95,7 @@ trainer: batch_size: 4 accumulate_grad_batches: 1 epochs: 200 - num_workers: 0 + num_workers: 4 train_metrics_interval: -1 validation_interval: 10 num_log_images: 8 diff --git a/configs/example_export.yaml b/configs/example_export.yaml index 7aadc30c..f86f1dfa 100644 --- a/configs/example_export.yaml +++ b/configs/example_export.yaml @@ -1,8 +1,5 @@ # Example configuration for exporting a predefined segmentation model - -use_rich_text: True - model: name: coco_segmentation weights: null # specify a path to the weights here diff --git a/configs/example_multi_input.yaml b/configs/example_multi_input.yaml index 31eaa44e..d185f37e 100644 --- a/configs/example_multi_input.yaml +++ b/configs/example_multi_input.yaml @@ -8,8 +8,6 @@ loader: # Needs to be set for visualizers and evaluators to work. image_source: left -use_rich_text: True - model: name: example_multi_input nodes: diff --git a/configs/example_tuning.yaml b/configs/example_tuning.yaml index 9a8bfd79..b350ea2f 100644 --- a/configs/example_tuning.yaml +++ b/configs/example_tuning.yaml @@ -1,8 +1,5 @@ # Example configuration for tuning a predefined segmentation model - -use_rich_text: True - model: name: coco_segmentation predefined_model: @@ -31,8 +28,8 @@ trainer: - name: Flip batch_size: 4 - epochs: &epochs 10 - validation_interval: 1 + epochs: &epochs 100 + validation_interval: 10 num_log_images: 8 scheduler: diff --git a/configs/keypoint_bbox_model.yaml b/configs/keypoint_bbox_model.yaml index 8cdd3149..5b1ebb2d 100644 --- a/configs/keypoint_bbox_model.yaml +++ b/configs/keypoint_bbox_model.yaml @@ -1,8 +1,5 @@ # Example configuration for training a predefined keypoint-detection model - -use_rich_text: True - model: name: coco_keypoints predefined_model: diff --git a/configs/segmentation_model.yaml b/configs/segmentation_model.yaml index b7becbfa..a822d7c1 100644 --- a/configs/segmentation_model.yaml +++ b/configs/segmentation_model.yaml @@ -1,8 +1,5 @@ # Example configuration for training a predefined segmentation model - -use_rich_text: True - model: name: coco_segmentation predefined_model: diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index eefdaa7e..2b7f3ed3 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -5,7 +5,6 @@ import typer import yaml -from luxonis_ml.data.__main__ import inspect as lxml_inspect from luxonis_ml.enums import SplitType app = typer.Typer( @@ -49,35 +48,35 @@ def train( opts: OptsType = None, ): """Start training.""" - from luxonis_train.core import Trainer + from luxonis_train.core import LuxonisModel - Trainer(config, opts, resume=resume).train() + LuxonisModel(config, 
opts).train(resume_weights=resume) @app.command() -def eval( +def test( config: ConfigType = None, view: ViewType = SplitType.VAL, opts: OptsType = None ): """Evaluate model.""" - from luxonis_train.core import Trainer + from luxonis_train.core import LuxonisModel - Trainer(config, opts).test(view=view.value) + LuxonisModel(config, opts).test(view=view.value) @app.command() def tune(config: ConfigType = None, opts: OptsType = None): """Start hyperparameter tuning.""" - from luxonis_train.core import Tuner + from luxonis_train.core import LuxonisModel - Tuner(config, opts).tune() + LuxonisModel(config, opts).tune() @app.command() def export(config: ConfigType = None, opts: OptsType = None): """Export model.""" - from luxonis_train.core import Exporter + from luxonis_train.core import LuxonisModel - Exporter(config, opts).export() + LuxonisModel(config, opts).export() @app.command() @@ -88,9 +87,9 @@ def infer( opts: OptsType = None, ): """Run inference.""" - from luxonis_train.core import Inferer + from luxonis_train.core import LuxonisModel - Inferer(config, opts, view=view.value, save_dir=save_dir).infer() + LuxonisModel(config, opts).infer(view=view.value, save_dir=save_dir) @app.command() @@ -110,6 +109,7 @@ def inspect( ): """Inspect dataset.""" from lightning.pytorch import seed_everything + from luxonis_ml.data.__main__ import inspect as lxml_inspect from luxonis_train.utils.config import Config @@ -133,9 +133,7 @@ def inspect( lxml_inspect( name=cfg.loader.params["dataset_name"], view=view, - aug_config=Path( - f.name, - ), + aug_config=f.name, ) @@ -151,9 +149,9 @@ def archive( opts: OptsType = None, ): """Generate NN archive.""" - from luxonis_train.core import Archiver + from luxonis_train.core import LuxonisModel - Archiver(str(config), opts).archive(executable) + LuxonisModel(str(config), opts).archive(executable) def version_callback(value: bool): diff --git a/luxonis_train/callbacks/README.md b/luxonis_train/callbacks/README.md index 6c4d635b..eb34b081 100644 --- a/luxonis_train/callbacks/README.md +++ b/luxonis_train/callbacks/README.md @@ -21,7 +21,6 @@ List of supported callbacks from `lightning.pytorch`. - [LearningRateMonitor](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.LearningRateMonitor.html#lightning.pytorch.callbacks.LearningRateMonitor) - [ModelCheckpoint](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelCheckpoint.html#lightning.pytorch.callbacks.ModelCheckpoint) - [RichModelSummary](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.RichModelSummary.html#lightning.pytorch.callbacks.RichModelSummary) - - Added automatically if `use_rich_text` is set to `True` in [config](../../configs/README.md#topleveloptions). ## ExportOnTrainEnd @@ -36,7 +35,6 @@ Performs export on train end with best weights according to the validation loss. ## LuxonisProgressBar Custom rich text progress bar based on RichProgressBar from Pytorch Lightning. -Added automatically if `use_rich_text` is set to `True` in [config](../../configs/README.md#topleveloptions). 
## MetadataLogger diff --git a/luxonis_train/callbacks/archive_on_train_end.py b/luxonis_train/callbacks/archive_on_train_end.py index 4f5b6bc2..7d6da67f 100644 --- a/luxonis_train/callbacks/archive_on_train_end.py +++ b/luxonis_train/callbacks/archive_on_train_end.py @@ -1,29 +1,20 @@ import logging -import os -from pathlib import Path -from typing import cast import lightning.pytorch as pl -from luxonis_train.utils.config import Config +import luxonis_train from luxonis_train.utils.registry import CALLBACKS -from luxonis_train.utils.tracker import LuxonisTrackerPL + +logger = logging.getLogger(__name__) @CALLBACKS.register_module() class ArchiveOnTrainEnd(pl.Callback): - def __init__(self, upload_to_mlflow: bool = False): - """Callback that performs archiving of onnx or exported model at the end of - training/export. TODO: description. - - @type upload_to_mlflow: bool - @param upload_to_mlflow: If set to True, overrides the upload url in Archiver - with currently active MLFlow run (if present). - """ - super().__init__() - self.upload_to_mlflow = upload_to_mlflow - - def on_train_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> None: + def on_train_end( + self, + _: pl.Trainer, + pl_module: "luxonis_train.models.LuxonisLightningModule", + ) -> None: """Archives the model on train end. @type trainer: L{pl.Trainer} @@ -32,41 +23,24 @@ def on_train_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> No @param pl_module: Pytorch Lightning module. @raises RuntimeError: If no best model path is found. """ - from luxonis_train.core.archiver import Archiver - model_checkpoint_callbacks = [ - c - for c in trainer.callbacks # type: ignore - if isinstance(c, pl.callbacks.ModelCheckpoint) # type: ignore - ] - - # NOTE: assume that first checkpoint callback is based on val loss - best_model_path = model_checkpoint_callbacks[0].best_model_path + best_model_path = pl_module.core.get_min_loss_checkpoint_path() if not best_model_path: - raise RuntimeError( + logger.error( "No best model path found. " "Please make sure that ModelCheckpoint callback is present " - "and at least one validation epoch has been performed." + "and at least one validation epoch has been performed. " + "Skipping model archiving." ) - cfg: Config = pl_module.cfg - cfg.model.weights = best_model_path - if self.upload_to_mlflow: - if cfg.tracker.is_mlflow: - tracker = cast(LuxonisTrackerPL, trainer.logger) - new_upload_url = f"mlflow://{tracker.project_id}/{tracker.run_id}" - cfg.archiver.upload_url = new_upload_url - else: - logging.getLogger(__name__).warning( - "`upload_to_mlflow` is set to True, " - "but there is no MLFlow active run, skipping." - ) - - onnx_path = str(Path(best_model_path).parent.with_suffix(".onnx")) - if not os.path.exists(onnx_path): - raise FileNotFoundError( - "Model executable not found. Make sure to run exporter callback before archiver callback" + return + + onnx_path = pl_module.core._exported_models.get("onnx") + if onnx_path is None: + logger.error( + "Model executable not found. " + "Make sure to run exporter callback before archiver callback. " + "Skipping model archiving." 
) + return - archiver = Archiver(cfg=cfg) - - archiver.archive(onnx_path) + pl_module.core.archive(onnx_path) diff --git a/luxonis_train/callbacks/export_on_train_end.py b/luxonis_train/callbacks/export_on_train_end.py index 5d7bf6da..7e8f8a71 100644 --- a/luxonis_train/callbacks/export_on_train_end.py +++ b/luxonis_train/callbacks/export_on_train_end.py @@ -1,30 +1,20 @@ import logging -from pathlib import Path -from typing import cast import lightning.pytorch as pl -from luxonis_train.utils.config import Config +import luxonis_train from luxonis_train.utils.registry import CALLBACKS -from luxonis_train.utils.tracker import LuxonisTrackerPL logger = logging.getLogger(__name__) @CALLBACKS.register_module() class ExportOnTrainEnd(pl.Callback): - def __init__(self, upload_to_mlflow: bool = False): - """Callback that performs export on train end with best weights according to the - validation loss. - - @type upload_to_mlflow: bool - @param upload_to_mlflow: If set to True, overrides the upload url in Exporter - with currently active MLFlow run (if present). - """ - super().__init__() - self.upload_to_mlflow = upload_to_mlflow - - def on_train_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> None: + def on_train_end( + self, + _: pl.Trainer, + pl_module: "luxonis_train.models.LuxonisLightningModule", + ) -> None: """Exports the model on train end. @type trainer: L{pl.Trainer} @@ -33,15 +23,8 @@ def on_train_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> No @param pl_module: Pytorch Lightning module. @raises RuntimeError: If no best model path is found. """ - from luxonis_train.core.exporter import Exporter - model_checkpoint_callbacks = [ - c - for c in trainer.callbacks # type: ignore - if isinstance(c, pl.callbacks.ModelCheckpoint) # type: ignore - ] - # NOTE: assume that first checkpoint callback is based on val loss - best_model_path = model_checkpoint_callbacks[0].best_model_path + best_model_path = pl_module.core.get_best_metric_checkpoint_path() if not best_model_path: logger.error( "No model checkpoint found. " @@ -50,18 +33,5 @@ def on_train_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> No "Skipping model export." ) return - cfg: Config = pl_module.cfg - cfg.model.weights = best_model_path - if self.upload_to_mlflow: - if cfg.tracker.is_mlflow: - tracker = cast(LuxonisTrackerPL, trainer.logger) - new_upload_url = f"mlflow://{tracker.project_id}/{tracker.run_id}" - cfg.exporter.upload_url = new_upload_url - else: - logger.error( - "`upload_to_mlflow` is set to True, " - "but there is no MLFlow active run, skipping." 
- ) - exporter = Exporter(cfg=cfg) - onnx_path = str(Path(best_model_path).parent.with_suffix(".onnx")) - exporter.export(onnx_path=onnx_path) + + pl_module.core.export(weights=best_model_path) diff --git a/luxonis_train/callbacks/luxonis_progress_bar.py b/luxonis_train/callbacks/luxonis_progress_bar.py index 51c9541d..bed449f2 100644 --- a/luxonis_train/callbacks/luxonis_progress_bar.py +++ b/luxonis_train/callbacks/luxonis_progress_bar.py @@ -1,8 +1,8 @@ from collections.abc import Mapping import lightning.pytorch as pl -import rich from lightning.pytorch.callbacks import RichProgressBar +from rich.console import Console from rich.table import Table from luxonis_train.utils.registry import CALLBACKS @@ -12,16 +12,23 @@ class LuxonisProgressBar(RichProgressBar): """Custom rich text progress bar based on RichProgressBar from Pytorch Lightning.""" - _console: rich.console.Console + _console: Console def __init__(self): super().__init__(leave=True) def print_single_line(self, text: str, style: str = "magenta") -> None: - """Prints single line of text to the console.""" - self._check_console() - text = f"[{style}]{text}[/{style}]" - self._console.print(text) + """Prints single line of text to the console. + + @type text: str + @param text: Text to print. + @type style: str + @param style: Style of the text. Defaults to C{"magenta"}. + """ + if self._check_console(): + self._console.print(f"[{style}]{text}[/{style}]") + else: + print(text) def get_metrics( self, trainer: pl.Trainer, pl_module: pl.LightningModule @@ -33,16 +40,13 @@ def get_metrics( items["Loss"] = pl_module.training_step_outputs[-1]["loss"].item() return items - def _check_console(self) -> None: + def _check_console(self) -> bool: """Checks if console is set. - @raises RuntimeError: If console is not set. + @rtype: bool + @return: True if console is set, False otherwise. """ - if self._console is None: - raise RuntimeError( - "Console not set. Set `use_rich_text` to `False` " - "in your configuration file." - ) + return self._console is not None def print_table( self, @@ -62,20 +66,24 @@ def print_table( @type value_name: str @param value_name: Name of the value column. Defaults to C{"Value"}. """ - rich_table = Table( - title=title, - show_header=True, - header_style="bold magenta", - ) - rich_table.add_column(key_name, style="magenta") - rich_table.add_column(value_name, style="white") - for name, value in table.items(): - if isinstance(value, float): - rich_table.add_row(name, f"{value:.5f}") - else: - rich_table.add_row(name, str(value)) - self._check_console() - self._console.print(rich_table) + if self._check_console(): + rich_table = Table( + title=title, + show_header=True, + header_style="bold magenta", + ) + rich_table.add_column(key_name, style="magenta") + rich_table.add_column(value_name, style="white") + for name, value in table.items(): + if isinstance(value, float): + rich_table.add_row(name, f"{value:.5f}") + else: + rich_table.add_row(name, str(value)) + self._console.print(rich_table) + else: + print(f"------{title}-----") + for name, value in table.items(): + print(f"{name}: {value}") def print_tables( self, tables: Mapping[str, Mapping[str, int | str | float]] @@ -103,10 +111,20 @@ def print_results( @type metrics: Mapping[str, Mapping[str, int | str | float]] @param metrics: Metrics in format {table_name: table}. 
""" - assert self._console is not None - - self._console.print(f"------{stage}-----", style="bold magenta") - self._console.print(f"[bold magenta]Loss:[/bold magenta] [white]{loss}[/white]") - self._console.print("[bold magenta]Metrics:[/bold magenta]") - self.print_tables(metrics) - self._console.print("---------------", style="bold magenta") + if self._check_console(): + self._console.rule(f"{stage}", style="bold magenta") + self._console.print( + f"[bold magenta]Loss:[/bold magenta] [white]{loss}[/white]" + ) + self._console.print("[bold magenta]Metrics:[/bold magenta]") + self.print_tables(metrics) + self._console.rule(style="bold magenta") + else: + print(f"------{stage}-----") + print(f"Loss: {loss}") + + for node_name, node_metrics in metrics.items(): + for metric_name, metric_value in node_metrics.items(): + print( + f"{stage} metric: {node_name}/{metric_name}: {metric_value:.4f}" + ) diff --git a/luxonis_train/callbacks/metadata_logger.py b/luxonis_train/callbacks/metadata_logger.py index 5ccf542f..45ff8717 100644 --- a/luxonis_train/callbacks/metadata_logger.py +++ b/luxonis_train/callbacks/metadata_logger.py @@ -5,6 +5,7 @@ import pkg_resources import yaml +import luxonis_train from luxonis_train.utils.config import Config from luxonis_train.utils.registry import CALLBACKS @@ -23,7 +24,9 @@ def __init__(self, hyperparams: list[str]): super().__init__() self.hyperparams = hyperparams - def on_fit_start(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> None: + def on_fit_start( + self, _: pl.Trainer, pl_module: "luxonis_train.models.LuxonisLightningModule" + ) -> None: cfg: Config = pl_module.cfg hparams = {key: cfg.get(key) for key in self.hyperparams} @@ -37,12 +40,13 @@ def on_fit_start(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> No if luxonis_train_hash: hparams["luxonis_train"] = luxonis_train_hash - trainer.logger.log_hyperparams(hparams) # type: ignore + pl_module.logger.log_hyperparams(hparams) # also save metadata locally with open(osp.join(pl_module.save_dir, "metadata.yaml"), "w+") as f: yaml.dump(hparams, f, default_flow_style=False) - def _get_editable_package_git_hash(self, package_name: str) -> str | None: + @staticmethod + def _get_editable_package_git_hash(package_name: str) -> str | None: try: distribution = pkg_resources.get_distribution(package_name) package_location = osp.join(distribution.location, package_name) diff --git a/luxonis_train/callbacks/test_on_train_end.py b/luxonis_train/callbacks/test_on_train_end.py index 3c799ec9..f2bb09ec 100644 --- a/luxonis_train/callbacks/test_on_train_end.py +++ b/luxonis_train/callbacks/test_on_train_end.py @@ -10,7 +10,9 @@ class TestOnTrainEnd(pl.Callback): """Callback to perform a test run at the end of the training.""" def on_train_end( - self, trainer: pl.Trainer, pl_module: "luxonis_train.models.LuxonisModel" + self, + trainer: pl.Trainer, + pl_module: "luxonis_train.models.LuxonisLightningModule", ) -> None: # `trainer.test` would delete the paths so we need to save them best_paths = { @@ -19,8 +21,7 @@ def on_train_end( if isinstance(callback, ModelCheckpoint) } - assert pl_module._core is not None - trainer.test(pl_module, pl_module._core.pytorch_loaders["test"]) + trainer.test(pl_module, pl_module.core.pytorch_loaders["test"]) # Restore the paths for callback in trainer.callbacks: # type: ignore diff --git a/luxonis_train/callbacks/upload_checkpoint.py b/luxonis_train/callbacks/upload_checkpoint.py index b91f7998..29da59ef 100644 --- a/luxonis_train/callbacks/upload_checkpoint.py 
+++ b/luxonis_train/callbacks/upload_checkpoint.py @@ -5,8 +5,8 @@ import lightning.pytorch as pl import torch -from luxonis_ml.utils.filesystem import LuxonisFileSystem +import luxonis_train from luxonis_train.utils.registry import CALLBACKS @@ -14,16 +14,13 @@ class UploadCheckpoint(pl.Callback): """Callback that uploads best checkpoint based on the validation loss.""" - def __init__(self, upload_directory: str): + def __init__(self): """Constructs `UploadCheckpoint`. @type upload_directory: str @param upload_directory: Path used as upload directory """ super().__init__() - self.fs = LuxonisFileSystem( - upload_directory, allow_active_mlflow_run=True, allow_local=False - ) self.logger = logging.getLogger(__name__) self.last_logged_epoch = None self.last_best_checkpoints = set() @@ -31,7 +28,7 @@ def __init__(self, upload_directory: str): def on_save_checkpoint( self, trainer: pl.Trainer, - _: pl.LightningModule, + module: "luxonis_train.models.LuxonisLightningModule", checkpoint: dict[str, Any], ) -> None: # Log only once per epoch in case there are multiple ModelCheckpoint callbacks @@ -44,22 +41,15 @@ def on_save_checkpoint( ] for curr_best_checkpoint in checkpoint_paths: if curr_best_checkpoint not in self.last_best_checkpoints: - self.logger.info( - f"Started checkpoint upload to {self.fs.full_path}..." - ) + self.logger.info("Uploading checkpoint...") temp_filename = ( Path(curr_best_checkpoint).parent.with_suffix(".ckpt").name ) torch.save(checkpoint, temp_filename) + module.logger.upload_artifact(temp_filename, typ="weights") - self.fs.put_file( - local_path=temp_filename, - remote_path=temp_filename, - mlflow_instance=trainer.logger.experiment.get( # type: ignore - "mlflow", - ), - ) os.remove(temp_filename) + self.logger.info("Checkpoint upload finished") self.last_best_checkpoints.add(curr_best_checkpoint) diff --git a/luxonis_train/core/__init__.py b/luxonis_train/core/__init__.py index 7e60f321..6d468af2 100644 --- a/luxonis_train/core/__init__.py +++ b/luxonis_train/core/__init__.py @@ -1,8 +1,3 @@ -from .archiver import Archiver -from .core import Core -from .exporter import Exporter -from .inferer import Inferer -from .trainer import Trainer -from .tuner import Tuner +from .core import LuxonisModel -__all__ = ["Exporter", "Trainer", "Tuner", "Inferer", "Archiver", "Core"] +__all__ = ["LuxonisModel"] diff --git a/luxonis_train/core/archiver.py b/luxonis_train/core/archiver.py deleted file mode 100644 index 24b10c1d..00000000 --- a/luxonis_train/core/archiver.py +++ /dev/null @@ -1,396 +0,0 @@ -import os -from logging import getLogger -from pathlib import Path -from typing import Any, List - -import onnx -from luxonis_ml.nn_archive.archive_generator import ArchiveGenerator -from luxonis_ml.nn_archive.config import CONFIG_VERSION -from luxonis_ml.nn_archive.config_building_blocks import ObjectDetectionSubtypeYOLO -from luxonis_ml.utils import LuxonisFileSystem - -from luxonis_train.models import LuxonisModel -from luxonis_train.nodes.enums.head_categorization import ( - ImplementedHeads, - ImplementedHeadsIsSoxtmaxed, -) -from luxonis_train.utils.config import Config - -from .core import Core - -logger = getLogger(__name__) - - -class Archiver(Core): - """Main API which is used to construct the NN archive out of a trainig config and - model executables.""" - - def __init__( - self, - cfg: str | dict[str, Any] | Config, - opts: list[str] | tuple[str, ...] | dict[str, Any] | None = None, - ): - """Constructs a new Archiver instance. 
- - @type cfg: str | dict[str, Any] | Config - @param cfg: Path to config file or config dict used to setup training. - @type opts: list[str] | tuple[str, ...] | dict[str, Any] | None - @param opts: Argument dict provided through command line, - used for config overriding. - """ - - super().__init__(cfg, opts) - - self.lightning_module = LuxonisModel( - cfg=self.cfg, - dataset_metadata=self.dataset_metadata, - save_dir=self.run_save_dir, - input_shape=self.loaders["train"].input_shape, - _core=self, - ) - - self.model_name = self.cfg.model.name - - self.archive_name = self.cfg.archiver.archive_name - archive_save_directory = Path(self.cfg.archiver.archive_save_directory) - if not archive_save_directory.exists(): - logger.info(f"Creating archive directory {archive_save_directory}") - archive_save_directory.mkdir(parents=True, exist_ok=True) - self.archive_save_directory = str(archive_save_directory) - - self.inputs = [] - self.outputs = [] - self.heads = [] - - def archive(self, executable_path: str): - """Runs archiving. - - @type executable_path: str - @param executable_path: Path to model executable file (e.g. ONNX model). - """ - - executable_fname = os.path.split(executable_path)[1] - _, executable_suffix = os.path.splitext(executable_fname) - self.archive_name += f"_{executable_suffix[1:]}" - - def _mult(lst: list[float | int]) -> list[float]: - return [round(x * 255.0, 5) for x in lst] - - preprocessing = { # TODO: keep preprocessing same for each input? - "mean": _mult(self.cfg.trainer.preprocessing.normalize.params["mean"]), - "scale": _mult(self.cfg.trainer.preprocessing.normalize.params["std"]), - "reverse_channels": self.cfg.trainer.preprocessing.train_rgb, - "interleaved_to_planar": False, # TODO: make it modifiable? - } - - inputs_dict = self._get_inputs(executable_path) - for input_name in inputs_dict: - self._add_input( - name=input_name, - dtype=inputs_dict[input_name]["dtype"], - shape=inputs_dict[input_name]["shape"], - layout=inputs_dict[input_name]["layout"], - preprocessing=preprocessing, - ) - - outputs_dict = self._get_outputs(executable_path) - for output_name in outputs_dict: - self._add_output(name=output_name, dtype=outputs_dict[output_name]["dtype"]) - - heads_dict = self._get_heads(executable_path) - for head_name in heads_dict: - self._add_head(heads_dict[head_name]) - - model = { - "metadata": { - "name": self.model_name, - "path": executable_fname, - }, - "inputs": self.inputs, - "outputs": self.outputs, - "heads": self.heads, - } - - cfg_dict = { - "config_version": CONFIG_VERSION.__args__[0], - "model": model, - } - - self.archive_path = ArchiveGenerator( - archive_name=self.archive_name, - save_path=self.archive_save_directory, - cfg_dict=cfg_dict, - executables_paths=[executable_path], # TODO: what if more executables? - ).make_archive() - - logger.info(f"archive saved to {self.archive_path}") - - if self.cfg.archiver.upload_url is not None: - self._upload() - - return self.archive_path - - def _get_inputs(self, executable_path: str): - """Get inputs of a model executable. - - @type executable_path: str - @param executable_path: Path to model executable file. - """ - - _, executable_suffix = os.path.splitext(executable_path) - if executable_suffix == ".onnx": - return self._get_onnx_inputs(executable_path) - else: - raise NotImplementedError( - f"Missing input reading function for {executable_suffix} models." - ) - - def _get_onnx_inputs(self, executable_path: str): - """Get inputs of an ONNX model executable. 
- - @type executable_path: str - @param executable_path: Path to model executable file. - """ - - inputs_dict = {} - model = onnx.load(executable_path) - for input in model.graph.input: - tensor_type = input.type.tensor_type - dtype_idx = tensor_type.elem_type - dtype = str(onnx.helper.tensor_dtype_to_np_dtype(dtype_idx)) - shape = [] - for d in tensor_type.shape.dim: - if d.HasField("dim_value"): - shape.append(d.dim_value) - else: - raise ValueError("Unsupported input dimension identifier type") - if shape[1] == 3: - layout = "NCHW" - elif shape[3] == 3: - layout = "NHWC" - else: - raise ValueError("Unknown input layout") - inputs_dict[input.name] = {"dtype": dtype, "shape": shape, "layout": layout} - return inputs_dict - - def _add_input( - self, - name: str, - dtype: str, - shape: list, - layout: str, - preprocessing: dict, - input_type: str = "image", - ) -> None: - """Add input to self.inputs. - - @type name: str - @param name: Name of the input layer. - @type dtype: str - @param dtype: Data type of the input data (e.g., 'float32'). - @type shape: list - @param shape: Shape of the input data as a list of integers (e.g. [H,W], [H,W,C], [BS,H,W,C], ...). - @type preprocessing: dict - @param preprocessing: Preprocessing steps applied to the input data. - @type layout: str - @param layout: Lettercode interpretation of the input data dimensions (e.g., 'NCHW'). - @type input_type: str - @param input_type: Type of input data (e.g., 'image'). - """ - - self.inputs.append( - { - "name": name, - "dtype": dtype, - "input_type": input_type, - "shape": shape, - "layout": layout, - "preprocessing": preprocessing, - } - ) - - def _get_outputs(self, executable_path): - """Get outputs of a model executable. - - @type executable_path: str - @param executable_path: Path to model executable file. - """ - - _, executable_suffix = os.path.splitext(executable_path) - if executable_suffix == ".onnx": - return self._get_onnx_outputs(executable_path) - else: - raise NotImplementedError( - f"Missing input reading function for {executable_suffix} models." - ) - - def _get_onnx_outputs(self, executable_path): - """Get outputs of an ONNX model executable. - - @type executable_path: str - @param executable_path: Path to model executable file. - """ - - outputs_dict = {} - model = onnx.load(executable_path) - for output in model.graph.output: - tensor_type = output.type.tensor_type - dtype_idx = tensor_type.elem_type - dtype = str(onnx.helper.tensor_dtype_to_np_dtype(dtype_idx)) - outputs_dict[output.name] = {"dtype": dtype} - return outputs_dict - - def _add_output(self, name: str, dtype: str) -> None: - """Add output to self.outputs. - - @type name: str - @param name: Name of the output layer. - @type dtype: str - @param dtype: Data type of the output data (e.g., 'float32'). - """ - - self.outputs.append({"name": name, "dtype": dtype}) - - def _get_classes(self, node_name: str, node_task: str | None) -> List[str]: - if not node_task: - match node_name: - case "ClassificationHead": - node_task = "classification" - case "EfficientBBoxHead": - node_task = "boundingbox" - case "SegmentationHead" | "BiSeNetHead": - node_task = "segmentation" - case "ImplicitKeypointBBoxHead" | "EfficientKeypointBBoxHead": - node_task = "keypoints" - case _: - raise ValueError("Node does not map to a default task.") - - return self.dataset_metadata._classes.get(node_task, []) - - def _get_head_specific_parameters( - self, head_name, head_alias, executable_path - ) -> dict: - """Get parameters specific to head. 
- - @type head_name: str - @param head_name: Name of the head (e.g. 'EfficientBBoxHead'). - @type head_alias: str - @param head_alias: Alias of the head (e.g. 'detection_head'). - @type executable_path: str - @param executable_path: Path to model executable file. - """ - - parameters = {} - if head_name == "ClassificationHead": - parameters["is_softmax"] = getattr( - ImplementedHeadsIsSoxtmaxed, head_name - ).value - elif head_name == "EfficientBBoxHead": - parameters["subtype"] = ObjectDetectionSubtypeYOLO.YOLOv6.value - head_node = self.lightning_module._modules["nodes"][head_alias] - parameters["iou_threshold"] = head_node.iou_thres - parameters["conf_threshold"] = head_node.conf_thres - parameters["max_det"] = head_node.max_det - elif head_name in ["SegmentationHead", "BiSeNetHead"]: - parameters["is_softmax"] = getattr( - ImplementedHeadsIsSoxtmaxed, head_name - ).value - elif head_name == "ImplicitKeypointBBoxHead": - parameters["subtype"] = ObjectDetectionSubtypeYOLO.YOLOv7.value - head_node = self.lightning_module._modules["nodes"][head_alias] - parameters["iou_threshold"] = head_node.iou_thres - parameters["conf_threshold"] = head_node.conf_thres - parameters["max_det"] = head_node.max_det - parameters["n_keypoints"] = head_node.n_keypoints - parameters["anchors"] = head_node.anchors.tolist() - elif head_name == "EfficientKeypointBBoxHead": - # or appropriate subtype - head_node = self.lightning_module._modules["nodes"][head_alias] - parameters["iou_threshold"] = head_node.iou_thres - parameters["conf_threshold"] = head_node.conf_thres - parameters["max_det"] = head_node.max_det - parameters["n_keypoints"] = head_node.n_keypoints - else: - raise ValueError("Unknown head name") - return parameters - - def _get_head_outputs(self, head_name) -> List[str]: - """Get model outputs in a head-specific format. - - @type head_name: str - @param head_name: Name of the head (e.g. 'EfficientBBoxHead'). - """ - - if head_name == "ClassificationHead": - return [self.outputs[0]["name"]] - elif head_name == "EfficientBBoxHead": - return [output["name"] for output in self.outputs] - elif head_name in ["SegmentationHead", "BiSeNetHead"]: - return [self.outputs[0]["name"]] - elif head_name == "ImplicitKeypointBBoxHead": - return [self.outputs[0]["name"]] - elif head_name == "EfficientKeypointBBoxHead": - return [self.outputs[0]["name"]] - else: - raise ValueError("Unknown head name") - - def _get_heads(self, executable_path): - """Get model heads. - - @type executable_path: str - @param executable_path: Path to model executable file. - """ - heads_dict = {} - - for node in self.cfg.model.nodes: - node_name = node.name - node_alias = node.alias - # node_inputs = node.inputs - if node_alias in self.lightning_module.outputs: - if node_name in ImplementedHeads.__members__: - parser = getattr(ImplementedHeads, node_name).value - classes = self._get_classes(node_name, node.task) - head_outputs = self._get_head_outputs(node_name) - head_dict = { - "parser": parser, - "metadata": { - "classes": classes, - "n_classes": len(classes), - }, - "outputs": head_outputs, - } - head_dict["metadata"].update( - self._get_head_specific_parameters( - node_name, node_alias, executable_path - ) - ) - heads_dict[node_name] = head_dict - return heads_dict - - def _add_head(self, head_metadata: dict) -> str: - """Add head to self.heads. - - @type metadata: dict - @param metadata: Parameters required by head to run postprocessing. 
- """ - - self.heads.append(head_metadata) - - def _upload(self): - """Uploads the archive file to specified s3 bucket. - - @raises ValueError: If upload url was not specified in config file. - """ - - if self.cfg.archiver.upload_url is None: - raise ValueError("Upload url must be specified in config file.") - - fs = LuxonisFileSystem(self.cfg.archiver.upload_url, allow_local=False) - logger.info(f"Started Archive upload to {fs.full_path}...") - - fs.put_file( - local_path=self.archive_path, - remote_path=self.archive_name, - ) - - logger.info("Files upload finished") diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index b3c57935..44e254db 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -1,7 +1,8 @@ -import os import os.path as osp -from contextlib import suppress +import signal +import threading from logging import getLogger +from pathlib import Path from typing import Any import lightning.pytorch as pl @@ -9,21 +10,35 @@ import rich.traceback import torch import torch.utils.data as torch_data -from lightning.pytorch.utilities import rank_zero_only # type: ignore +import yaml +from lightning.pytorch.utilities import rank_zero_only from luxonis_ml.data import Augmentations -from luxonis_ml.utils import reset_logging, setup_logging +from luxonis_ml.nn_archive import ArchiveGenerator +from luxonis_ml.nn_archive.config import CONFIG_VERSION +from luxonis_ml.utils import LuxonisFileSystem, reset_logging, setup_logging +from luxonis_train.attached_modules.visualizers import get_unnormalized_images from luxonis_train.callbacks import LuxonisProgressBar +from luxonis_train.models import LuxonisLightningModule from luxonis_train.utils.config import Config from luxonis_train.utils.general import DatasetMetadata from luxonis_train.utils.loaders import collate_fn from luxonis_train.utils.registry import LOADERS from luxonis_train.utils.tracker import LuxonisTrackerPL +from .utils.export_utils import ( + blobconverter_export, + get_preprocessing, + replace_weights, + try_onnx_simplify, +) +from .utils.infer_utils import render_visualizations +from .utils.train_utils import create_trainer + logger = getLogger(__name__) -class Core: +class LuxonisModel: """Common logic of the core components. 
This class contains common logic of the core components (trainer, evaluator, @@ -52,20 +67,16 @@ def __init__( else: self.cfg = Config.get_config(cfg, opts) - if self.cfg.use_rich_text: - rich.traceback.install(suppress=[pl, torch], show_locals=False) - - self.rank = rank_zero_only.rank + rich.traceback.install(suppress=[pl, torch], show_locals=False) - self.tracker = self._create_tracker() - # NOTE: tracker.experiment has to be called first in order - # for the run_id to be initialized - # TODO: it shouldn't be a property because of the above - with suppress(Exception): - _ = self.tracker.experiment - self._run_id = self.tracker.run_id + self.tracker = LuxonisTrackerPL( + rank=rank_zero_only.rank, + mlflow_tracking_uri=self.cfg.ENVIRON.MLFLOW_TRACKING_URI, + _auto_finalize=False, + **self.cfg.tracker.model_dump(), + ) - self.run_save_dir = os.path.join( + self.run_save_dir = osp.join( self.cfg.tracker.save_directory, self.tracker.run_name ) self.log_file = osp.join(self.run_save_dir, "luxonis_train.log") @@ -73,10 +84,7 @@ def __init__( # NOTE: to add the file handler (we only get the save dir now, # but we want to use the logger before) reset_logging() - setup_logging( - use_rich=self.cfg.use_rich_text, - file=self.log_file, - ) + setup_logging(file=self.log_file, use_rich=True) # NOTE: overriding logger in pl so it uses our logger to log device info rank_zero_module.log = logger @@ -106,20 +114,11 @@ def __init__( only_normalize=True, ) - self.pl_trainer = pl.Trainer( - accelerator=self.cfg.trainer.accelerator, - devices=self.cfg.trainer.devices, - strategy=self.cfg.trainer.strategy, - logger=self.tracker, # type: ignore - max_epochs=self.cfg.trainer.epochs, - accumulate_grad_batches=self.cfg.trainer.accumulate_grad_batches, - check_val_every_n_epoch=self.cfg.trainer.validation_interval, - num_sanity_val_steps=self.cfg.trainer.num_sanity_val_steps, - profiler=self.cfg.trainer.profiler, # for debugging purposes, - # NOTE: this is likely PL bug, - # should be configurable inside configure_callbacks(), - callbacks=LuxonisProgressBar() if self.cfg.use_rich_text else None, + self.pl_trainer = create_trainer( + self.cfg, + logger=self.tracker, deterministic=deterministic, + callbacks=LuxonisProgressBar(), ) self.loaders = { @@ -140,16 +139,9 @@ def __init__( for view in ["train", "val", "test"] } sampler = None + # TODO: implement weighted sampler if self.cfg.trainer.use_weighted_sampler: - classes_count = self.loaders["train"].get_classes()[1] - if len(classes_count) == 0: - logger.warning( - "WeightedRandomSampler only available for classification tasks. Using default sampler instead." 
-                )
-            else:
-                weights = [1 / i for i in classes_count.values()]
-                num_samples = sum(classes_count.values())
-                sampler = torch_data.WeightedRandomSampler(weights, num_samples)
+            raise NotImplementedError("Weighted sampler is not implemented yet.")
 
         self.pytorch_loaders = {
             view: torch_data.DataLoader(
@@ -170,28 +162,496 @@ def __init__(
         self.dataset_metadata = DatasetMetadata.from_loader(self.loaders["train"])
         self.dataset_metadata.set_loader(self.pytorch_loaders["train"])
 
-        self.cfg.save_data(os.path.join(self.run_save_dir, "config.yaml"))
+        self.cfg.save_data(osp.join(self.run_save_dir, "config.yaml"))
+
+        self.input_shapes = self.loaders["train"].input_shapes
+
+        self.lightning_module = LuxonisLightningModule(
+            cfg=self.cfg,
+            dataset_metadata=self.dataset_metadata,
+            save_dir=self.run_save_dir,
+            input_shapes=self.input_shapes,
+            _core=self,
+        )
+
+        self._exported_models: dict[str, Path] = {}
+
+    def _train(self, resume: str | None, *args, **kwargs):
+        status = "success"
+        try:
+            self.pl_trainer.fit(*args, ckpt_path=resume, **kwargs)
+        except Exception as e:
+            logger.exception("Encountered an exception during training.")
+            status = "failed"
+            raise e
+        finally:
+            self.tracker.upload_artifact(self.log_file, typ="logs")
+            self.tracker._finalize(status)
+
+    def train(
+        self, new_thread: bool = False, resume_weights: str | None = None
+    ) -> None:
+        """Runs training.
+
+        @type new_thread: bool
+        @param new_thread: Runs training in a new thread if set to True.
+        @type resume_weights: str | None
+        @param resume_weights: Path to a checkpoint to resume training from.
+        """
+
+        if self.cfg.trainer.matmul_precision is not None:
+            logger.info(
+                f"Setting matmul precision to {self.cfg.trainer.matmul_precision}"
+            )
+            torch.set_float32_matmul_precision(self.cfg.trainer.matmul_precision)
+
+        if resume_weights is not None:
+            resume_weights = str(
+                LuxonisFileSystem.download(resume_weights, self.run_save_dir)
+            )
+
+        def graceful_exit(signum: int, _):
+            logger.info(f"{signal.Signals(signum).name} received, stopping training...")
+            ckpt_path = osp.join(self.run_save_dir, "resume.ckpt")
+            self.pl_trainer.save_checkpoint(ckpt_path)
+            self.tracker.upload_artifact(
+                ckpt_path, typ="checkpoints", name="resume.ckpt"
+            )
+            self.tracker._finalize(status="failed")
+            exit(0)
+
+        signal.signal(signal.SIGTERM, graceful_exit)
+
+        if not new_thread:
+            logger.info(f"Checkpoints will be saved in: {self.run_save_dir}")
+            logger.info("Starting training...")
+            self._train(
+                resume_weights,
+                self.lightning_module,
+                self.pytorch_loaders["train"],
+                self.pytorch_loaders["val"],
+            )
+            logger.info("Training finished")
+            logger.info(f"Checkpoints saved in: {self.run_save_dir}")
+
+        else:
+            # Every time an exception happens in the thread, this hook will activate
+            def thread_exception_hook(args):
+                self.error_message = str(args.exc_value)
+
+            threading.excepthook = thread_exception_hook
+
+            self.thread = threading.Thread(
+                target=self._train,
+                args=(
+                    resume_weights,
+                    self.lightning_module,
+                    self.pytorch_loaders["train"],
+                    self.pytorch_loaders["val"],
+                ),
+                daemon=True,
+            )
+            self.thread.start()
+
+    def export(
+        self, onnx_save_path: str | None = None, *, weights: str | None = None
+    ) -> None:
+        """Runs export.
+
+        @type onnx_save_path: str | None
+        @param onnx_save_path: Path to the exported .onnx model. If not specified, the
+            model will be saved to the export directory with the name specified in the
+            config file.
+        @type weights: str | None
+        @param weights: Path to the checkpoint to export. If not specified,
+            C{model.weights} from the config file is used.
+
+        @raises RuntimeError: If `onnxsim` fails to simplify the model.
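+
+        A minimal usage sketch (the config path and checkpoint name below are
+        illustrative placeholders, not files from this repository):
+
+            >>> model = LuxonisModel("config.yaml")
+            >>> model.export(weights="path/to/checkpoint.ckpt")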
+        """
+
+        weights = weights or self.cfg.model.weights
+
+        if weights is None:
+            logger.warning(
+                "No model weights specified. Exporting model without weights."
+            )
+
+        export_save_dir = Path(self.run_save_dir, "export")
+        export_save_dir.mkdir(parents=True, exist_ok=True)
+
+        export_path = export_save_dir / (self.cfg.exporter.name or self.cfg.model.name)
+        onnx_save_path = onnx_save_path or str(export_path.with_suffix(".onnx"))
+
+        with replace_weights(self.lightning_module, weights):
+            output_names = self.lightning_module.export_onnx(
+                onnx_save_path, **self.cfg.exporter.onnx.model_dump()
+            )
+
+        try_onnx_simplify(onnx_save_path)
+        self._exported_models["onnx"] = Path(onnx_save_path)
+
+        scale_values, mean_values, reverse_channels = get_preprocessing(self.cfg)
+
+        if self.cfg.exporter.blobconverter.active:
+            try:
+                blobconverter_export(
+                    self.cfg.exporter,
+                    scale_values,
+                    mean_values,
+                    reverse_channels,
+                    str(export_save_dir),
+                    onnx_save_path,
+                )
+                self._exported_models["blob"] = export_path.with_suffix(".blob")
+            except ImportError:
+                logger.error("Failed to import `blobconverter`")
+                logger.warning(
+                    "`blobconverter` not installed. Skipping .blob model conversion. "
+                    "Ensure `blobconverter` is installed in your environment."
+                )
+
+        if len(self.input_shapes) > 1:
+            logger.error(
+                "Generating modelconverter config for a model "
+                "with multiple inputs is not implemented yet."
+            )
+            return
+
+        modelconverter_config = {
+            "input_model": onnx_save_path,
+            "scale_values": scale_values,
+            "mean_values": mean_values,
+            "reverse_input_channels": reverse_channels,
+            "shape": [1, *next(iter(self.input_shapes.values()))],
+            "outputs": [{"name": name} for name in output_names],
+        }
+
+        for path in self._exported_models.values():
+            if self.cfg.exporter.upload_to_run:
+                self.tracker.upload_artifact(path, typ="export")
+            if self.cfg.exporter.upload_url is not None:
+                LuxonisFileSystem.upload(path, self.cfg.exporter.upload_url)
+
+        with open(export_path.with_suffix(".yaml"), "w") as f:
+            yaml.dump(modelconverter_config, f)
+            if self.cfg.exporter.upload_to_run:
+                self.tracker.upload_artifact(f.name, name=f.name, typ="export")
+            if self.cfg.exporter.upload_url is not None:
+                LuxonisFileSystem.upload(f.name, self.cfg.exporter.upload_url)
+
+    def test(self, new_thread: bool = False, view: str | None = None) -> None:
+        """Runs testing.
+
+        @type new_thread: bool
+        @param new_thread: Runs testing in a new thread if set to True.
+        @type view: str | None
+        @param view: Which split to run the tests on. If unset, the value in
+            C{loader.test_view} will be used. Valid values are: 'train', 'val', 'test'.
+            Defaults to None.
+        """
+
+        view = view or self.cfg.loader.test_view
+
+        if view not in self.pytorch_loaders:
+            raise ValueError(
+                f"View {view} is not valid. Valid views are: 'train', 'val', 'test'."
+            )
+        loader = self.pytorch_loaders[view]
+
+        if not new_thread:
+            self.pl_trainer.test(self.lightning_module, loader)
+        else:
+            self.thread = threading.Thread(
+                target=self.pl_trainer.test,
+                args=(self.lightning_module, loader),
+                daemon=True,
+            )
+            self.thread.start()
+
+    def infer(self, view: str = "val", save_dir: str | Path | None = None) -> None:
+        """Runs inference.
+
+        @type view: str
+        @param view: Which split to run the inference on. Valid values are: 'train',
+            'val', 'test'. Defaults to "val".
+        @type save_dir: str | Path | None
+        @param save_dir: Directory where to save the visualizations. If not specified,
+            visualizations will be rendered on the screen.
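+
+        A minimal usage sketch (the config path and save directory below are
+        illustrative placeholders):
+
+            >>> model = LuxonisModel("config.yaml")
+            >>> model.infer(view="test", save_dir="infer_results")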
+ """ + self.lightning_module.eval() + + if view not in self.pytorch_loaders: + raise ValueError( + f"View {view} is not valid. Valid views are: 'train', 'val', 'test'." + ) + for inputs, labels in self.pytorch_loaders[view]: + images = get_unnormalized_images(self.cfg, inputs) + outputs = self.lightning_module.forward( + inputs, labels, images=images, compute_visualizations=True + ) + render_visualizations(outputs.visualizations, save_dir) + + def tune(self) -> None: + """Runs Optuna tunning of hyperparameters.""" + import optuna + from optuna.integration import PyTorchLightningPruningCallback + + from .utils.tune_utils import get_trial_params + + def _objective(trial: optuna.trial.Trial) -> float: + """Objective function used to optimize Optuna study.""" + cfg_tracker = self.cfg.tracker + tracker_params = cfg_tracker.model_dump() + child_tracker = LuxonisTrackerPL( + rank=rank_zero_only.rank, + mlflow_tracking_uri=self.cfg.ENVIRON.MLFLOW_TRACKING_URI, + is_sweep=True, + **tracker_params, + ) - def set_train_augmentations(self, aug: Augmentations) -> None: - """Sets augmentations used for training dataset.""" - self.train_augmentations = aug + run_save_dir = osp.join(cfg_tracker.save_directory, child_tracker.run_name) - def set_val_augmentations(self, aug: Augmentations) -> None: - """Sets augmentations used for validation dataset.""" - self.val_augmentations = aug + assert self.cfg.tuner is not None + curr_params = get_trial_params(all_augs, self.cfg.tuner.params, trial) + curr_params["model.predefined_model"] = None - def set_test_augmentations(self, aug: Augmentations) -> None: - """Sets augmentations used for test dataset.""" - self.test_augmentations = aug + cfg_copy = self.cfg.model_copy(deep=True) + cfg_copy.trainer.preprocessing.augmentations = [ + a + for a in cfg_copy.trainer.preprocessing.augmentations + if a.name != "Normalize" + ] # manually remove Normalize so it doesn't duplicate it when creating new cfg instance + Config.clear_instance() + cfg = Config.get_config(cfg_copy.model_dump(), curr_params) + + child_tracker.log_hyperparams(curr_params) + + cfg.save_data(osp.join(run_save_dir, "config.yaml")) + + lightning_module = LuxonisLightningModule( + cfg=cfg, + dataset_metadata=self.dataset_metadata, + save_dir=run_save_dir, + input_shapes=self.loaders["train"].input_shapes, + _core=self, + ) + callbacks = [LuxonisProgressBar()] + + pruner_callback = PyTorchLightningPruningCallback(trial, monitor="val/loss") + callbacks.append(pruner_callback) + deterministic = False + if self.cfg.trainer.seed: + pl.seed_everything(cfg.trainer.seed, workers=True) + deterministic = True + + pl_trainer = create_trainer( + cfg, + logger=child_tracker, + callbacks=callbacks, + deterministic=deterministic, + ) + + try: + pl_trainer.fit( + lightning_module, # type: ignore + self.pytorch_loaders["train"], + self.pytorch_loaders["val"], + ) + pruner_callback.check_pruned() + + # Pruning is done by raising an error + except optuna.TrialPruned as e: + logger.info(e) + + if "val/loss" not in pl_trainer.callback_metrics: + raise ValueError( + "No validation loss found. " + "This can happen if `TestOnTrainEnd` callback is used." 
+ ) + + return pl_trainer.callback_metrics["val/loss"].item() + + cfg_tuner = self.cfg.tuner + if cfg_tuner is None: + raise ValueError("You have to specify the `tuner` section in config.") + + all_augs = [a.name for a in self.cfg.trainer.preprocessing.augmentations] + rank = rank_zero_only.rank + cfg_tracker = self.cfg.tracker + tracker_params = cfg_tracker.model_dump() + # NOTE: wandb doesn't allow multiple concurrent runs, handle this separately + tracker_params["is_wandb"] = False + self.parent_tracker = LuxonisTrackerPL( + rank=rank, + mlflow_tracking_uri=self.cfg.ENVIRON.MLFLOW_TRACKING_URI, + is_sweep=False, + **tracker_params, + ) + if self.parent_tracker.is_mlflow: + # Experiment needs to be interacted with to create actual MLFlow run + self.parent_tracker.experiment["mlflow"].active_run() + + logger.info("Starting tuning...") + + pruner = ( + optuna.pruners.MedianPruner() + if cfg_tuner.use_pruner + else optuna.pruners.NopPruner() + ) + + storage = None + if cfg_tuner.storage.active: + if cfg_tuner.storage.storage_type == "local": + storage = "sqlite:///study_local.db" + else: + storage = "postgresql://{}:{}@{}:{}/{}".format( + self.cfg.ENVIRON.POSTGRES_USER, + self.cfg.ENVIRON.POSTGRES_PASSWORD, + self.cfg.ENVIRON.POSTGRES_HOST, + self.cfg.ENVIRON.POSTGRES_PORT, + self.cfg.ENVIRON.POSTGRES_DB, + ) + + study = optuna.create_study( + study_name=cfg_tuner.study_name, + storage=storage, + direction="minimize", + pruner=pruner, + load_if_exists=cfg_tuner.continue_existing_study, + ) + + study.optimize( + _objective, n_trials=cfg_tuner.n_trials, timeout=cfg_tuner.timeout + ) + + logger.info(f"Best study parameters: {study.best_params}") + + self.parent_tracker.log_hyperparams(study.best_params) + + if self.cfg.tracker.is_wandb: + # If wandb used then init parent tracker separately at the end + wandb_parent_tracker = LuxonisTrackerPL( + rank=rank_zero_only.rank, + **( + self.cfg.tracker.model_dump() + | {"run_name": self.parent_tracker.run_name} + ), + ) + wandb_parent_tracker.log_hyperparams(study.best_params) + + def archive(self, path: str | Path | None = None) -> Path: + """Generates an NN Archive out of a model executable. + + @type path: str | Path | None + @param path: Path to the model executable. If not specified, the model will be + exported first. + @rtype: Path + @return: Path to the generated NN Archive. + """ + from .utils.archive_utils import get_heads, get_inputs, get_outputs + + archive_name = self.cfg.archiver.name or self.cfg.model.name + archive_save_directory = Path(self.run_save_dir, "archive") + archive_save_directory.mkdir(parents=True, exist_ok=True) + inputs = [] + outputs = [] + heads = [] + + if path is None: + if "onnx" not in self._exported_models: + logger.info("Exporting model to ONNX...") + self.export() + path = self._exported_models["onnx"] + + path = Path(path) + + executable_fname = path.name + archive_name += path.suffix + + def _mult(lst: list[float | int]) -> list[float]: + return [round(x * 255.0, 5) for x in lst] + + preprocessing = { # TODO: keep preprocessing same for each input? + "mean": _mult(self.cfg.trainer.preprocessing.normalize.params["mean"]), + "scale": _mult(self.cfg.trainer.preprocessing.normalize.params["std"]), + "reverse_channels": self.cfg.trainer.preprocessing.train_rgb, + "interleaved_to_planar": False, # TODO: make it modifiable? 
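+            # NOTE: "mean" and "scale" above are rescaled by 255 via `_mult`, which
+            # assumes the Normalize params in the config are given in the 0-1 range.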
+ } + + inputs_dict = get_inputs(path) + for input_name in inputs_dict: + inputs.append( + { + "name": input_name, + "dtype": inputs_dict[input_name]["dtype"], + "shape": inputs_dict[input_name]["shape"], + "layout": inputs_dict[input_name]["layout"], + "preprocessing": preprocessing, + "input_type": "image", + } + ) + + outputs_dict = get_outputs(path) + for output_name in outputs_dict: + outputs.append( + {"name": output_name, "dtype": outputs_dict[output_name]["dtype"]} + ) + + heads_dict = get_heads( + self.cfg, + outputs, + self.loaders["train"].get_classes(), + self.lightning_module.nodes, # type: ignore + ) + for head_name in heads_dict: + heads.append(heads_dict[head_name]) + + model = { + "metadata": { + "name": self.cfg.model.name, + "path": executable_fname, + }, + "inputs": inputs, + "outputs": outputs, + "heads": heads, + } + + cfg_dict = { + "config_version": CONFIG_VERSION.__args__[0], # type: ignore + "model": model, + } + + archive_path = ArchiveGenerator( + archive_name=archive_name, + save_path=str(archive_save_directory), + cfg_dict=cfg_dict, + executables_paths=[str(path)], # TODO: what if more executables? + ).make_archive() + + logger.info(f"NN Archive saved to {archive_path}") + + if self.cfg.archiver.upload_url is not None: + LuxonisFileSystem.upload(archive_path, self.cfg.archiver.upload_url) + + if self.cfg.archiver.upload_to_run: + self.tracker.upload_artifact(archive_path, typ="archive") + + return Path(archive_path) @rank_zero_only - def get_save_dir(self) -> str: - """Return path to directory where checkpoints are saved. + def get_status(self) -> tuple[int, int]: + """Get current status of training. - @rtype: str - @return: Save directory path + @rtype: tuple[int, int] + @return: First element is current epoch, second element is total number of + epochs. + """ + return self.lightning_module.get_status() + + @rank_zero_only + def get_status_percentage(self) -> float: + """Return percentage of current training, takes into account early stopping. + + @rtype: float + @return: Percentage of current training in range 0-100. """ - return self.run_save_dir + return self.lightning_module.get_status_percentage() @rank_zero_only def get_error_message(self) -> str | None: @@ -203,7 +663,7 @@ def get_error_message(self) -> str | None: return self.error_message @rank_zero_only - def get_min_loss_checkpoint_path(self) -> str: + def get_min_loss_checkpoint_path(self) -> str | None: """Return best checkpoint path with respect to minimal validation loss. @rtype: str @@ -212,24 +672,10 @@ def get_min_loss_checkpoint_path(self) -> str: return self.pl_trainer.checkpoint_callbacks[0].best_model_path # type: ignore @rank_zero_only - def get_best_metric_checkpoint_path(self) -> str: + def get_best_metric_checkpoint_path(self) -> str | None: """Return best checkpoint path with respect to best validation metric. 
@rtype: str @return: Path to best checkpoint with respect to best validation metric """ return self.pl_trainer.checkpoint_callbacks[1].best_model_path # type: ignore - - def reset_logging(self) -> None: - """Close file handlers to release the log file.""" - reset_logging() - - def _create_tracker(self, run_id: str | None = None) -> LuxonisTrackerPL: - kwargs = self.cfg.tracker.model_dump() - if run_id is not None: - kwargs["run_id"] = run_id - return LuxonisTrackerPL( - rank=self.rank, - mlflow_tracking_uri=self.cfg.ENVIRON.MLFLOW_TRACKING_URI, - **kwargs, - ) diff --git a/luxonis_train/core/exporter.py b/luxonis_train/core/exporter.py deleted file mode 100644 index 7d941cb3..00000000 --- a/luxonis_train/core/exporter.py +++ /dev/null @@ -1,217 +0,0 @@ -import os -import tempfile -from logging import getLogger -from pathlib import Path -from typing import Any - -import onnx -import yaml -from luxonis_ml.utils import LuxonisFileSystem -from torch import Size - -from luxonis_train.models import LuxonisModel -from luxonis_train.utils.config import Config - -from .core import Core - -logger = getLogger(__name__) - - -class Exporter(Core): - def __init__( - self, - cfg: str | dict[str, Any] | Config | None = None, - opts: list[str] | tuple[str, ...] | dict[str, Any] | None = None, - ): - """Provides an interface for exporting models to .onnx and .blob formats. - - @type cfg: str | dict[str, Any] | Config - @param cfg: Path to config file or config dict used to setup training. - - @type opts: list[str] | tuple[str, ...] | dict[str, Any] | None - @param opts: Argument dict provided through command line, - used for config overriding. - """ - - super().__init__(cfg, opts) - - input_shape = self.cfg.exporter.input_shape - if self.cfg.model.weights is None: - logger.warning( - "No model weights specified. Exporting model without weights." - ) - self.local_path = self.cfg.model.weights - if input_shape is None: - self.input_shape = self.loaders["val"].input_shape - else: - self.input_shape = Size(input_shape) - - export_path = ( - Path(self.cfg.exporter.export_save_directory) - / self.cfg.exporter.export_model_name - ) - - if not export_path.parent.exists(): - logger.info(f"Creating export directory {export_path.parent}") - export_path.parent.mkdir(parents=True, exist_ok=True) - self.export_path = str(export_path) - - normalize_params = self.cfg.trainer.preprocessing.normalize.params - if self.cfg.exporter.scale_values is not None: - self.scale_values = self.cfg.exporter.scale_values - else: - self.scale_values = normalize_params.get("std", None) - if self.scale_values: - self.scale_values = ( - [i * 255 for i in self.scale_values] - if isinstance(self.scale_values, list) - else self.scale_values * 255 - ) - - if self.cfg.exporter.mean_values is not None: - self.mean_values = self.cfg.exporter.mean_values - else: - self.mean_values = normalize_params.get("mean", None) - if self.mean_values: - self.mean_values = ( - [i * 255 for i in self.mean_values] - if isinstance(self.mean_values, list) - else self.mean_values * 255 - ) - - self.lightning_module = LuxonisModel( - cfg=self.cfg, - save_dir=self.run_save_dir, - input_shape=self.input_shape, - dataset_metadata=self.dataset_metadata, - _core=self, - ) - - def _get_modelconverter_config(self, onnx_path: str) -> dict[str, Any]: - """Generates export config from input config that is compatible with Luxonis - modelconverter tool. - - @type onnx_path: str - @param onnx_path: Path to .onnx model - @rtype: dict[str, Any] - @return: Export config. 
- """ - return { - "input_model": onnx_path, - "scale_values": self.scale_values, - "mean_values": self.mean_values, - "reverse_input_channels": self.cfg.exporter.reverse_input_channels, - "use_bgr": not self.cfg.trainer.preprocessing.train_rgb, - "input_shape": list(self.input_shape), - "data_type": self.cfg.exporter.data_type, - "output": [{"name": name} for name in self.output_names], - "meta": {"description": self.cfg.model.name}, - } - - def export(self, onnx_path: str | None = None): - """Runs export. - - @type onnx_path: str | None - @param onnx_path: Path to .onnx model. If not specified, model will be saved - to export directory with name specified in config file. - - @raises RuntimeError: If `onnxsim` fails to simplify the model. - """ - onnx_path = onnx_path or self.export_path + ".onnx" - self.output_names = self.lightning_module.export_onnx( - onnx_path, **self.cfg.exporter.onnx.model_dump() - ) - - try: - import onnxsim - - logger.info("Simplifying ONNX model...") - model_onnx = onnx.load(onnx_path) - onnx_model, check = onnxsim.simplify(model_onnx) - if not check: - raise RuntimeError("ONNX simplify failed.") - onnx.save(onnx_model, onnx_path) - logger.info(f"ONNX model saved to {onnx_path}") - - except ImportError: - logger.error("Failed to import `onnxsim`") - logger.warning( - "`onnxsim` not installed. Skipping ONNX model simplification. " - "Ensure `onnxsim` is installed in your environment." - ) - - files_to_upload = [self.local_path, onnx_path] - - if self.cfg.exporter.blobconverter.active: - try: - import blobconverter - - logger.info("Converting ONNX to .blob") - - optimizer_params = [] - if self.scale_values: - optimizer_params.append(f"--scale_values={self.scale_values}") - if self.mean_values: - optimizer_params.append(f"--mean_values={self.mean_values}") - if self.cfg.exporter.reverse_input_channels: - optimizer_params.append("--reverse_input_channels") - - blob_path = blobconverter.from_onnx( - model=onnx_path, - optimizer_params=optimizer_params, - data_type=self.cfg.exporter.data_type, - shaves=self.cfg.exporter.blobconverter.shaves, - version=self.cfg.exporter.blobconverter.version, - use_cache=False, - output_dir=self.export_path, - ) - files_to_upload.append(blob_path) - logger.info(f".blob model saved to {blob_path}") - - except ImportError: - logger.error("Failed to import `blobconverter`") - logger.warning( - "`blobconverter` not installed. Skipping .blob model conversion. " - "Ensure `blobconverter` is installed in your environment." - ) - - if self.cfg.exporter.upload_url is not None: - self._upload(files_to_upload) - - def _upload(self, files_to_upload: list[str]): - """Uploads .pt, .onnx and current config.yaml to specified s3 bucket. - - @type files_to_upload: list[str] - @param files_to_upload: List of files to upload. - @raises ValueError: If upload url was not specified in config file. 
- """ - - if self.cfg.exporter.upload_url is None: - raise ValueError("Upload url must be specified in config file.") - - fs = LuxonisFileSystem(self.cfg.exporter.upload_url, allow_local=False) - logger.info(f"Started upload to {fs.full_path}...") - - for file in files_to_upload: - suffix = Path(file).suffix - fs.put_file( - local_path=file, - remote_path=self.cfg.exporter.export_model_name + suffix, - ) - - with tempfile.NamedTemporaryFile(prefix="config", suffix=".yaml") as f: - self.cfg.save_data(f.name) - fs.put_file(local_path=f.name, remote_path="config.yaml") - - onnx_path = os.path.join( - fs.full_path, f"{self.cfg.exporter.export_model_name}.onnx" - ) - modelconverter_config = self._get_modelconverter_config(onnx_path) - - with tempfile.NamedTemporaryFile( - prefix="config_export", suffix=".yaml", mode="w+" - ) as f: - yaml.dump(modelconverter_config, f, default_flow_style=False) - fs.put_file(local_path=f.name, remote_path="config_export.yaml") - - logger.info("Files upload finished") diff --git a/luxonis_train/core/inferer.py b/luxonis_train/core/inferer.py deleted file mode 100644 index 80a89d35..00000000 --- a/luxonis_train/core/inferer.py +++ /dev/null @@ -1,56 +0,0 @@ -from pathlib import Path -from typing import Any, Literal - -import cv2 - -from luxonis_train.attached_modules.visualizers import get_unnormalized_images -from luxonis_train.utils.config import Config - -from .trainer import Trainer - - -class Inferer(Trainer): - def __init__( - self, - cfg: str | dict[str, Any] | Config | None = None, - opts: list[str] | tuple[str, ...] | None = None, - view: Literal["train", "test", "val"] = "val", - save_dir: str | Path | None = None, - ): - opts = list(opts or []) - opts += ["trainer.batch_size", "1"] - super().__init__(cfg, opts) - if view == "train": - self.loader = self.pytorch_loaders["train"] - elif view == "test": - self.loader = self.pytorch_loaders["test"] - else: - self.loader = self.pytorch_loaders["val"] - self.save_dir = Path(save_dir) if save_dir is not None else None - if self.save_dir is not None: - self.save_dir.mkdir(exist_ok=True, parents=True) - - def infer(self) -> None: - self.lightning_module.eval() - k = 0 - for inputs, labels in self.loader: - images = get_unnormalized_images(self.cfg, inputs) - outputs = self.lightning_module.forward( - inputs, labels, images=images, compute_visualizations=True - ) - - for node_name, visualizations in outputs.visualizations.items(): - for viz_name, viz_batch in visualizations.items(): - for i, viz in enumerate(viz_batch): - viz_arr = viz.detach().cpu().numpy().transpose(1, 2, 0) - viz_arr = cv2.cvtColor(viz_arr, cv2.COLOR_RGB2BGR) - name = f"{node_name}/{viz_name}/{i}" - if self.save_dir is not None: - name = name.replace("/", "_") - cv2.imwrite(str(self.save_dir / f"{name}_{k}.png"), viz_arr) - k += 1 - else: - cv2.imshow(name, viz_arr) - if self.save_dir is None: - if cv2.waitKey(0) == ord("q"): - exit() diff --git a/luxonis_train/core/trainer.py b/luxonis_train/core/trainer.py deleted file mode 100644 index 9cdd9dfe..00000000 --- a/luxonis_train/core/trainer.py +++ /dev/null @@ -1,185 +0,0 @@ -import os.path as osp -import signal -import threading -from logging import getLogger -from typing import Any, Literal - -import torch -from lightning.pytorch.utilities import rank_zero_only # type: ignore -from luxonis_ml.utils import LuxonisFileSystem - -from luxonis_train.models import LuxonisModel -from luxonis_train.utils.config import Config -from luxonis_train.utils.tracker import LuxonisTrackerPL - -from .core 
import Core - -logger = getLogger(__name__) - - -class Trainer(Core): - """Main API which is used to create the model, setup pytorch lightning environment - and perform training based on provided arguments and config.""" - - def __init__( - self, - cfg: str | dict[str, Any] | Config | None = None, - opts: list[str] | tuple[str, ...] | dict[str, Any] | None = None, - resume: str | None = None, - ): - """Constructs a new Trainer instance. - - @type cfg: str | dict[str, Any] | Config - @param cfg: Path to config file or config dict used to setup training. - - @type opts: list[str] | tuple[str, ...] | dict[str, Any] | None - @param opts: Argument dict provided through command line, - used for config overriding. - - @type resume: str | None - @param resume: Training will resume from this checkpoint. - """ - super().__init__(cfg, opts) - - if self.cfg.trainer.matmul_precision is not None: - torch.set_float32_matmul_precision(self.cfg.trainer.matmul_precision) - - if resume is not None: - self.resume = str(LuxonisFileSystem.download(resume, self.run_save_dir)) - else: - self.resume = None - - self.lightning_module = LuxonisModel( - cfg=self.cfg, - dataset_metadata=self.dataset_metadata, - save_dir=self.run_save_dir, - input_shape=self.loaders["train"].input_shape, - _core=self, - ) - - def graceful_exit(signum: int, _): - logger.info(f"{signal.Signals(signum).name} received, stopping training...") - ckpt_path = osp.join(self.run_save_dir, "resume.ckpt") - self.pl_trainer.save_checkpoint(ckpt_path) - tracker = self._create_tracker(self._run_id) - self._upload_logs(tracker) - - if self.cfg.tracker.is_mlflow: - logger.info("Uploading checkpoint to MLFlow.") - fs = LuxonisFileSystem( - "mlflow://", - allow_active_mlflow_run=True, - allow_local=False, - ) - fs.put_file( - local_path=ckpt_path, - remote_path="resume.ckpt", - mlflow_instance=tracker.experiment.get("mlflow"), - ) - - exit(0) - - signal.signal(signal.SIGTERM, graceful_exit) - - def _upload_logs(self, tracker: LuxonisTrackerPL | None = None) -> None: - tracker = tracker or self.tracker - if self.cfg.tracker.is_mlflow: - logger.info("Uploading logs to MLFlow.") - fs = LuxonisFileSystem( - "mlflow://", - allow_active_mlflow_run=True, - allow_local=False, - ) - fs.put_file( - local_path=self.log_file, - remote_path="luxonis_train.log", - mlflow_instance=tracker.experiment.get("mlflow"), - ) - - def _trainer_fit(self, *args, **kwargs): - try: - self.pl_trainer.fit(*args, ckpt_path=self.resume, **kwargs) - except Exception: - logger.exception("Encountered exception during training.") - finally: - self._upload_logs(self._create_tracker(self._run_id)) - - def train(self, new_thread: bool = False) -> None: - """Runs training. - - @type new_thread: bool - @param new_thread: Runs training in new thread if set to True. 
- """ - if not new_thread: - logger.info(f"Checkpoints will be saved in: {self.get_save_dir()}") - logger.info("Starting training...") - self._trainer_fit( - self.lightning_module, - self.pytorch_loaders["train"], - self.pytorch_loaders["val"], - ) - logger.info("Training finished") - logger.info(f"Checkpoints saved in: {self.get_save_dir()}") - - else: - # Every time exception happens in the Thread, this hook will activate - def thread_exception_hook(args): - self.error_message = str(args.exc_value) - - threading.excepthook = thread_exception_hook - - self.thread = threading.Thread( - target=self._trainer_fit, - args=( - self.lightning_module, - self.pytorch_loaders["train"], - self.pytorch_loaders["val"], - ), - daemon=True, - ) - self.thread.start() - - def test( - self, new_thread: bool = False, view: Literal["train", "val", "test"] = "test" - ) -> None: - """Runs testing. - - @type new_thread: bool - @param new_thread: Runs testing in new thread if set to True. - """ - - if view == "test": - loader = self.pytorch_loaders["test"] - elif view == "val": - loader = self.pytorch_loaders["val"] - elif view == "train": - loader = self.pytorch_loaders["train"] - - if not new_thread: - self.pl_trainer.test(self.lightning_module, loader) - else: - self.thread = threading.Thread( - target=self.pl_trainer.test, - args=(self.lightning_module, loader), - daemon=True, - ) - self.thread.start() - - @rank_zero_only - def get_status(self) -> tuple[int, int]: - """Get current status of training. - - @rtype: tuple[int, int] - @return: First element is current epoch, second element is total number of - epochs. - """ - return self.lightning_module.get_status() - - @rank_zero_only - def get_status_percentage(self) -> float: - """Return percentage of current training, takes into account early stopping. - - @rtype: float - @return: Percentage of current training in range 0-100. - """ - return self.lightning_module.get_status_percentage() diff --git a/luxonis_train/core/tuner.py b/luxonis_train/core/tuner.py deleted file mode 100644 index 67ec953b..00000000 --- a/luxonis_train/core/tuner.py +++ /dev/null @@ -1,267 +0,0 @@ -import os.path as osp -import random -from logging import getLogger -from typing import Any - -import lightning.pytorch as pl -import optuna -from lightning.pytorch.utilities import rank_zero_only # type: ignore -from optuna.integration import PyTorchLightningPruningCallback - -from luxonis_train.callbacks import LuxonisProgressBar -from luxonis_train.models import LuxonisModel -from luxonis_train.utils import Config -from luxonis_train.utils.tracker import LuxonisTrackerPL - -from .core import Core - -logger = getLogger(__name__) - - -class Tuner(Core): - def __init__( - self, - cfg: str | dict[str, Any] | Config | None = None, - opts: list[str] | tuple[str, ...] | dict[str, Any] | None = None, - ): - """Main API which is used to perform hyperparameter tunning. - - @type cfg: str | dict[str, Any] | Config - @param cfg: Path to config file or config dict used to setup training. - - @type args: list[str] | tuple[str, ...] | None - @param args: Argument dict provided through command line, - used for config overriding. 
- """ - super().__init__(cfg, opts) - if self.cfg.tuner is None: - raise ValueError("You have to specify the `tuner` section in config.") - self.tune_cfg = self.cfg.tuner - - # Parent tracker that only logs the best study parameters at the end - rank = rank_zero_only.rank - cfg_tracker = self.cfg.tracker - tracker_params = cfg_tracker.model_dump() - tracker_params[ - "is_wandb" - ] = False # wandb doesn't allow multiple concurrent runs, handle this separately - self.parent_tracker = LuxonisTrackerPL( - rank=rank, - mlflow_tracking_uri=self.cfg.ENVIRON.MLFLOW_TRACKING_URI, - is_sweep=False, - **tracker_params, - ) - if self.parent_tracker.is_mlflow: - # Experiment needs to be interacted with to create actual MLFlow run - self.parent_tracker.experiment["mlflow"].active_run() - - def tune(self) -> None: - """Runs Optuna tunning of hyperparameters.""" - logger.info("Starting tuning...") - - pruner = ( - optuna.pruners.MedianPruner() - if self.tune_cfg.use_pruner - else optuna.pruners.NopPruner() - ) - - storage = None - if self.tune_cfg.storage.active: - if self.tune_cfg.storage.storage_type == "local": - storage = "sqlite:///study_local.db" - else: - storage = "postgresql://{}:{}@{}:{}/{}".format( - self.cfg.ENVIRON.POSTGRES_USER, - self.cfg.ENVIRON.POSTGRES_PASSWORD, - self.cfg.ENVIRON.POSTGRES_HOST, - self.cfg.ENVIRON.POSTGRES_PORT, - self.cfg.ENVIRON.POSTGRES_DB, - ) - - study = optuna.create_study( - study_name=self.tune_cfg.study_name, - storage=storage, - direction="minimize", - pruner=pruner, - load_if_exists=self.tune_cfg.continue_existing_study, - ) - - study.optimize( - self._objective, - n_trials=self.tune_cfg.n_trials, - timeout=self.tune_cfg.timeout, - ) - - best_study_params = study.best_params - logger.info(f"Best study parameters: {best_study_params}") - - self.parent_tracker.log_hyperparams(best_study_params) - - if self.cfg.tracker.is_wandb: - # If wandb used then init parent tracker separately at the end - wandb_parent_tracker = LuxonisTrackerPL( - project_name=self.cfg.tracker.project_name, - project_id=self.cfg.tracker.project_id, - run_name=self.parent_tracker.run_name, - save_directory=self.cfg.tracker.save_directory, - is_wandb=True, - wandb_entity=self.cfg.tracker.wandb_entity, - rank=rank_zero_only.rank, - ) - wandb_parent_tracker.log_hyperparams(best_study_params) - - def _objective(self, trial: optuna.trial.Trial) -> float: - """Objective function used to optimize Optuna study.""" - rank = rank_zero_only.rank - cfg_tracker = self.cfg.tracker - tracker_params = cfg_tracker.model_dump() - child_tracker = LuxonisTrackerPL( - rank=rank, - mlflow_tracking_uri=self.cfg.ENVIRON.MLFLOW_TRACKING_URI, - is_sweep=True, - **tracker_params, - ) - - run_save_dir = osp.join(cfg_tracker.save_directory, child_tracker.run_name) - - curr_params = self._get_trial_params(trial) - curr_params["model.predefined_model"] = None - - cfg_copy = self.cfg.model_copy(deep=True) - cfg_copy.trainer.preprocessing.augmentations = [ - a - for a in cfg_copy.trainer.preprocessing.augmentations - if a.name != "Normalize" - ] # manually remove Normalize so it doesn't duplicate it when creating new cfg instance - Config.clear_instance() - cfg = Config.get_config(cfg_copy.model_dump(), curr_params) - - child_tracker.log_hyperparams(curr_params) - - cfg.save_data(osp.join(run_save_dir, "config.yaml")) - - lightning_module = LuxonisModel( - cfg=cfg, - dataset_metadata=self.dataset_metadata, - save_dir=run_save_dir, - input_shape=self.loaders["train"].input_shape, - _core=self, - ) - callbacks: 
list[pl.Callback] = ( - [LuxonisProgressBar()] if self.cfg.use_rich_text else [] - ) - pruner_callback = PyTorchLightningPruningCallback(trial, monitor="val/loss") - callbacks.append(pruner_callback) - deterministic = False - if self.cfg.trainer.seed: - pl.seed_everything(cfg.trainer.seed, workers=True) - deterministic = True - - pl_trainer = pl.Trainer( - accelerator=cfg.trainer.accelerator, - devices=cfg.trainer.devices, - strategy=cfg.trainer.strategy, - logger=child_tracker, # type: ignore - max_epochs=cfg.trainer.epochs, - accumulate_grad_batches=cfg.trainer.accumulate_grad_batches, - check_val_every_n_epoch=cfg.trainer.validation_interval, - num_sanity_val_steps=cfg.trainer.num_sanity_val_steps, - profiler=cfg.trainer.profiler, - callbacks=callbacks, - deterministic=deterministic, - ) - - try: - pl_trainer.fit( - lightning_module, # type: ignore - self.pytorch_loaders["val"], - self.pytorch_loaders["train"], - ) - - pruner_callback.check_pruned() - - except optuna.TrialPruned as e: - # Pruning is done by raising an error - logger.info(e) - - if "val/loss" not in pl_trainer.callback_metrics: - raise ValueError( - "No validation loss found. " - "This can happen if `TestOnTrainEnd` callback is used." - ) - - return pl_trainer.callback_metrics["val/loss"].item() - - def _get_trial_params(self, trial: optuna.trial.Trial) -> dict[str, Any]: - """Get trial params based on specified config.""" - cfg_tuner = self.tune_cfg.params - new_params = {} - for key, value in cfg_tuner.items(): - key_info = key.split("_") - key_name = "_".join(key_info[:-1]) - key_type = key_info[-1] - match key_type, value: - case "subset", [list(whole_set), int(subset_size)]: - if key_name.split(".")[-1] != "augmentations": - raise ValueError( - "Subset sampling currently only supported for augmentations" - ) - whole_set_indices = self._augs_to_indices(whole_set) - subset = random.sample(whole_set_indices, subset_size) - for aug_id in whole_set_indices: - new_params[f"{key_name}.{aug_id}.active"] = ( - True if aug_id in subset else False - ) - continue - case "categorical", list(lst): - new_value = trial.suggest_categorical(key_name, lst) - case "float", [float(low), float(high), *tail]: - step = tail[0] if tail else None - if step is not None and not isinstance(step, float): - raise ValueError( - f"Step for float type must be float, but got {step}" - ) - new_value = trial.suggest_float(key_name, low, high, step=step) - case "int", [int(low), int(high), *tail]: - step = tail[0] if tail else 1 - if not isinstance(step, int): - raise ValueError( - f"Step for int type must be int, but got {step}" - ) - new_value = trial.suggest_int(key_name, low, high, step=step) - case "loguniform", [float(low), float(high)]: - new_value = trial.suggest_loguniform(key_name, low, high) - case "uniform", [float(low), float(high)]: - new_value = trial.suggest_uniform(key_name, low, high) - case _, _: - raise KeyError( - f"Combination of {key_type} and {value} not supported" - ) - - new_params[key_name] = new_value - - if len(new_params) == 0: - raise ValueError( - "No paramteres to tune. Specify them under `tuner.params`." - ) - return new_params - - def _augs_to_indices(self, aug_names: list[str]) -> list[int]: - """Maps augmentation names to indices.""" - all_augs = [a.name for a in self.cfg.trainer.preprocessing.augmentations] - aug_indices = [] - for aug_name in aug_names: - if aug_name == "Normalize": - logger.warn( - f"'{aug_name}' should be tuned directly by adding '...normalize.active_categorical' to the tuner params, skipping." 
- ) - continue - try: - index = all_augs.index(aug_name) - aug_indices.append(index) - except ValueError: - logger.warn( - f"Augmentation '{aug_name}' not found under trainer augemntations, skipping." - ) - continue - return aug_indices diff --git a/luxonis_train/core/utils/archive_utils.py b/luxonis_train/core/utils/archive_utils.py new file mode 100644 index 00000000..6f85f4a3 --- /dev/null +++ b/luxonis_train/core/utils/archive_utils.py @@ -0,0 +1,219 @@ +from pathlib import Path + +import onnx +from luxonis_ml.nn_archive.config_building_blocks import ObjectDetectionSubtypeYOLO + +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.nodes.enums.head_categorization import ( + ImplementedHeads, + ImplementedHeadsIsSoxtmaxed, +) +from luxonis_train.utils.config import Config + + +def get_inputs(path: Path): + """Get inputs of a model executable. + + @type path: Path + @param path: Path to model executable file. + """ + + if path.suffix == ".onnx": + return _get_onnx_inputs(str(path)) + else: + raise NotImplementedError( + f"Missing input reading function for {path.suffix} models." + ) + + +def _get_onnx_inputs(path: str) -> dict: + """Get inputs of an ONNX model executable. + + @type path: str + @param path: Path to model executable file. + """ + + inputs_dict = {} + model = onnx.load(path) + for input in model.graph.input: + tensor_type = input.type.tensor_type + dtype_idx = tensor_type.elem_type + dtype = str(onnx.helper.tensor_dtype_to_np_dtype(dtype_idx)) + shape = [] + for d in tensor_type.shape.dim: + if d.HasField("dim_value"): + shape.append(d.dim_value) + else: + raise ValueError("Unsupported input dimension identifier type") + if shape[1] == 3: + layout = "NCHW" + elif shape[3] == 3: + layout = "NHWC" + else: + raise ValueError("Unknown input layout") + inputs_dict[input.name] = {"dtype": dtype, "shape": shape, "layout": layout} + return inputs_dict + + +def get_outputs(path: Path) -> dict: + """Get outputs of a model executable. + + @type path: Path + @param path: Path to model executable file. + """ + + if path.suffix == ".onnx": + return _get_onnx_outputs(str(path)) + else: + raise NotImplementedError( + f"Missing input reading function for {path.suffix} models." + ) + + +def _get_onnx_outputs(path: str) -> dict: + """Get outputs of an ONNX model executable. + + @type executable_path: str + @param executable_path: Path to model executable file. + """ + + outputs_dict = {} + model = onnx.load(path) + for output in model.graph.output: + tensor_type = output.type.tensor_type + dtype_idx = tensor_type.elem_type + dtype = str(onnx.helper.tensor_dtype_to_np_dtype(dtype_idx)) + outputs_dict[output.name] = {"dtype": dtype} + return outputs_dict + + +def _get_classes( + node_name: str, node_task: str | None, classes: dict[str, list[str]] +) -> list[str]: + if not node_task: + match node_name: + case "ClassificationHead": + node_task = "classification" + case "EfficientBBoxHead": + node_task = "boundingbox" + case "SegmentationHead" | "BiSeNetHead": + node_task = "segmentation" + case "ImplicitKeypointBBoxHead" | "EfficientKeypointBBoxHead": + node_task = "keypoints" + case _: + raise ValueError("Node does not map to a default task.") + + return classes.get(node_task, []) + + +def _get_head_specific_parameters( + nodes: dict[str, BaseNode], head_name: str, head_alias: str +) -> dict: + """Get parameters specific to head. + + @type nodes: dict[str, BaseNode] + @param nodes: Dictionary of nodes. + @type head_name: str + @param head_name: Name of the head (e.g. 
'EfficientBBoxHead'). + @type head_alias: str + @param head_alias: Alias of the head (e.g. 'detection_head'). + """ + + parameters = {} + if head_name == "ClassificationHead": + parameters["is_softmax"] = getattr(ImplementedHeadsIsSoxtmaxed, head_name).value + elif head_name == "EfficientBBoxHead": + parameters["subtype"] = ObjectDetectionSubtypeYOLO.YOLOv6.value + head_node = nodes[head_alias] + parameters["iou_threshold"] = head_node.iou_thres + parameters["conf_threshold"] = head_node.conf_thres + parameters["max_det"] = head_node.max_det + elif head_name in ["SegmentationHead", "BiSeNetHead"]: + parameters["is_softmax"] = getattr(ImplementedHeadsIsSoxtmaxed, head_name).value + elif head_name == "ImplicitKeypointBBoxHead": + parameters["subtype"] = ObjectDetectionSubtypeYOLO.YOLOv7.value + head_node = nodes[head_alias] + parameters["iou_threshold"] = head_node.iou_thres + parameters["conf_threshold"] = head_node.conf_thres + parameters["max_det"] = head_node.max_det + parameters["n_keypoints"] = head_node.n_keypoints + parameters["anchors"] = head_node.anchors.tolist() + elif head_name == "EfficientKeypointBBoxHead": + # or appropriate subtype + head_node = nodes[head_alias] + parameters["iou_threshold"] = head_node.iou_thres + parameters["conf_threshold"] = head_node.conf_thres + parameters["max_det"] = head_node.max_det + parameters["n_keypoints"] = head_node.n_keypoints + else: + raise ValueError("Unknown head name") + return parameters + + +def _get_head_outputs(outputs: list[dict], head_name: str) -> list[str]: + """Get model outputs in a head-specific format. + + @type head_name: str + @param head_name: Name of the head (e.g. 'EfficientBBoxHead'). + @rtype: list[str] + @return: List of output names. + """ + + if head_name == "ClassificationHead": + return [outputs[0]["name"]] + elif head_name == "EfficientBBoxHead": + return [output["name"] for output in outputs] + elif head_name in ["SegmentationHead", "BiSeNetHead"]: + return [outputs[0]["name"]] + elif head_name == "ImplicitKeypointBBoxHead": + return [outputs[0]["name"]] + elif head_name == "EfficientKeypointBBoxHead": + return [outputs[0]["name"]] + else: + raise ValueError("Unknown head name") + + +def get_heads( + cfg: Config, + outputs: list[dict], + class_dict: dict[str, list[str]], + nodes: dict[str, BaseNode], +) -> dict[str, dict]: + """Get model heads. + + @type cfg: Config + @param cfg: Configuration object. + @type outputs: list[dict] + @param outputs: List of model outputs. + @type class_dict: dict[str, list[str]] + @param class_dict: Dictionary of classes. + @type nodes: dict[str, BaseNode] + @param nodes: Dictionary of nodes. 
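+    @rtype: dict[str, dict]
+    @return: Mapping from head name to its configuration (parser, metadata with classes, and output names).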
+ """ + heads_dict = {} + + for node in cfg.model.nodes: + node_name = node.name + node_alias = node.alias or node_name + if node_alias in cfg.model.outputs: + if node_name in ImplementedHeads.__members__: + parser = getattr(ImplementedHeads, node_name).value + task = node.task + if isinstance(task, dict): + task = str(next(iter(task))) + + classes = _get_classes(node_name, task, class_dict) + head_outputs = _get_head_outputs(outputs, node_name) + head_dict = { + "parser": parser, + "metadata": { + "classes": classes, + "n_classes": len(classes), + }, + "outputs": head_outputs, + } + head_dict["metadata"].update( + _get_head_specific_parameters(nodes, node_name, node_alias) + ) + heads_dict[node_name] = head_dict + return heads_dict diff --git a/luxonis_train/core/utils/export_utils.py b/luxonis_train/core/utils/export_utils.py new file mode 100644 index 00000000..f44b5d1a --- /dev/null +++ b/luxonis_train/core/utils/export_utils.py @@ -0,0 +1,119 @@ +import logging +from contextlib import contextmanager + +import luxonis_train +from luxonis_train.utils.config import Config, ExportConfig + +logger = logging.getLogger(__name__) + + +@contextmanager +def replace_weights( + module: "luxonis_train.models.LuxonisLightningModule", weights: str | None = None +): + old_weights = None + if weights is not None: + old_weights = module.state_dict() + module.load_checkpoint(weights) + + yield + + if old_weights is not None: + try: + module.load_state_dict(old_weights) + except RuntimeError: + logger.error( + "Failed to strictly load old weights. The model likey underwent reparametrization, " + "which is a destructive operation. Loading old weights with strict=False." + ) + module.load_state_dict(old_weights, strict=False) + del old_weights + + +def try_onnx_simplify(onnx_path: str) -> None: + import onnx + + try: + import onnxsim + + logger.info("Simplifying ONNX model...") + model_onnx = onnx.load(onnx_path) + onnx_model, check = onnxsim.simplify(model_onnx) + if not check: + raise RuntimeError("ONNX simplify failed.") + onnx.save(onnx_model, onnx_path) + logger.info(f"ONNX model saved to {onnx_path}") + + except ImportError: + logger.error("Failed to import `onnxsim`") + logger.warning( + "`onnxsim` not installed. Skipping ONNX model simplification. " + "Ensure `onnxsim` is installed in your environment." + ) + except RuntimeError: + logger.error( + "Failed to simplify ONNX model. Proceeding without simplification." 
+ ) + + +def get_preprocessing( + cfg: Config, +) -> tuple[list[float] | None, list[float] | None, bool]: + normalize_params = cfg.trainer.preprocessing.normalize.params + if cfg.exporter.scale_values is not None: + scale_values = cfg.exporter.scale_values + else: + scale_values = normalize_params.get("std", None) + if scale_values: + scale_values = ( + [round(i * 255, 5) for i in scale_values] + if isinstance(scale_values, list) + else round(scale_values * 255, 5) + ) + + if cfg.exporter.mean_values is not None: + mean_values = cfg.exporter.mean_values + else: + mean_values = normalize_params.get("mean", None) + if mean_values: + mean_values = ( + [round(i * 255, 5) for i in mean_values] + if isinstance(mean_values, list) + else round(mean_values * 255, 5) + ) + reverse_channels = cfg.exporter.reverse_input_channels + + return scale_values, mean_values, reverse_channels + + +def blobconverter_export( + cfg: ExportConfig, + scale_values: list[float] | None, + mean_values: list[float] | None, + reverse_channels: bool, + export_path: str, + onnx_path: str, +) -> str: + import blobconverter + + logger.info("Converting ONNX to .blob") + + optimizer_params = [] + if scale_values: + optimizer_params.append(f"--scale_values={scale_values}") + if mean_values: + optimizer_params.append(f"--mean_values={mean_values}") + if reverse_channels: + optimizer_params.append("--reverse_input_channels") + + blob_path = blobconverter.from_onnx( + model=onnx_path, + optimizer_params=optimizer_params, + data_type=cfg.data_type, + shaves=cfg.blobconverter.shaves, + version=cfg.blobconverter.version, + use_cache=False, + output_dir=export_path, + ) + logger.info(f".blob model saved to {blob_path}") + return blob_path diff --git a/luxonis_train/core/utils/infer_utils.py b/luxonis_train/core/utils/infer_utils.py new file mode 100644 index 00000000..17696705 --- /dev/null +++ b/luxonis_train/core/utils/infer_utils.py @@ -0,0 +1,30 @@ +from pathlib import Path + +import cv2 +from torch import Tensor + + +def render_visualizations( + visualizations: dict[str, dict[str, Tensor]], save_dir: str | Path | None +) -> None: + save_dir = Path(save_dir) if save_dir is not None else None + if save_dir is not None: + save_dir.mkdir(exist_ok=True, parents=True) + + i = 0 + for node_name, vzs in visualizations.items(): + for viz_name, viz_batch in vzs.items(): + for i, viz in enumerate(viz_batch): + viz_arr = viz.detach().cpu().numpy().transpose(1, 2, 0) + viz_arr = cv2.cvtColor(viz_arr, cv2.COLOR_RGB2BGR) + name = f"{node_name}/{viz_name}/{i}" + if save_dir is not None: + name = name.replace("/", "_") + cv2.imwrite(str(save_dir / f"{name}_{i}.png"), viz_arr) + i += 1 + else: + cv2.imshow(name, viz_arr) + + if save_dir is None: + if cv2.waitKey(0) == ord("q"): + exit() diff --git a/luxonis_train/core/utils/train_utils.py b/luxonis_train/core/utils/train_utils.py new file mode 100644 index 00000000..3a45a85b --- /dev/null +++ b/luxonis_train/core/utils/train_utils.py @@ -0,0 +1,25 @@ +import lightning.pytorch as pl + +from luxonis_train.utils.config import Config + + +def create_trainer(cfg: Config, **kwargs) -> pl.Trainer: + """Creates Pytorch Lightning trainer. + + @type cfg: Config + @param cfg: Configuration object. + @param kwargs: Additional arguments to pass to the trainer. + @rtype: pl.Trainer + @return: Pytorch Lightning trainer. 
+ """ + return pl.Trainer( + accelerator=cfg.trainer.accelerator, + devices=cfg.trainer.devices, + strategy=cfg.trainer.strategy, + max_epochs=cfg.trainer.epochs, + accumulate_grad_batches=cfg.trainer.accumulate_grad_batches, + check_val_every_n_epoch=cfg.trainer.validation_interval, + num_sanity_val_steps=cfg.trainer.num_sanity_val_steps, + profiler=cfg.trainer.profiler, + **kwargs, + ) diff --git a/luxonis_train/core/utils/tune_utils.py b/luxonis_train/core/utils/tune_utils.py new file mode 100644 index 00000000..e2fe692e --- /dev/null +++ b/luxonis_train/core/utils/tune_utils.py @@ -0,0 +1,77 @@ +import logging +import random +from typing import Any + +import optuna + +logger = logging.getLogger(__name__) + + +def _augs_to_indices(all_augs: list[str], aug_names: list[str]) -> list[int]: + """Maps augmentation names to indices.""" + aug_indices = [] + for aug_name in aug_names: + if aug_name == "Normalize": + logger.warn( + f"'{aug_name}' should be tuned directly by adding '...normalize.active_categorical' to the tuner params, skipping." + ) + continue + try: + index = all_augs.index(aug_name) + aug_indices.append(index) + except ValueError: + logger.warn( + f"Augmentation '{aug_name}' not found under trainer augemntations, skipping." + ) + continue + return aug_indices + + +def get_trial_params( + all_augs: list[str], params: dict[str, Any], trial: optuna.trial.Trial +) -> dict[str, Any]: + """Get trial params based on specified config.""" + new_params = {} + for key, value in params.items(): + key_info = key.split("_") + key_name = "_".join(key_info[:-1]) + key_type = key_info[-1] + match key_type, value: + case "subset", [list(whole_set), int(subset_size)]: + if key_name.split(".")[-1] != "augmentations": + raise ValueError( + "Subset sampling currently only supported for augmentations" + ) + whole_set_indices = _augs_to_indices(all_augs, whole_set) + subset = random.sample(whole_set_indices, subset_size) + for aug_id in whole_set_indices: + new_params[f"{key_name}.{aug_id}.active"] = ( + True if aug_id in subset else False + ) + continue + case "categorical", list(lst): + new_value = trial.suggest_categorical(key_name, lst) + case "float", [float(low), float(high), *tail]: + step = tail[0] if tail else None + if step is not None and not isinstance(step, float): + raise ValueError( + f"Step for float type must be float, but got {step}" + ) + new_value = trial.suggest_float(key_name, low, high, step=step) + case "int", [int(low), int(high), *tail]: + step = tail[0] if tail else 1 + if not isinstance(step, int): + raise ValueError(f"Step for int type must be int, but got {step}") + new_value = trial.suggest_int(key_name, low, high, step=step) + case "loguniform", [float(low), float(high)]: + new_value = trial.suggest_loguniform(key_name, low, high) + case "uniform", [float(low), float(high)]: + new_value = trial.suggest_uniform(key_name, low, high) + case _, _: + raise KeyError(f"Combination of {key_type} and {value} not supported") + + new_params[key_name] = new_value + + if len(new_params) == 0: + raise ValueError("No paramteres to tune. 
Specify them under `tuner.params`.") + return new_params diff --git a/luxonis_train/models/__init__.py b/luxonis_train/models/__init__.py index 1e2f0d91..db71b9e5 100644 --- a/luxonis_train/models/__init__.py +++ b/luxonis_train/models/__init__.py @@ -1,5 +1,5 @@ -from .luxonis_model import LuxonisModel +from .luxonis_lightning import LuxonisLightningModule from .luxonis_output import LuxonisOutput from .predefined_models import * -__all__ = ["LuxonisModel", "LuxonisOutput"] +__all__ = ["LuxonisLightningModule", "LuxonisOutput"] diff --git a/luxonis_train/models/luxonis_model.py b/luxonis_train/models/luxonis_lightning.py similarity index 95% rename from luxonis_train/models/luxonis_model.py rename to luxonis_train/models/luxonis_lightning.py index 296c85ee..3147ffe1 100644 --- a/luxonis_train/models/luxonis_model.py +++ b/luxonis_train/models/luxonis_lightning.py @@ -22,12 +22,7 @@ combine_visualizations, get_unnormalized_images, ) -from luxonis_train.callbacks import ( - DeviceStatsMonitor, - GPUStatsMonitor, - LuxonisProgressBar, - ModuleFreezer, -) +from luxonis_train.callbacks import LuxonisProgressBar, ModuleFreezer from luxonis_train.nodes import BaseNode from luxonis_train.utils.config import AttachedModuleConfig, Config from luxonis_train.utils.general import DatasetMetadata, to_shape_packet, traverse_graph @@ -40,7 +35,7 @@ logger = getLogger(__name__) -class LuxonisModel(pl.LightningModule): +class LuxonisLightningModule(pl.LightningModule): """Class representing the entire model. This class keeps track of the model graph, nodes, and attached modules. @@ -94,10 +89,10 @@ def __init__( self, cfg: Config, save_dir: str, - input_shape: dict[str, Size], + input_shapes: dict[str, Size], dataset_metadata: DatasetMetadata | None = None, *, - _core: "luxonis_train.core.Core | None" = None, + _core: "luxonis_train.core.LuxonisModel | None" = None, **kwargs, ): """Constructs an instance of `LuxonisModel` from `Config`. @@ -106,9 +101,9 @@ def __init__( @param cfg: Config object. @type save_dir: str @param save_dir: Directory to save checkpoints. - @type input_shape: dict[str, Size] - @param input_shape: Dictionary of input shapes. Keys are input names, values are - shapes. + @type input_shapes: dict[str, Size] + @param input_shapes: Dictionary of input shapes. Keys are input names, values + are shapes. @type dataset_metadata: L{DatasetMetadata} | None @param dataset_metadata: Dataset metadata. @type kwargs: Any @@ -121,7 +116,7 @@ def __init__( self._core = _core self.cfg = cfg - self.original_in_shape = input_shape + self.original_in_shapes = input_shapes self.image_source = cfg.loader.image_source self.dataset_metadata = dataset_metadata or DatasetMetadata() self.frozen_nodes: list[tuple[nn.Module, int]] = [] @@ -186,22 +181,22 @@ def __init__( # assume the node is the starting node and takes all inputs from the loader. self.loader_input_shapes[node_name] = { - k: Size(v) for k, v in input_shape.items() + k: Size(v) for k, v in input_shapes.items() } - self.node_input_sources[node_name] = list(input_shape.keys()) + self.node_input_sources[node_name] = list(input_shapes.keys()) else: # For each input_source, check if the loader provides the required output. # If yes, add the shape to the input_shapes dict. If not, raise an error. 
self.loader_input_shapes[node_name] = {} for input_source in node_cfg.input_sources: - if input_source not in input_shape: + if input_source not in input_shapes: raise ValueError( f"Node {node_name} requires input source {input_source}, " "which is not provided by the loader." ) self.loader_input_shapes[node_name][input_source] = Size( - input_shape[input_source] + input_shapes[input_source] ) # Inputs (= preceding nodes) are handled in the _initiate_nodes method. @@ -240,6 +235,13 @@ def __init__( self.load_checkpoint(self.cfg.model.weights) + @property + def core(self) -> "luxonis_train.core.LuxonisModel": + """Returns the core model.""" + if self._core is None: + raise ValueError("Core reference is not set.") + return self._core + def _initiate_nodes( self, nodes: dict[str, tuple[type[BaseNode], Kwargs]], @@ -285,7 +287,7 @@ def _initiate_nodes( node = Node( input_shapes=node_input_shapes, - original_in_shape=self.original_in_shape[self.image_source], + original_in_shape=self.original_in_shapes[self.image_source], dataset_metadata=self.dataset_metadata, **node_kwargs, ) @@ -680,8 +682,6 @@ def configure_callbacks(self) -> list[pl.Callback]: self.best_val_metric_checkpoints_path = f"{self.save_dir}/best_val_metric" model_name = self.cfg.model.name - user_callbacks = [c.name for c in self.cfg.trainer.callbacks] - callbacks: list[pl.Callback] = [ ModelCheckpoint( monitor="val/loss", @@ -691,18 +691,8 @@ def configure_callbacks(self) -> list[pl.Callback]: save_top_k=self.cfg.trainer.save_top_k, mode="min", ), + RichModelSummary(max_depth=2), ] - if "DeviceStatsMonitor" not in user_callbacks: - callbacks.append(DeviceStatsMonitor(cpu_stats=True)) - - if "GPUStatsMonitor" not in user_callbacks: - if GPUStatsMonitor.is_available(): - callbacks.append(GPUStatsMonitor()) - else: - logger.warning( - "GPUStatsMonitor is not available for this machine." - "Verify that `nvidia-smi` is installed." 
- ) if self.main_metric is not None: main_metric = self.main_metric.replace("/", "_") @@ -721,9 +711,6 @@ def configure_callbacks(self) -> list[pl.Callback]: if self.frozen_nodes: callbacks.append(ModuleFreezer(self.frozen_nodes)) - if self.cfg.use_rich_text: - callbacks.append(RichModelSummary(max_depth=2)) - for callback in self.cfg.trainer.callbacks: if callback.active: callbacks.append(CALLBACKS.get(callback.name)(**callback.params)) @@ -839,14 +826,7 @@ def _print_results( logger.info(f"{stage} loss: {loss:.4f}") - if self.cfg.use_rich_text: - self._progress_bar.print_results(stage=stage, loss=loss, metrics=metrics) - else: - for node_name, node_metrics in metrics.items(): - for metric_name, metric_value in node_metrics.items(): - logger.info( - f"{stage} metric: {node_name}/{metric_name}: {metric_value:.4f}" - ) + self._progress_bar.print_results(stage=stage, loss=loss, metrics=metrics) if self.main_metric is not None: main_metric_node, main_metric_name = self.main_metric.split("/") diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index 4ab2ad2d..0e0a4ad2 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -64,7 +64,7 @@ def forward(self, x: Tensor) -> tuple[Tensor, Tensor, Tensor]: return out_feature, out_cls, out_reg - def _initialize_weights_and_biases(self, prior_prob: float): + def _initialize_weights_and_biases(self, prior_prob: float) -> None: data = [ (self.class_branch[-1], -math.log((1 - prior_prob) / prior_prob)), (self.regression_branch[-1], 1.0), @@ -290,7 +290,7 @@ def __init__( activation=nn.Identity(), ) - def forward(self, x: Tensor): + def forward(self, x: Tensor) -> Tensor: if hasattr(self, "rbr_reparam"): return self.nonlinearity(self.se(self.rbr_reparam(x))) @@ -301,7 +301,7 @@ def forward(self, x: Tensor): return self.nonlinearity(self.se(self.rbr_dense(x) + self.rbr_1x1(x) + id_out)) - def reparametrize(self): + def reparametrize(self) -> None: if hasattr(self, "rbr_reparam"): return @@ -325,7 +325,7 @@ def reparametrize(self): if hasattr(self, "id_tensor"): self.__delattr__("id_tensor") - def _get_equivalent_kernel_bias(self): + def _get_equivalent_kernel_bias(self) -> tuple[Tensor, Tensor]: """Derives the equivalent kernel and bias in a DIFFERENTIABLE way.""" kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense) kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1) @@ -406,7 +406,7 @@ def __init__( ) in_channels = out_channels - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: for block in self.blocks: x = block(x) return x @@ -432,7 +432,7 @@ def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 5): kernel_size=kernel_size, stride=1, padding=kernel_size // 2 ) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: x = self.conv1(x) # apply max-pooling at three different scales y1 = self.max_pool(x) @@ -468,7 +468,7 @@ def __init__(self, in_channels: int, out_channels: int): nn.Sigmoid(), ) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: x = self.conv_3x3(x) attention = self.attention(x) out = x * attention @@ -506,7 +506,7 @@ def __init__(self, in_channels: int, out_channels: int, reduction: int = 1): nn.Sigmoid(), ) - def forward(self, x1, x2): + def forward(self, x1: Tensor, x2: Tensor) -> Tensor: fusion = torch.cat([x1, x2], dim=1) x = self.conv_1x1(fusion) attention = self.attention(x) @@ -523,7 +523,7 @@ def __init__(self, channel: int): self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1)) 
nn.init.normal_(self.implicit, std=0.02) - def forward(self, x: Tensor): + def forward(self, x: Tensor) -> Tensor: return self.implicit.expand_as(x) + x @@ -536,7 +536,7 @@ def __init__(self, channel: int): self.implicit = nn.Parameter(torch.ones(1, channel, 1, 1)) nn.init.normal_(self.implicit, mean=1.0, std=0.02) - def forward(self, x: Tensor): + def forward(self, x: Tensor) -> Tensor: return self.implicit.expand_as(x) * x @@ -589,7 +589,7 @@ def __init__(self, in_channels: int, out_channels: int): self.block = nn.Sequential(*layers) - def forward(self, x: Tensor): + def forward(self, x: Tensor) -> Tensor: out = self.block(x) return out diff --git a/luxonis_train/nodes/efficient_bbox_head.py b/luxonis_train/nodes/efficient_bbox_head.py index 37f23e8b..e80ca31a 100644 --- a/luxonis_train/nodes/efficient_bbox_head.py +++ b/luxonis_train/nodes/efficient_bbox_head.py @@ -117,7 +117,7 @@ def wrap( else: boxes = self._process_to_bbox((features, cls_tensor, reg_tensor)) return { - self.task: boxes, + "boundingbox": boxes, "features": features, "class_scores": [cls_tensor], "distributions": [reg_tensor], diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 283bffba..9c4a0f2c 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -3,18 +3,19 @@ from typing import Annotated, Any, Literal from luxonis_ml.data import LabelType -from luxonis_ml.utils import Environ, LuxonisConfig, LuxonisFileSystem, setup_logging -from pydantic import BaseModel, ConfigDict, Field, model_validator +from luxonis_ml.utils import ( + BaseModelExtraForbid, + Environ, + LuxonisConfig, + LuxonisFileSystem, +) +from pydantic import Field, model_validator from typing_extensions import Self logger = logging.getLogger(__name__) -class CustomBaseModel(BaseModel): - model_config = ConfigDict(extra="forbid") - - -class AttachedModuleConfig(CustomBaseModel): +class AttachedModuleConfig(BaseModelExtraForbid): name: str attached_to: str alias: str | None = None @@ -29,12 +30,12 @@ class MetricModuleConfig(AttachedModuleConfig): is_main_metric: bool = False -class FreezingConfig(CustomBaseModel): +class FreezingConfig(BaseModelExtraForbid): active: bool = False unfreeze_after: int | float | None = None -class ModelNodeConfig(CustomBaseModel): +class ModelNodeConfig(BaseModelExtraForbid): name: str alias: str | None = None inputs: list[str] = [] # From preceding nodes @@ -44,7 +45,7 @@ class ModelNodeConfig(CustomBaseModel): task: str | dict[LabelType, str] | None = None -class PredefinedModelConfig(CustomBaseModel): +class PredefinedModelConfig(BaseModelExtraForbid): name: str params: dict[str, Any] = {} include_nodes: bool = True @@ -53,7 +54,7 @@ class PredefinedModelConfig(CustomBaseModel): include_visualizers: bool = True -class ModelConfig(CustomBaseModel): +class ModelConfig(BaseModelExtraForbid): name: str = "model" predefined_model: PredefinedModelConfig | None = None weights: str | None = None @@ -127,7 +128,7 @@ def check_unique_names(self) -> Self: return self -class TrackerConfig(CustomBaseModel): +class TrackerConfig(BaseModelExtraForbid): project_name: str | None = None project_id: str | None = None run_name: str | None = None @@ -139,7 +140,7 @@ class TrackerConfig(CustomBaseModel): is_mlflow: bool = False -class LoaderConfig(CustomBaseModel): +class LoaderConfig(BaseModelExtraForbid): name: str = "LuxonisLoaderTorch" image_source: str = "image" train_view: str = "train" @@ -148,7 +149,7 @@ class LoaderConfig(CustomBaseModel): params: dict[str, Any] = {} -class 
NormalizeAugmentationConfig(CustomBaseModel): +class NormalizeAugmentationConfig(BaseModelExtraForbid): active: bool = True params: dict[str, Any] = { "mean": [0.485, 0.456, 0.406], @@ -156,13 +157,13 @@ class NormalizeAugmentationConfig(CustomBaseModel): } -class AugmentationConfig(CustomBaseModel): +class AugmentationConfig(BaseModelExtraForbid): name: str active: bool = True params: dict[str, Any] = {} -class PreprocessingConfig(CustomBaseModel): +class PreprocessingConfig(BaseModelExtraForbid): train_image_size: Annotated[ list[int], Field(default=[256, 256], min_length=2, max_length=2) ] = [256, 256] @@ -188,23 +189,23 @@ def get_active_augmentations(self) -> list[AugmentationConfig]: return [aug for aug in self.augmentations if aug.active] -class CallbackConfig(CustomBaseModel): +class CallbackConfig(BaseModelExtraForbid): name: str active: bool = True params: dict[str, Any] = {} -class OptimizerConfig(CustomBaseModel): +class OptimizerConfig(BaseModelExtraForbid): name: str = "Adam" params: dict[str, Any] = {} -class SchedulerConfig(CustomBaseModel): +class SchedulerConfig(BaseModelExtraForbid): name: str = "ConstantLR" params: dict[str, Any] = {} -class TrainerConfig(CustomBaseModel): +class TrainerConfig(BaseModelExtraForbid): preprocessing: PreprocessingConfig = PreprocessingConfig() accelerator: Literal["auto", "cpu", "gpu"] = "auto" @@ -254,29 +255,33 @@ def check_validation_interval(self) -> Self: return self -class OnnxExportConfig(CustomBaseModel): +class OnnxExportConfig(BaseModelExtraForbid): opset_version: int = 12 dynamic_axes: dict[str, Any] | None = None -class BlobconverterExportConfig(CustomBaseModel): +class BlobconverterExportConfig(BaseModelExtraForbid): active: bool = False shaves: int = 6 version: Literal["2021.2", "2021.3", "2021.4", "2022.1", "2022.3_RVC3"] = "2022.1" -class ExportConfig(CustomBaseModel): - export_save_directory: str = "output_export" +class ArchiveConfig(BaseModelExtraForbid): + name: str | None = None + upload_to_run: bool = True + upload_url: str | None = None + + +class ExportConfig(ArchiveConfig): + name: str | None = None input_shape: list[int] | None = None - export_model_name: str = "model" - data_type: Literal["INT8", "FP16", "FP32"] = "FP16" + data_type: Literal["int8", "fp16", "fp32"] = "fp16" reverse_input_channels: bool = True scale_values: list[float] | None = None mean_values: list[float] | None = None output_names: list[str] | None = None onnx: OnnxExportConfig = OnnxExportConfig() blobconverter: BlobconverterExportConfig = BlobconverterExportConfig() - upload_url: str | None = None @model_validator(mode="after") def check_values(self) -> Self: @@ -291,18 +296,12 @@ def pad_values(values: float | list[float] | None): return self -class ArchiveConfig(BaseModel): - archive_name: str = "nn_archive" - archive_save_directory: str = "output_archive" - upload_url: str | None = None - - -class StorageConfig(CustomBaseModel): +class StorageConfig(BaseModelExtraForbid): active: bool = True storage_type: Literal["local", "remote"] = "local" -class TunerConfig(CustomBaseModel): +class TunerConfig(BaseModelExtraForbid): study_name: str = "test-study" continue_existing_study: bool = True use_pruner: bool = True @@ -316,7 +315,6 @@ class TunerConfig(CustomBaseModel): class Config(LuxonisConfig): - use_rich_text: bool = True model: ModelConfig = ModelConfig() loader: LoaderConfig = LoaderConfig() tracker: TrackerConfig = TrackerConfig() @@ -336,14 +334,6 @@ def check_environment(cls, data: Any) -> Any: ) return data - 
@model_validator(mode="before") - @classmethod - def setup_logging(cls, data: Any) -> Any: - if isinstance(data, dict): - if data.get("use_rich_text", True): - setup_logging(use_rich=True) - return data - @classmethod def get_config( cls, diff --git a/luxonis_train/utils/general.py b/luxonis_train/utils/general.py index 6f7e027a..5ae3b43f 100644 --- a/luxonis_train/utils/general.py +++ b/luxonis_train/utils/general.py @@ -12,8 +12,6 @@ from luxonis_train.utils.types import Packet -# TODO: could be moved to luxonis-ml? -# TODO: support multiclass keypoints class DatasetMetadata: """Metadata about the dataset.""" @@ -29,20 +27,12 @@ def __init__( the model. @type classes: dict[str, list[str]] | None - @param classes: Dictionary mapping task names to lists of class names. If not - provided, will be inferred from the dataset loader. - @type n_classes: int | None - @param n_classes: Number of classes for each label type. - @type n_keypoints: int | None - @param n_keypoints: Number of keypoints in the dataset. - @type keypoint_names: list[str] | None - @param keypoint_names: List of keypoint names. - @type connectivity: list[tuple[int, int]] | None - @param connectivity: List of edges in the skeleton graph. + @param classes: Dictionary mapping tasks to lists of class names. + @type n_keypoints: dict[str, int] | None + @param n_keypoints: Dictionary mapping tasks to the number of keypoints. @type loader: DataLoader | None @param loader: Dataset loader. """ - self._classes = classes or {} self._n_keypoints = n_keypoints or {} self._loader = loader @@ -176,8 +166,8 @@ def infer_upscale_factor( ) -> int: """Infer the upscale factor from the input height and original height.""" num_up = math.log2(orig_height) - math.log2(in_height) - if num_up.is_integer(): - return int(num_up) + if abs(round(num_up) - num_up) < 1e-6: + return int(round(num_up)) elif not strict: if warn: logging.getLogger(__name__).warning( diff --git a/luxonis_train/utils/loaders/base_loader.py b/luxonis_train/utils/loaders/base_loader.py index 5f3bba59..8d8c4090 100644 --- a/luxonis_train/utils/loaders/base_loader.py +++ b/luxonis_train/utils/loaders/base_loader.py @@ -45,7 +45,7 @@ def image_source(self) -> str: @property @abstractmethod - def input_shape(self) -> dict[str, Size]: + def input_shapes(self) -> dict[str, Size]: """ Shape of each loader group (sub-element), WITHOUT batch dimension. 
Examples: diff --git a/luxonis_train/utils/loaders/luxonis_loader_torch.py b/luxonis_train/utils/loaders/luxonis_loader_torch.py index 8545dad2..c353c36e 100644 --- a/luxonis_train/utils/loaders/luxonis_loader_torch.py +++ b/luxonis_train/utils/loaders/luxonis_loader_torch.py @@ -35,7 +35,7 @@ def __len__(self) -> int: return len(self.base_loader) @property - def input_shape(self) -> dict[str, Size]: + def input_shapes(self) -> dict[str, Size]: img = self[0][0][self.image_source] return {self.image_source: img.shape} diff --git a/luxonis_train/utils/tracker.py b/luxonis_train/utils/tracker.py index 5f64f08b..4df76edd 100644 --- a/luxonis_train/utils/tracker.py +++ b/luxonis_train/utils/tracker.py @@ -1,5 +1,3 @@ -from typing import Literal - from lightning.pytorch.loggers.logger import Logger from lightning.pytorch.utilities import rank_zero_only # type: ignore from luxonis_ml.tracker import LuxonisTracker @@ -8,21 +6,31 @@ class LuxonisTrackerPL(LuxonisTracker, Logger): """Implementation of LuxonisTracker that is compatible with PytorchLightning.""" + def __init__(self, *, _auto_finalize: bool = True, **kwargs): + """ + @type _auto_finalize: bool + @param _auto_finalize: If True, the run will be finalized automatically when the training ends. + If set to C{False}, the user will have to call the L{_finalize} method manually. + + @type kwargs: dict + @param kwargs: Additional keyword arguments to be passed to the L{LuxonisTracker}. + """ + LuxonisTracker.__init__(self, **kwargs) + Logger.__init__(self) + if _auto_finalize: + self.finalize = self._finalize + @rank_zero_only - def finalize( - self, status: Literal["success", "failed", "finished"] = "success" - ) -> None: + def _finalize(self, status: str = "success") -> None: """Finalizes current run.""" if self.is_tensorboard: self.experiment["tensorboard"].flush() self.experiment["tensorboard"].close() if self.is_mlflow: - if status == "success": + if status in ["success", "finished"]: mlflow_status = "FINISHED" - elif status == "failed": + else: mlflow_status = "FAILED" - elif status == "finished": - mlflow_status = "FINISHED" self.experiment["mlflow"].end_run(mlflow_status) if self.is_wandb: if status == "success": diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 0de426a7..22a3e108 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,5 +1,6 @@ import json import os +import os.path as osp from collections import defaultdict from pathlib import Path @@ -13,6 +14,7 @@ from luxonis_ml.utils import LuxonisFileSystem, environ WORK_DIR = Path("tests", "data") +WORK_DIR.mkdir(parents=True, exist_ok=True) environ.LUXONISML_BASE_PATH = WORK_DIR / "luxonisml" @@ -163,13 +165,13 @@ def create_coco_dataset(): dataset_name = "coco_test" url = "https://drive.google.com/uc?id=1XlvFK7aRmt8op6-hHkWVKIJQeDtOwoRT" output_folder = "../data/" - output_zip = os.path.join(output_folder, "COCO_people_subset.zip") + output_zip = osp.join(output_folder, "COCO_people_subset.zip") - if not os.path.exists(output_folder): + if not osp.exists(output_folder): os.makedirs(output_folder) - if not os.path.exists(output_zip) and not os.path.exists( - os.path.join(output_folder, "COCO_people_subset") + if not osp.exists(output_zip) and not osp.exists( + osp.join(output_folder, "COCO_people_subset") ): gdown.download(url, output_zip, quiet=False) @@ -180,9 +182,8 @@ def create_coco_dataset(): @pytest.fixture(scope="session", autouse=True) def create_cifar10_dataset(): dataset = LuxonisDataset("cifar10_test", 
delete_existing=True) - output_folder = "../data/" - if not os.path.exists(output_folder): - os.makedirs(output_folder) + output_folder = "../data/cifar10" + os.makedirs(output_folder, exist_ok=True) cifar10_torch = torchvision.datasets.CIFAR10( root=output_folder, train=False, download=True ) @@ -203,7 +204,7 @@ def CIFAR10_subset_generator(): for i, (image, label) in enumerate(cifar10_torch): # type: ignore if i == 1000: break - path = os.path.join(output_folder, f"cifar_{i}.png") + path = osp.join(output_folder, f"cifar_{i}.png") image.save(path) yield { "file": path, diff --git a/tests/integration/multi_input_modules.py b/tests/integration/multi_input_modules.py index bac43091..dbc5a449 100644 --- a/tests/integration/multi_input_modules.py +++ b/tests/integration/multi_input_modules.py @@ -11,7 +11,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @property - def input_shape(self): + def input_shapes(self): return { "left": torch.Size([3, 224, 224]), "right": torch.Size([3, 224, 224]), diff --git a/tests/integration/test_sanity.py b/tests/integration/test_sanity.py index f040c032..1185bb8d 100644 --- a/tests/integration/test_sanity.py +++ b/tests/integration/test_sanity.py @@ -1,11 +1,12 @@ import shutil +import sys from pathlib import Path import pytest from luxonis_ml.data import LuxonisDataset from multi_input_modules import * -from luxonis_train.core import Exporter, Inferer, Trainer, Tuner +from luxonis_train.core import LuxonisModel from luxonis_train.utils.config import Config TEST_OUTPUT = Path("tests/integration/_test-output") @@ -26,8 +27,8 @@ @pytest.fixture(scope="function", autouse=True) def clear_output(): Config.clear_instance() - yield shutil.rmtree(TEST_OUTPUT, ignore_errors=True) + yield STUDY_PATH.unlink(missing_ok=True) ONNX_PATH.unlink(missing_ok=True) shutil.rmtree(INFER_PATH, ignore_errors=True) @@ -37,43 +38,62 @@ def clear_output(): "config_file", [str(path) for path in Path("configs").glob("*model*")] ) def test_simple_models(config_file: str): - trainer = Trainer( - config_file, - opts=OPTS, + model = LuxonisModel(config_file, opts=OPTS) + model.train() + model.test() + model.export() + assert ( + Path(model.run_save_dir, "export", model.cfg.model.name) + .with_suffix(".onnx") + .exists() ) - trainer.train() - trainer.test() - - Exporter(config_file).export("test_export.onnx") + model.archive() + assert ( + Path( + model.run_save_dir, + "archive", + model.cfg.archiver.name or model.cfg.model.name, + ) + .with_suffix(".onnx.tar.xz") + .exists() + ) + del model def test_multi_input(): config_file = "configs/example_multi_input.yaml" - trainer = Trainer(config_file, opts=OPTS) - trainer.train() - trainer.test(view="val") + model = LuxonisModel(config_file, opts=OPTS) + model.train() + model.test(view="val") assert not ONNX_PATH.exists() - Exporter(config_file).export(str(ONNX_PATH)) + model.export(str(ONNX_PATH)) assert ONNX_PATH.exists() assert not INFER_PATH.exists() - Inferer(config_file, view="val", save_dir=INFER_PATH).infer() + model.infer(view="val", save_dir=INFER_PATH) assert INFER_PATH.exists() + del model def test_custom_tasks(parking_lot_dataset: LuxonisDataset): config_file = "tests/configs/parking_lot_config.yaml" - Trainer( + model = LuxonisModel( config_file, opts=OPTS | { "loader.params.dataset_name": parking_lot_dataset.dataset_name, + "trainer.batch_size": 2, }, - ).train() + ) + model.train() + assert model.archive().exists() + del model +@pytest.mark.skipif(sys.platform == "win32", reason="Tuning not supported on 
Windows") def test_tuner(): - tuner = Tuner("configs/example_tuning.yaml", opts=OPTS) - tuner.tune() + model = LuxonisModel("configs/example_tuning.yaml", opts=OPTS) + model.tune() assert STUDY_PATH.exists() + del model diff --git a/tests/unittests/test_core/__init__.py b/tests/unittests/test_core/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unittests/test_core/test_archiver.py b/tests/unittests/test_core/test_archiver.py deleted file mode 100644 index d7b4dcef..00000000 --- a/tests/unittests/test_core/test_archiver.py +++ /dev/null @@ -1,339 +0,0 @@ -import io -import json -import os -import random -import shutil -import tarfile -import unittest - -import cv2 -import lightning.pytorch as pl -import numpy as np -import onnx -import pytest -from luxonis_ml.data import LuxonisDataset -from parameterized import parameterized - -import luxonis_train -from luxonis_train.core import Archiver -from luxonis_train.core.exporter import Exporter -from luxonis_train.core.trainer import Trainer -from luxonis_train.nodes.enums.head_categorization import ImplementedHeads -from luxonis_train.utils.config import Config - -HEAD_NAMES = [head_name for head_name in ImplementedHeads.__members__] - - -@pytest.mark.skip() -class TestArchiver(unittest.TestCase): - @classmethod - def setup_class(cls): - """Creates all files required for testing.""" - - # make tmp dir - luxonis_train_parent_dir = os.path.dirname( - os.path.dirname(luxonis_train.__file__) - ) - cls.tmp_path = os.path.join( - luxonis_train_parent_dir, "tests", "unittests", "test_core", "tmp" - ) - os.mkdir(cls.tmp_path) - - # make LDFs - unilabelLDF = "dummyLDF_unilabel" - cls._make_dummy_ldf( - ldf_name=unilabelLDF, - save_path=cls.tmp_path, - bbx_anno=True, - kpt_anno=True, - ) - multilabelLDF = "dummyLDF_multilabel" - cls._make_dummy_ldf( - ldf_name=multilabelLDF, - save_path=cls.tmp_path, - cls_anno=True, - bbx_anno=True, - sgm_anno=True, - multilabel=True, - ) - cls.ldf_names = [unilabelLDF, multilabelLDF] - - for head_name in HEAD_NAMES: - if head_name == "ImplicitKeypointBBoxHead": - ldf_name = unilabelLDF # multiclass keypoint detection not yet supported in luxonis-train - else: - ldf_name = multilabelLDF - - # make config - cfg_dict = cls._make_dummy_cfg_dict( - head_name=head_name, - save_path=cls.tmp_path, - ldf_name=ldf_name, - ) - cfg = Config.get_config(cfg_dict) - - # train model - cfg.trainer.epochs = 1 - cfg.trainer.validation_interval = 1 - cfg.trainer.batch_size = 1 - trainer = Trainer(cfg=cfg) - trainer.train() - callbacks = [ - c - for c in trainer.pl_trainer.callbacks - if isinstance(c, pl.callbacks.ModelCheckpoint) - ] - model_checkpoint_path = callbacks[0].best_model_path - model_ckpt = os.path.join(trainer.run_save_dir, model_checkpoint_path) - trainer.reset_logging() - - # export model to ONNX - cfg.model.weights = model_ckpt - exporter = Exporter(cfg=cfg) - cls.onnx_model_path = os.path.join(cls.tmp_path, "model.onnx") - exporter.export(onnx_path=cls.onnx_model_path) - exporter.reset_logging() - - # make archive - cfg.archiver.archive_save_directory = cls.tmp_path - cfg.archiver.archive_name = f"nnarchive_{head_name}" - archiver = Archiver(cfg=cfg) - cls.archive_path = archiver.archive(cls.onnx_model_path) - archiver.reset_logging() - - # clear the loaded config instance - Config.clear_instance() - - def _make_dummy_ldf( - ldf_name: str, - save_path: str, - number: int = 3, - dim: tuple = (10, 10, 3), - cls_anno: bool = False, - bbx_anno: bool = False, - sgm_anno: bool = False, - 
kpt_anno: bool = False, - multilabel: bool = False, - split_ratios: list = None, - ): - """Creates random-pixel images with fictional annotations and parses them to - L{LuxonisDataset} format. - - @type ldf_name: str - @param ldf_name: Name of the created L{LuxonisDataset} format dataset. - @type save_path: str - @param save_path: Path to where the created images are saved. - @type number: int - @param number: Number of images to create. - @type dim: Tuple[int, int, int] - @param dim: Dimensions of the created images in HWC order. - @type cls_anno: bool - @param cls_anno: True if created dataset should contain classification annotations. - type bbx_anno: bool - @param bbx_anno: True if created dataset should contain bounding box annotations. - type sgm_anno: bool - @param sgm_anno: True if created dataset should contain segmentation annotations. - type kpt_anno: bool - @param kpt_anno: True if created dataset should contain keypoint annotations. - type multilabel: bool - @param multilabel: True if created dataset should contain multilabel annotations. - type split_ratios: List[float, float, float] - @param split_ratios: List of ratios defining the train, val, and test splits. - """ - - if split_ratios is None: - split_ratios = [0.333, 0.333, 0.333] - - os.makedirs(os.path.join(save_path, "images"), exist_ok=True) - - if multilabel: - labels = ["label_x", "label_y", "label_z"] - else: - labels = ["label_x"] - - def dataset_generator(): - for i in range(number): - label = random.choice(labels) - img = np.random.randint(0, 256, dim, dtype=np.uint8) - img_file_path = os.path.join(save_path, "images", f"img{i}.png") - cv2.imwrite(img_file_path, img) - - if cls_anno: - yield { - "file": img_file_path, - "type": "classification", - "value": True, - "class": label, - } - - if bbx_anno: - box = (0.25, 0.25, 0.5, 0.5) - yield { - "file": img_file_path, - "type": "box", - "value": box, - "class": label, - } - - if kpt_anno: - keypoints = [ - (0.25, 0.25, 2), - (0.75, 0.25, 2), - (0.75, 0.75, 2), - (0.25, 0.75, 2), - ] - yield { - "file": img_file_path, - "type": "keypoints", - "value": keypoints, - "class": label, - } - - if sgm_anno: - polyline = [ - (0.25, 0.75), - (0.75, 0.25), - (0.75, 0.75), - (0.25, 0.75), - (0.25, 0.25), - ] - yield { - "file": img_file_path, - "type": "polyline", - "value": polyline, - "class": label, - } - - if LuxonisDataset.exists(ldf_name): - print("Deleting existing dataset") - LuxonisDataset(ldf_name).delete_dataset() - dataset = LuxonisDataset(ldf_name) - if kpt_anno: - keypoint_labels = [ - "kp1", - "kp2", - "kp3", - "kp4", - ] - keypoint_edges = [ - [0, 1], - [1, 2], - [2, 3], - [3, 0], - ] - dataset.set_skeletons( - { - label: {"labels": keypoint_labels, "edges": keypoint_edges} - for label in labels - } - ) - dataset.add(dataset_generator()) - dataset.make_splits(ratios=split_ratios) - - def _make_dummy_cfg_dict(head_name: str, ldf_name: str, save_path: str) -> dict: - """Creates a configuration dict based on the type of the provided model head. - - @type head_name: str - @param head_name: Name of the specified head. - @type ldf_name: str - @param ldf_name: Name of the L{LuxonisDataset} format dataset on which the - training will be performed. - @type save_path: str - @param save_path: Path to LuxonisTrackerPL save directory. - @rtype: dict - @return: Created config dict. 
- """ - - cfg_dict = {"model": {"name": f"model_w_{head_name}"}} - cfg_dict["dataset"] = {"name": ldf_name} - cfg_dict["tracker"] = {"save_directory": save_path} - - if head_name == "ClassificationHead": - cfg_dict["model"]["predefined_model"] = {"name": "ClassificationModel"} - elif head_name == "EfficientBBoxHead": - cfg_dict["model"]["predefined_model"] = {"name": "DetectionModel"} - elif head_name == "ImplicitKeypointBBoxHead": - cfg_dict["model"]["predefined_model"] = {"name": "KeypointDetectionModel"} - elif head_name == "SegmentationHead": - cfg_dict["model"]["predefined_model"] = {"name": "SegmentationModel"} - elif head_name == "BiSeNetHead": - cfg_dict["model"]["nodes"] = [ - {"name": "MicroNet", "alias": "segmentation_backbone"}, - { - "name": "BiSeNetHead", - "alias": "segmentation_head", - "inputs": ["segmentation_backbone"], - }, - ] - cfg_dict["model"]["losses"] = [ - {"name": "BCEWithLogitsLoss", "attached_to": "segmentation_head"} - ] - else: - raise NotImplementedError(f"No implementation for {head_name}") - - return cfg_dict - - @parameterized.expand(HEAD_NAMES) - def test_archive_creation(self, head_name): - """Tests if NN archive was created using xz compression (should be the default - option).""" - archive_path = os.path.join(self.tmp_path, f"nnarchive_{head_name}_onnx.tar.xz") - assert archive_path.endswith("tar.xz") - - @parameterized.expand(HEAD_NAMES) - def test_archive_contents(self, head_name): - """Tests if NN archive consists of config.json and model.onnx.""" - archive_path = os.path.join(self.tmp_path, f"nnarchive_{head_name}_onnx.tar.xz") - with tarfile.open(archive_path, mode="r") as tar: - archive_fnames = tar.getnames() - assert ( - len(archive_fnames) == 2 - and any([fname == "config.json" for fname in archive_fnames]) - and any([fname == "model.onnx" for fname in archive_fnames]) - ) - - @parameterized.expand(HEAD_NAMES) - def test_onnx(self, head_name): - """Tests if archive ONNX model is valid.""" - archive_path = os.path.join(self.tmp_path, f"nnarchive_{head_name}_onnx.tar.xz") - with tarfile.open(archive_path, mode="r") as tar: - f = tar.extractfile("model.onnx") - model_bytes = f.read() - model_io = io.BytesIO(model_bytes) - onnx_model = onnx.load(model_io) - assert onnx.checker.check_model(onnx_model, full_check=True) is None - - @parameterized.expand(HEAD_NAMES) - def test_config_io(self, head_name): - """Tests if archived config inputs and outputs are valid.""" - archive_path = os.path.join(self.tmp_path, f"nnarchive_{head_name}_onnx.tar.xz") - with tarfile.open(archive_path, mode="r") as tar: - f = tar.extractfile("config.json") - json_dict = json.load(f) - f = tar.extractfile("model.onnx") - model_bytes = f.read() - model_io = io.BytesIO(model_bytes) - onnx_model = onnx.load(model_io) - - config_input_names = [] - for input in json_dict["model"]["inputs"]: - config_input_names.append(input["name"]) - valid_inputs = set([input.name for input in onnx_model.graph.input]) == set( - config_input_names - ) - - config_output_names = [] - for input in json_dict["model"]["outputs"]: - config_output_names.append(input["name"]) - valid_outputs = set([output.name for output in onnx_model.graph.output]) == set( - config_output_names - ) - - assert valid_inputs and valid_outputs - - @classmethod - def teardown_class(cls): - """Removes all files created during setup.""" - for ldf_name in cls.ldf_names: - LuxonisDataset(ldf_name).delete_dataset() - shutil.rmtree(cls.tmp_path) From 55df6ef2b244fb513049c863b9e4c4f5b2af10b9 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Mon, 19 Aug 2024 17:15:08 +0200 Subject: [PATCH 50/75] Stricter Config Types (#60) --- luxonis_train/utils/config.py | 60 +++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 9c4a0f2c..dfa427b5 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -1,6 +1,6 @@ import logging import sys -from typing import Annotated, Any, Literal +from typing import Annotated, Any, Literal, TypeAlias from luxonis_ml.data import LabelType from luxonis_ml.utils import ( @@ -10,20 +10,32 @@ LuxonisFileSystem, ) from pydantic import Field, model_validator +from pydantic.types import FilePath, NonNegativeFloat, NonNegativeInt, PositiveInt from typing_extensions import Self logger = logging.getLogger(__name__) +Params: TypeAlias = dict[str, Any] + class AttachedModuleConfig(BaseModelExtraForbid): name: str attached_to: str alias: str | None = None - params: dict[str, Any] = {} + params: Params = {} class LossModuleConfig(AttachedModuleConfig): - weight: float = 1.0 + weight: NonNegativeFloat = 1.0 + + @model_validator(mode="after") + def validate_weight(self) -> Self: + if self.weight == 0: + logger.warning( + f"Loss '{self.name}' has weight set to 0. " + "This loss will not contribute to the training." + ) + return self class MetricModuleConfig(AttachedModuleConfig): @@ -32,7 +44,7 @@ class MetricModuleConfig(AttachedModuleConfig): class FreezingConfig(BaseModelExtraForbid): active: bool = False - unfreeze_after: int | float | None = None + unfreeze_after: NonNegativeInt | NonNegativeFloat | None = None class ModelNodeConfig(BaseModelExtraForbid): @@ -40,24 +52,24 @@ class ModelNodeConfig(BaseModelExtraForbid): alias: str | None = None inputs: list[str] = [] # From preceding nodes input_sources: list[str] = [] # From data loader - params: dict[str, Any] = {} freezing: FreezingConfig = FreezingConfig() task: str | dict[LabelType, str] | None = None + params: Params = {} class PredefinedModelConfig(BaseModelExtraForbid): name: str - params: dict[str, Any] = {} include_nodes: bool = True include_losses: bool = True include_metrics: bool = True include_visualizers: bool = True + params: Params = {} class ModelConfig(BaseModelExtraForbid): name: str = "model" predefined_model: PredefinedModelConfig | None = None - weights: str | None = None + weights: FilePath | None = None nodes: list[ModelNodeConfig] = [] losses: list[LossModuleConfig] = [] metrics: list[MetricModuleConfig] = [] @@ -146,7 +158,7 @@ class LoaderConfig(BaseModelExtraForbid): train_view: str = "train" val_view: str = "val" test_view: str = "test" - params: dict[str, Any] = {} + params: Params = {} class NormalizeAugmentationConfig(BaseModelExtraForbid): @@ -160,7 +172,7 @@ class NormalizeAugmentationConfig(BaseModelExtraForbid): class AugmentationConfig(BaseModelExtraForbid): name: str active: bool = True - params: dict[str, Any] = {} + params: Params = {} class PreprocessingConfig(BaseModelExtraForbid): @@ -192,23 +204,23 @@ def get_active_augmentations(self) -> list[AugmentationConfig]: class CallbackConfig(BaseModelExtraForbid): name: str active: bool = True - params: dict[str, Any] = {} + params: Params = {} class OptimizerConfig(BaseModelExtraForbid): name: str = "Adam" - params: dict[str, Any] = {} + params: Params = {} class SchedulerConfig(BaseModelExtraForbid): name: str = "ConstantLR" - params: dict[str, Any] = {} + params: Params = {} class 
TrainerConfig(BaseModelExtraForbid): preprocessing: PreprocessingConfig = PreprocessingConfig() - accelerator: Literal["auto", "cpu", "gpu"] = "auto" + accelerator: Literal["auto", "cpu", "gpu", "tpu"] = "auto" devices: int | list[int] | str = "auto" strategy: Literal["auto", "ddp"] = "auto" num_sanity_val_steps: int = 2 @@ -217,17 +229,17 @@ class TrainerConfig(BaseModelExtraForbid): verbose: bool = True seed: int | None = None - batch_size: int = 32 - accumulate_grad_batches: int = 1 + batch_size: PositiveInt = 32 + accumulate_grad_batches: PositiveInt = 1 use_weighted_sampler: bool = False - epochs: int = 100 - num_workers: int = 2 - train_metrics_interval: int = -1 - validation_interval: int = 1 - num_log_images: int = 4 + epochs: PositiveInt = 100 + num_workers: NonNegativeInt = 4 + train_metrics_interval: Literal[-1] | PositiveInt = -1 + validation_interval: Literal[-1] | PositiveInt = 1 + num_log_images: NonNegativeInt = 4 skip_last_batch: bool = True log_sub_losses: bool = True - save_top_k: int = 3 + save_top_k: Literal[-1] | NonNegativeInt = 3 callbacks: list[CallbackConfig] = [] @@ -256,7 +268,7 @@ def check_validation_interval(self) -> Self: class OnnxExportConfig(BaseModelExtraForbid): - opset_version: int = 12 + opset_version: PositiveInt = 12 dynamic_axes: dict[str, Any] | None = None @@ -305,8 +317,8 @@ class TunerConfig(BaseModelExtraForbid): study_name: str = "test-study" continue_existing_study: bool = True use_pruner: bool = True - n_trials: int | None = 15 - timeout: int | None = None + n_trials: PositiveInt | None = 15 + timeout: PositiveInt | None = None storage: StorageConfig = StorageConfig() params: Annotated[ dict[str, list[str | int | float | bool | list]], From 0d1a8a189e461659767c5161a123e106bf0913b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Mon, 19 Aug 2024 20:57:53 +0200 Subject: [PATCH 51/75] Semantic Nodes Categorization (#61) Co-authored-by: GitHub Actions --- .github/workflows/tests.yaml | 3 +- luxonis_train/nodes/__init__.py | 41 ++----------------- luxonis_train/nodes/backbones/__init__.py | 21 ++++++++++ .../nodes/{ => backbones}/contextspatial.py | 3 +- .../nodes/{ => backbones}/efficientnet.py | 2 +- .../nodes/{ => backbones}/efficientrep.py | 3 +- .../nodes/{ => backbones}/micronet.py | 3 +- .../nodes/{ => backbones}/mobilenetv2.py | 2 +- .../nodes/{ => backbones}/mobileone.py | 3 +- luxonis_train/nodes/{ => backbones}/repvgg.py | 2 +- luxonis_train/nodes/{ => backbones}/resnet.py | 2 +- .../nodes/{ => backbones}/rexnetv1.py | 3 +- luxonis_train/nodes/base_node.py | 2 + luxonis_train/nodes/heads/__init__.py | 15 +++++++ .../nodes/{ => heads}/bisenet_head.py | 3 +- .../nodes/{ => heads}/classification_head.py | 3 +- .../nodes/{ => heads}/efficient_bbox_head.py | 3 +- .../efficient_keypoint_bbox_head.py | 0 .../implicit_keypoint_bbox_head.py | 3 +- .../nodes/{ => heads}/segmentation_head.py | 3 +- luxonis_train/nodes/necks/__init__.py | 3 ++ .../nodes/{ => necks}/reppan_neck.py | 3 +- media/coverage_badge.svg | 4 +- tests/integration/test_sanity.py | 10 ++++- 24 files changed, 72 insertions(+), 68 deletions(-) create mode 100644 luxonis_train/nodes/backbones/__init__.py rename luxonis_train/nodes/{ => backbones}/contextspatial.py (98%) rename luxonis_train/nodes/{ => backbones}/efficientnet.py (96%) rename luxonis_train/nodes/{ => backbones}/efficientrep.py (98%) rename luxonis_train/nodes/{ => backbones}/micronet.py (99%) rename luxonis_train/nodes/{ => backbones}/mobilenetv2.py (95%) rename luxonis_train/nodes/{ => 
backbones}/mobileone.py (99%) rename luxonis_train/nodes/{ => backbones}/repvgg.py (99%) rename luxonis_train/nodes/{ => backbones}/resnet.py (98%) rename luxonis_train/nodes/{ => backbones}/rexnetv1.py (99%) create mode 100644 luxonis_train/nodes/heads/__init__.py rename luxonis_train/nodes/{ => heads}/bisenet_head.py (96%) rename luxonis_train/nodes/{ => heads}/classification_head.py (94%) rename luxonis_train/nodes/{ => heads}/efficient_bbox_head.py (99%) rename luxonis_train/nodes/{ => heads}/efficient_keypoint_bbox_head.py (100%) rename luxonis_train/nodes/{ => heads}/implicit_keypoint_bbox_head.py (99%) rename luxonis_train/nodes/{ => heads}/segmentation_head.py (96%) create mode 100644 luxonis_train/nodes/necks/__init__.py rename luxonis_train/nodes/{ => necks}/reppan_neck.py (99%) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 112741e3..8b176add 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -14,7 +14,6 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest] - version: ['3.10', '3.11'] runs-on: ${{ matrix.os }} @@ -27,7 +26,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: ${{ matrix.version }} + python-version: '3.10' cache: pip - name: Install dependencies [Ubuntu] diff --git a/luxonis_train/nodes/__init__.py b/luxonis_train/nodes/__init__.py index 4c90abaa..5b4a889f 100644 --- a/luxonis_train/nodes/__init__.py +++ b/luxonis_train/nodes/__init__.py @@ -1,37 +1,4 @@ -from .base_node import BaseNode -from .bisenet_head import BiSeNetHead -from .classification_head import ClassificationHead -from .contextspatial import ContextSpatial -from .efficient_bbox_head import EfficientBBoxHead -from .efficient_keypoint_bbox_head import EfficientKeypointBBoxHead -from .efficientnet import EfficientNet -from .efficientrep import EfficientRep -from .implicit_keypoint_bbox_head import ImplicitKeypointBBoxHead -from .micronet import MicroNet -from .mobilenetv2 import MobileNetV2 -from .mobileone import MobileOne -from .reppan_neck import RepPANNeck -from .repvgg import RepVGG -from .resnet import ResNet -from .rexnetv1 import ReXNetV1_lite -from .segmentation_head import SegmentationHead - -__all__ = [ - "BiSeNetHead", - "ClassificationHead", - "ContextSpatial", - "EfficientBBoxHead", - "EfficientNet", - "EfficientRep", - "EfficientKeypointBBoxHead", - "ImplicitKeypointBBoxHead", - "BaseNode", - "MicroNet", - "MobileNetV2", - "MobileOne", - "ReXNetV1_lite", - "RepPANNeck", - "RepVGG", - "ResNet", - "SegmentationHead", -] +from .backbones import * +from .base_node import * +from .heads import * +from .necks import * diff --git a/luxonis_train/nodes/backbones/__init__.py b/luxonis_train/nodes/backbones/__init__.py new file mode 100644 index 00000000..9463124b --- /dev/null +++ b/luxonis_train/nodes/backbones/__init__.py @@ -0,0 +1,21 @@ +from .contextspatial import ContextSpatial +from .efficientnet import EfficientNet +from .efficientrep import EfficientRep +from .micronet import MicroNet +from .mobilenetv2 import MobileNetV2 +from .mobileone import MobileOne +from .repvgg import RepVGG +from .resnet import ResNet +from .rexnetv1 import ReXNetV1_lite + +__all__ = [ + "ContextSpatial", + "EfficientNet", + "EfficientRep", + "MicroNet", + "MobileNetV2", + "MobileOne", + "ReXNetV1_lite", + "RepVGG", + "ResNet", +] diff --git a/luxonis_train/nodes/contextspatial.py b/luxonis_train/nodes/backbones/contextspatial.py similarity index 98% rename from luxonis_train/nodes/contextspatial.py 
rename to luxonis_train/nodes/backbones/contextspatial.py index 1ca1460d..2cac4b81 100644 --- a/luxonis_train/nodes/contextspatial.py +++ b/luxonis_train/nodes/backbones/contextspatial.py @@ -7,6 +7,7 @@ from torch import Tensor, nn from torch.nn import functional as F +from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import ( AttentionRefinmentBlock, ConvModule, @@ -14,8 +15,6 @@ ) from luxonis_train.utils.registry import NODES -from .base_node import BaseNode - class ContextSpatial(BaseNode[Tensor, list[Tensor]]): def __init__(self, context_backbone: str = "MobileNetV2", **kwargs): diff --git a/luxonis_train/nodes/efficientnet.py b/luxonis_train/nodes/backbones/efficientnet.py similarity index 96% rename from luxonis_train/nodes/efficientnet.py rename to luxonis_train/nodes/backbones/efficientnet.py index 37f8ced5..e560bc5f 100644 --- a/luxonis_train/nodes/efficientnet.py +++ b/luxonis_train/nodes/backbones/efficientnet.py @@ -7,7 +7,7 @@ import torch from torch import Tensor, nn -from .base_node import BaseNode +from luxonis_train.nodes.base_node import BaseNode class EfficientNet(BaseNode[Tensor, list[Tensor]]): diff --git a/luxonis_train/nodes/efficientrep.py b/luxonis_train/nodes/backbones/efficientrep.py similarity index 98% rename from luxonis_train/nodes/efficientrep.py rename to luxonis_train/nodes/backbones/efficientrep.py index 24e43397..be558620 100644 --- a/luxonis_train/nodes/efficientrep.py +++ b/luxonis_train/nodes/backbones/efficientrep.py @@ -9,6 +9,7 @@ from torch import Tensor, nn +from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import ( BlockRepeater, RepVGGBlock, @@ -16,8 +17,6 @@ ) from luxonis_train.utils.general import make_divisible -from .base_node import BaseNode - logger = logging.getLogger(__name__) diff --git a/luxonis_train/nodes/micronet.py b/luxonis_train/nodes/backbones/micronet.py similarity index 99% rename from luxonis_train/nodes/micronet.py rename to luxonis_train/nodes/backbones/micronet.py index 603eabde..074dce2a 100644 --- a/luxonis_train/nodes/micronet.py +++ b/luxonis_train/nodes/backbones/micronet.py @@ -4,10 +4,9 @@ from torch import Tensor, nn from luxonis_train.nodes.activations import HSigmoid, HSwish +from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import ConvModule -from .base_node import BaseNode - class MicroNet(BaseNode[Tensor, list[Tensor]]): """ diff --git a/luxonis_train/nodes/mobilenetv2.py b/luxonis_train/nodes/backbones/mobilenetv2.py similarity index 95% rename from luxonis_train/nodes/mobilenetv2.py rename to luxonis_train/nodes/backbones/mobilenetv2.py index db6cf879..48161835 100644 --- a/luxonis_train/nodes/mobilenetv2.py +++ b/luxonis_train/nodes/backbones/mobilenetv2.py @@ -6,7 +6,7 @@ import torchvision from torch import Tensor, nn -from .base_node import BaseNode +from luxonis_train.nodes.base_node import BaseNode class MobileNetV2(BaseNode[Tensor, list[Tensor]]): diff --git a/luxonis_train/nodes/mobileone.py b/luxonis_train/nodes/backbones/mobileone.py similarity index 99% rename from luxonis_train/nodes/mobileone.py rename to luxonis_train/nodes/backbones/mobileone.py index 645534e4..2d460fd0 100644 --- a/luxonis_train/nodes/mobileone.py +++ b/luxonis_train/nodes/backbones/mobileone.py @@ -10,10 +10,9 @@ import torch from torch import Tensor, nn +from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import ConvModule, SqueezeExciteBlock -from .base_node import BaseNode - class 
MobileOne(BaseNode[Tensor, list[Tensor]]): """Implementation of MobileOne backbone. diff --git a/luxonis_train/nodes/repvgg.py b/luxonis_train/nodes/backbones/repvgg.py similarity index 99% rename from luxonis_train/nodes/repvgg.py rename to luxonis_train/nodes/backbones/repvgg.py index f488a68c..c536c78e 100644 --- a/luxonis_train/nodes/repvgg.py +++ b/luxonis_train/nodes/backbones/repvgg.py @@ -6,7 +6,7 @@ from luxonis_train.nodes.blocks import RepVGGBlock -from .base_node import BaseNode +from ..base_node import BaseNode logger = logging.getLogger(__name__) diff --git a/luxonis_train/nodes/resnet.py b/luxonis_train/nodes/backbones/resnet.py similarity index 98% rename from luxonis_train/nodes/resnet.py rename to luxonis_train/nodes/backbones/resnet.py index 3f810100..e4228410 100644 --- a/luxonis_train/nodes/resnet.py +++ b/luxonis_train/nodes/backbones/resnet.py @@ -8,7 +8,7 @@ import torchvision from torch import Tensor, nn -from .base_node import BaseNode +from ..base_node import BaseNode class ResNet(BaseNode[Tensor, list[Tensor]]): diff --git a/luxonis_train/nodes/rexnetv1.py b/luxonis_train/nodes/backbones/rexnetv1.py similarity index 99% rename from luxonis_train/nodes/rexnetv1.py rename to luxonis_train/nodes/backbones/rexnetv1.py index 181ad325..6d23857e 100644 --- a/luxonis_train/nodes/rexnetv1.py +++ b/luxonis_train/nodes/backbones/rexnetv1.py @@ -7,11 +7,10 @@ import torch from torch import Tensor, nn +from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import ConvModule from luxonis_train.utils.general import make_divisible -from .base_node import BaseNode - class ReXNetV1_lite(BaseNode[Tensor, list[Tensor]]): def __init__( diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py index 3b549e0a..9db45316 100644 --- a/luxonis_train/nodes/base_node.py +++ b/luxonis_train/nodes/base_node.py @@ -19,6 +19,8 @@ ForwardOutputT = TypeVar("ForwardOutputT") ForwardInputT = TypeVar("ForwardInputT") +__all__ = ["BaseNode"] + class BaseNode( nn.Module, diff --git a/luxonis_train/nodes/heads/__init__.py b/luxonis_train/nodes/heads/__init__.py new file mode 100644 index 00000000..28b5e8ca --- /dev/null +++ b/luxonis_train/nodes/heads/__init__.py @@ -0,0 +1,15 @@ +from .bisenet_head import BiSeNetHead +from .classification_head import ClassificationHead +from .efficient_bbox_head import EfficientBBoxHead +from .efficient_keypoint_bbox_head import EfficientKeypointBBoxHead +from .implicit_keypoint_bbox_head import ImplicitKeypointBBoxHead +from .segmentation_head import SegmentationHead + +__all__ = [ + "BiSeNetHead", + "ClassificationHead", + "EfficientBBoxHead", + "EfficientKeypointBBoxHead", + "ImplicitKeypointBBoxHead", + "SegmentationHead", +] diff --git a/luxonis_train/nodes/bisenet_head.py b/luxonis_train/nodes/heads/bisenet_head.py similarity index 96% rename from luxonis_train/nodes/bisenet_head.py rename to luxonis_train/nodes/heads/bisenet_head.py index 54128cad..3fef7584 100644 --- a/luxonis_train/nodes/bisenet_head.py +++ b/luxonis_train/nodes/heads/bisenet_head.py @@ -7,12 +7,11 @@ from torch import Tensor, nn +from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import ConvModule from luxonis_train.utils.general import infer_upscale_factor from luxonis_train.utils.types import LabelType, Packet -from .base_node import BaseNode - class BiSeNetHead(BaseNode[Tensor, Tensor]): in_height: int diff --git a/luxonis_train/nodes/classification_head.py b/luxonis_train/nodes/heads/classification_head.py 
similarity index 94% rename from luxonis_train/nodes/classification_head.py rename to luxonis_train/nodes/heads/classification_head.py index d33faeb5..07b3d72b 100644 --- a/luxonis_train/nodes/classification_head.py +++ b/luxonis_train/nodes/heads/classification_head.py @@ -1,9 +1,8 @@ from torch import Tensor, nn +from luxonis_train.nodes.base_node import BaseNode from luxonis_train.utils.types import LabelType -from .base_node import BaseNode - class ClassificationHead(BaseNode[Tensor, Tensor]): in_channels: int diff --git a/luxonis_train/nodes/efficient_bbox_head.py b/luxonis_train/nodes/heads/efficient_bbox_head.py similarity index 99% rename from luxonis_train/nodes/efficient_bbox_head.py rename to luxonis_train/nodes/heads/efficient_bbox_head.py index e80ca31a..5607a2a8 100644 --- a/luxonis_train/nodes/efficient_bbox_head.py +++ b/luxonis_train/nodes/heads/efficient_bbox_head.py @@ -9,6 +9,7 @@ import torch from torch import Tensor, nn +from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import EfficientDecoupledBlock from luxonis_train.utils.boxutils import ( anchors_for_fpn_features, @@ -17,8 +18,6 @@ ) from luxonis_train.utils.types import LabelType, Packet -from .base_node import BaseNode - class EfficientBBoxHead( BaseNode[list[Tensor], tuple[list[Tensor], list[Tensor], list[Tensor]]] diff --git a/luxonis_train/nodes/efficient_keypoint_bbox_head.py b/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py similarity index 100% rename from luxonis_train/nodes/efficient_keypoint_bbox_head.py rename to luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py diff --git a/luxonis_train/nodes/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py similarity index 99% rename from luxonis_train/nodes/implicit_keypoint_bbox_head.py rename to luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py index 79e3fb79..0ca995c5 100644 --- a/luxonis_train/nodes/implicit_keypoint_bbox_head.py +++ b/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py @@ -5,6 +5,7 @@ import torch from torch import Tensor, nn +from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import KeypointBlock, LearnableMulAddConv from luxonis_train.utils.boxutils import ( non_max_suppression, @@ -13,8 +14,6 @@ ) from luxonis_train.utils.types import LabelType, Packet -from .base_node import BaseNode - logger = logging.getLogger(__name__) diff --git a/luxonis_train/nodes/segmentation_head.py b/luxonis_train/nodes/heads/segmentation_head.py similarity index 96% rename from luxonis_train/nodes/segmentation_head.py rename to luxonis_train/nodes/heads/segmentation_head.py index cddfe8db..1b29df7b 100644 --- a/luxonis_train/nodes/segmentation_head.py +++ b/luxonis_train/nodes/heads/segmentation_head.py @@ -7,12 +7,11 @@ import torch.nn as nn from torch import Tensor +from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import UpBlock from luxonis_train.utils.general import infer_upscale_factor from luxonis_train.utils.types import LabelType -from .base_node import BaseNode - class SegmentationHead(BaseNode[Tensor, Tensor]): in_height: int diff --git a/luxonis_train/nodes/necks/__init__.py b/luxonis_train/nodes/necks/__init__.py new file mode 100644 index 00000000..eef2e9a0 --- /dev/null +++ b/luxonis_train/nodes/necks/__init__.py @@ -0,0 +1,3 @@ +from .reppan_neck import RepPANNeck + +__all__ = ["RepPANNeck"] diff --git a/luxonis_train/nodes/reppan_neck.py b/luxonis_train/nodes/necks/reppan_neck.py 
similarity index 99% rename from luxonis_train/nodes/reppan_neck.py rename to luxonis_train/nodes/necks/reppan_neck.py index 26fed274..bd05f083 100644 --- a/luxonis_train/nodes/reppan_neck.py +++ b/luxonis_train/nodes/necks/reppan_neck.py @@ -10,11 +10,10 @@ from torch import Tensor, nn +from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import RepDownBlock, RepUpBlock from luxonis_train.utils.general import make_divisible -from .base_node import BaseNode - class RepPANNeck(BaseNode[list[Tensor], list[Tensor]]): def __init__( diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 12876e69..8e21255a 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 78% - 78% + 84% + 84% diff --git a/tests/integration/test_sanity.py b/tests/integration/test_sanity.py index 1185bb8d..c6d3bf09 100644 --- a/tests/integration/test_sanity.py +++ b/tests/integration/test_sanity.py @@ -35,9 +35,17 @@ def clear_output(): @pytest.mark.parametrize( - "config_file", [str(path) for path in Path("configs").glob("*model*")] + "config_file", + [ + "classification_model", + "segmentation_model", + "detection_model", + "keypoint_bbox_model", + "resnet_model", + ], ) def test_simple_models(config_file: str): + config_file = f"configs/{config_file}.yaml" model = LuxonisModel(config_file, opts=OPTS) model.train() model.test() From 356e5ebf6b602509b687e2a05e347d0c34179551 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Tue, 20 Aug 2024 18:47:18 +0200 Subject: [PATCH 52/75] Support for Multi-Split Views (#62) --- configs/README.md | 21 +++++++++---------- luxonis_train/__main__.py | 19 +++++++++++------ luxonis_train/core/core.py | 24 ++++++++++++++-------- luxonis_train/core/utils/export_utils.py | 6 ++++-- luxonis_train/utils/config.py | 15 ++++++++++---- luxonis_train/utils/loaders/base_loader.py | 12 +++++------ 6 files changed, 59 insertions(+), 38 deletions(-) diff --git a/configs/README.md b/configs/README.md index bf37317c..a85d5221 100644 --- a/configs/README.md +++ b/configs/README.md @@ -125,17 +125,16 @@ You can configure it like this: To store and load the data we use LuxonisDataset and LuxonisLoader. For specific config parameters refer to [LuxonisML](https://github.com/luxonis/luxonis-ml). 
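With multi-split views, each of `train_view`, `val_view` and `test_view` accepts either a single split name or a list of splits that are merged into one view. A minimal sketch of overriding the views from Python, using the same dotted-key `opts` mechanism the tests in this patch rely on (the dataset name below is hypothetical, and passing a list through `opts` is assumed to go through the same validator as the YAML config):

```python
from luxonis_train.core import LuxonisModel

model = LuxonisModel(
    "configs/segmentation_model.yaml",
    opts={
        "loader.params.dataset_name": "my_dataset",  # hypothetical dataset name
        "loader.train_view": ["train", "val"],       # multiple splits merged for training
        "loader.test_view": "test",                  # a plain string is normalized to ["test"]
    },
)
model.train()
model.test(view="test")
```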
-| Key | Type | Default value | Description | -| -------------- | ---------------------------------------- | ------------------- | ---------------------------------------------- | -| name | str \| None | None | name of the dataset | -| id | str \| None | None | id of the dataset | -| team_id | str \| None | None | team under which you can find all datasets | -| bucket_type | Literal\["intenal", "external"\] | internal | type of underlying storage | -| bucket_storage | Literal\["local", "s3", "gcc", "azure"\] | BucketStorage.LOCAL | underlying object storage for a bucket | -| train_view | str | train | view to use for training | -| val_view | str | val | view to use for validation | -| test_view | str | test | view to use for testing | -| json_mode | bool | False | load using JSON annotations instead of MongoDB | +| Key | Type | Default value | Description | +| -------------- | ---------------------------------------- | ------------------- | ------------------------------------------ | +| name | str \| None | None | name of the dataset | +| id | str \| None | None | id of the dataset | +| team_id | str \| None | None | team under which you can find all datasets | +| bucket_type | Literal\["intenal", "external"\] | internal | type of underlying storage | +| bucket_storage | Literal\["local", "s3", "gcc", "azure"\] | BucketStorage.LOCAL | underlying object storage for a bucket | +| train_view | str \| list\[str\] | train | splits to use for training | +| val_view | str \| list\[str\] | val | splits to use for validation | +| test_view | str \| list\[str\] | test | splits to use for testing | ## Trainer diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index 2b7f3ed3..cdc66954 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -1,11 +1,18 @@ import tempfile +from enum import Enum from importlib.metadata import version from pathlib import Path from typing import Annotated, Optional import typer import yaml -from luxonis_ml.enums import SplitType + + +class _ViewType(str, Enum): + TRAIN = "train" + VAL = "val" + TEST = "test" + app = typer.Typer( help="Luxonis Train CLI", @@ -31,7 +38,7 @@ ), ] -ViewType = Annotated[SplitType, typer.Option(help="Which dataset view to use.")] +ViewType = Annotated[_ViewType, typer.Option(help="Which dataset view to use.")] SaveDirType = Annotated[ Optional[Path], @@ -55,7 +62,7 @@ def train( @app.command() def test( - config: ConfigType = None, view: ViewType = SplitType.VAL, opts: OptsType = None + config: ConfigType = None, view: ViewType = _ViewType.VAL, opts: OptsType = None ): """Evaluate model.""" from luxonis_train.core import LuxonisModel @@ -82,7 +89,7 @@ def export(config: ConfigType = None, opts: OptsType = None): @app.command() def infer( config: ConfigType = None, - view: ViewType = SplitType.VAL, + view: ViewType = _ViewType.VAL, save_dir: SaveDirType = None, opts: OptsType = None, ): @@ -96,7 +103,7 @@ def infer( def inspect( config: ConfigType = None, view: Annotated[ - SplitType, + str, typer.Option( ..., "--view", @@ -132,7 +139,7 @@ def inspect( lxml_inspect( name=cfg.loader.params["dataset_name"], - view=view, + view=[view], aug_config=f.name, ) diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index 44e254db..b6aa03f2 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -3,7 +3,7 @@ import threading from logging import getLogger from pathlib import Path -from typing import Any +from typing import Any, Literal import lightning.pytorch as pl import 
lightning_utilities.core.rank_zero as rank_zero_module @@ -138,6 +138,14 @@ def __init__( ) for view in ["train", "val", "test"] } + + for name, loader in self.loaders.items(): + logger.info( + f"{name.capitalize()} loader - splits: {loader.view}, size: {len(loader)}" + ) + if len(loader) == 0: + logger.warning(f"{name.capitalize()} loader is empty!") + sampler = None # TODO: implement weighted sampler if self.cfg.trainer.use_weighted_sampler: @@ -335,19 +343,17 @@ def export( if self.cfg.exporter.upload_url is not None: LuxonisFileSystem.upload(f.name, self.cfg.exporter.upload_url) - def test(self, new_thread: bool = False, view: str | None = None) -> None: + def test( + self, new_thread: bool = False, view: Literal["train", "test", "val"] = "val" + ) -> None: """Runs testing. @type new_thread: bool - @param new_thread: Runs testing in new thread if set to True. - @type view: str | None - @param view: Which split to run the tests on. If unset, the value in - C{loader.test_view} will be used. Valid values are: 'train', 'val', 'test'. - Defauls to None. + @param new_thread: Runs testing in a new thread if set to True. + @type view: Literal["train", "test", "val"] + @param view: Which view to run the testing on. Defauls to "val". """ - view = view or self.cfg.loader.test_view - if view not in self.pytorch_loaders: raise ValueError( f"View {view} is not valid. Valid views are: 'train', 'val', 'test'." diff --git a/luxonis_train/core/utils/export_utils.py b/luxonis_train/core/utils/export_utils.py index f44b5d1a..3b34a912 100644 --- a/luxonis_train/core/utils/export_utils.py +++ b/luxonis_train/core/utils/export_utils.py @@ -1,5 +1,6 @@ import logging from contextlib import contextmanager +from pathlib import Path import luxonis_train from luxonis_train.utils.config import Config, ExportConfig @@ -9,12 +10,13 @@ @contextmanager def replace_weights( - module: "luxonis_train.models.LuxonisLightningModule", weights: str | None = None + module: "luxonis_train.models.LuxonisLightningModule", + weights: str | Path | None = None, ): old_weights = None if weights is not None: old_weights = module.state_dict() - module.load_checkpoint(weights) + module.load_checkpoint(str(weights)) yield diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index dfa427b5..d1f29ac1 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -9,7 +9,7 @@ LuxonisConfig, LuxonisFileSystem, ) -from pydantic import Field, model_validator +from pydantic import Field, field_validator, model_validator from pydantic.types import FilePath, NonNegativeFloat, NonNegativeInt, PositiveInt from typing_extensions import Self @@ -155,11 +155,18 @@ class TrackerConfig(BaseModelExtraForbid): class LoaderConfig(BaseModelExtraForbid): name: str = "LuxonisLoaderTorch" image_source: str = "image" - train_view: str = "train" - val_view: str = "val" - test_view: str = "test" + train_view: list[str] = ["train"] + val_view: list[str] = ["val"] + test_view: list[str] = ["test"] params: Params = {} + @field_validator("train_view", "val_view", "test_view", mode="before") + @classmethod + def validate_splits(cls, splits: Any) -> list[Any]: + if isinstance(splits, str): + return [splits] + return splits + class NormalizeAugmentationConfig(BaseModelExtraForbid): active: bool = True diff --git a/luxonis_train/utils/loaders/base_loader.py b/luxonis_train/utils/loaders/base_loader.py index 8d8c4090..e18d7f5e 100644 --- a/luxonis_train/utils/loaders/base_loader.py +++ b/luxonis_train/utils/loaders/base_loader.py 
@@ -25,11 +25,11 @@ class BaseLoaderTorch( def __init__( self, - view: str, + view: str | list[str], augmentations: Augmentations | None = None, image_source: str | None = None, ): - self.view = view + self.view = view if isinstance(view, list) else [view] self.augmentations = augmentations self._image_source = image_source @@ -50,25 +50,25 @@ def input_shapes(self) -> dict[str, Size]: Shape of each loader group (sub-element), WITHOUT batch dimension. Examples: - 1. Single image input: + 1. Single image input:: { 'image': torch.Size([3, 224, 224]), } - 2. Image and segmentation input: + 2. Image and segmentation input:: { 'image': torch.Size([3, 224, 224]), 'segmentation': torch.Size([1, 224, 224]), } - 3. Left image, right image and disparity input: + 3. Left image, right image and disparity input:: { 'left': torch.Size([3, 224, 224]), 'right': torch.Size([3, 224, 224]), 'disparity': torch.Size([1, 224, 224]), } - 4. Image, keypoints, and point cloud input: + 4. Image, keypoints, and point cloud input:: { 'image': torch.Size([3, 224, 224]), 'keypoints': torch.Size([17, 2]), From 9c8683bc89f41166d2ab20846da5e85b78a65936 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Wed, 21 Aug 2024 13:11:37 +0200 Subject: [PATCH 53/75] Option to Parse Data in `LuxonisLoaderTorch` (#63) --- .gitignore | 1 + .../attached_modules/metrics/common.py | 25 +++- luxonis_train/core/core.py | 14 +- luxonis_train/utils/config.py | 16 +++ .../utils/loaders/luxonis_loader_torch.py | 131 ++++++++++++++++-- tests/configs/segmentation_parse_loader.yaml | 27 ++++ tests/integration/test_sanity.py | 8 ++ 7 files changed, 200 insertions(+), 22 deletions(-) create mode 100644 tests/configs/segmentation_parse_loader.yaml diff --git a/.gitignore b/.gitignore index 1f2a2381..7f182cf4 100644 --- a/.gitignore +++ b/.gitignore @@ -153,3 +153,4 @@ mlruns wandb tests/_data tests/integration/_test-output +data diff --git a/luxonis_train/attached_modules/metrics/common.py b/luxonis_train/attached_modules/metrics/common.py index 340ad444..97e8a7ec 100644 --- a/luxonis_train/attached_modules/metrics/common.py +++ b/luxonis_train/attached_modules/metrics/common.py @@ -14,15 +14,26 @@ def __init__(self, **kwargs): super().__init__(node=kwargs.pop("node", None)) task = kwargs.get("task") - if task is None: - if self.node.n_classes > 1: - task = "multiclass" - else: - task = "binary" + if self.node.n_classes > 1: + if task == "binary": + raise ValueError( + f"Task type set to '{task}', but the dataset has more than 1 class. " + f"Set the `task` parameter for {self.name} to either 'multiclass' or 'multilabel'." + ) + task = "multiclass" + else: + if task == "multiclass": + raise ValueError( + f"Task type set to '{task}', but the dataset has only 1 class. " + f"Set the `task` parameter for {self.name} to 'binary'." + ) + task = "binary" + if "task" not in kwargs: logger.warning( - f"Task type not specified for {self.name}, assuming '{task}'." + f"Task type not specified for {self.name}, assuming '{task}'. " + "If this is not correct, please set the `task` parameter explicitly." 
) - kwargs["task"] = task + kwargs["task"] = task self._task = task if self._task == "multiclass": diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index b6aa03f2..d3b00a45 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -22,7 +22,7 @@ from luxonis_train.models import LuxonisLightningModule from luxonis_train.utils.config import Config from luxonis_train.utils.general import DatasetMetadata -from luxonis_train.utils.loaders import collate_fn +from luxonis_train.utils.loaders import BaseLoaderTorch, collate_fn from luxonis_train.utils.registry import LOADERS from luxonis_train.utils.tracker import LuxonisTrackerPL @@ -121,8 +121,14 @@ def __init__( callbacks=LuxonisProgressBar(), ) - self.loaders = { - view: LOADERS.get(self.cfg.loader.name)( + self.loaders: dict[str, BaseLoaderTorch] = {} + for view in ["train", "val", "test"]: + loader_name = self.cfg.loader.name + Loader = LOADERS.get(loader_name) + if loader_name == "LuxonisLoaderTorch" and view != "train": + self.cfg.loader.params["delete_existing"] = False + + self.loaders[view] = Loader( augmentations=( self.train_augmentations if view == "train" @@ -136,8 +142,6 @@ def __init__( image_source=self.cfg.loader.image_source, **self.cfg.loader.params, ) - for view in ["train", "val", "test"] - } for name, loader in self.loaders.items(): logger.info( diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index d1f29ac1..c1dffe47 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -3,6 +3,7 @@ from typing import Annotated, Any, Literal, TypeAlias from luxonis_ml.data import LabelType +from luxonis_ml.enums import DatasetType from luxonis_ml.utils import ( BaseModelExtraForbid, Environ, @@ -167,6 +168,21 @@ def validate_splits(cls, splits: Any) -> list[Any]: return [splits] return splits + @model_validator(mode="after") + def validate_params(self) -> Self: + dataset_type = self.params.get("dataset_type") + if dataset_type is None: + return self + dataset_type = dataset_type.upper() + + if dataset_type not in DatasetType.__members__: + raise ValueError( + f"Dataset type '{dataset_type}' not supported." + f"Supported types are: {', '.join(DatasetType.__members__)}." 
+ ) + self.params["dataset_type"] = DatasetType(dataset_type.lower()) + return self + class NormalizeAugmentationConfig(BaseModelExtraForbid): active: bool = True diff --git a/luxonis_train/utils/loaders/luxonis_loader_torch.py b/luxonis_train/utils/loaders/luxonis_loader_torch.py index c353c36e..4a8b505e 100644 --- a/luxonis_train/utils/loaders/luxonis_loader_torch.py +++ b/luxonis_train/utils/loaders/luxonis_loader_torch.py @@ -1,29 +1,97 @@ -from typing import Literal +import logging +from pathlib import Path +from typing import Literal, cast import numpy as np -from luxonis_ml.data import BucketStorage, BucketType, LuxonisDataset, LuxonisLoader +from luxonis_ml.data import ( + Augmentations, + BucketStorage, + BucketType, + LuxonisDataset, + LuxonisLoader, +) +from luxonis_ml.data.parsers import LuxonisParser +from luxonis_ml.enums import DatasetType from torch import Size, Tensor +from typeguard import typechecked from .base_loader import BaseLoaderTorch, LuxonisLoaderTorchOutput +logger = logging.getLogger(__name__) + class LuxonisLoaderTorch(BaseLoaderTorch): + @typechecked def __init__( self, - dataset_name: str, + dataset_name: str | None = None, + dataset_dir: str | None = None, + dataset_type: DatasetType | None = None, team_id: str | None = None, bucket_type: Literal["internal", "external"] = "internal", bucket_storage: Literal["local", "s3", "gcs", "azure"] = "local", stream: bool = False, + delete_existing: bool = True, + view: str | list[str] = "train", + augmentations: Augmentations | None = None, **kwargs, ): - super().__init__(**kwargs) - self.dataset = LuxonisDataset( - dataset_name=dataset_name, - team_id=team_id, - bucket_type=BucketType(bucket_type), - bucket_storage=BucketStorage(bucket_storage), - ) + """Torch-compatible loader for Luxonis datasets. + + Can either use an already existing dataset or parse a new one from a directory. + + @type dataset_name: str | None + @param dataset_name: Name of the dataset to load. If not provided, the + C{dataset_dir} argument must be provided instead. If both C{dataset_dir} and + C{dataset_name} are provided, the dataset will be parsed from the directory + and saved with the provided name. + @type dataset_dir: str | None + @param dataset_dir: Path to the dataset directory. It can be either a local path + or a URL. The data can be in a zip file. If not provided, C{dataset_name} of + an existing dataset must be provided. + @type dataset_type: str | None + @param dataset_type: Type of the dataset. Only relevant when C{dataset_dir} is + provided. If not provided, the type will be inferred from the directory + structure. + @type team_id: str | None + @param team_id: Optional unique team identifier for the cloud. + @type bucket_type: Literal["internal", "external"] + @param bucket_type: Type of the bucket. Only relevant for remote datasets. + Defaults to 'internal'. + @type bucket_storage: Literal["local", "s3", "gcs", "azure"] + @param bucket_storage: Type of the bucket storage. Defaults to 'local'. + @type stream: bool + @param stream: Flag for data streaming. Defaults to C{False}. + @type delete_existing: bool + @param delete_existing: Only relevant when C{dataset_dir} is provided. By + default, the dataset is parsed again every time the loader is created + because the underlying data might have changed. If C{delete_existing} is set + to C{False} and a dataset of the same name already exists, the existing + dataset will be used instead of re-parsing the data. 
+ @type view: str | list[str] + @param view: A single split or a list of splits that will be used to create a + view of the dataset. Each split is a string that represents a subset of the + dataset. The available splits depend on the dataset, but usually include + 'train', 'val', and 'test'. Defaults to 'train'. + @type augmentations: Augmentations | None + @param augmentations: Augmentations to apply to the data. Defaults to C{None}. + """ + super().__init__(view=view, augmentations=augmentations, **kwargs) + if dataset_dir is not None: + self.dataset = self._parse_dataset( + dataset_dir, dataset_name, dataset_type, delete_existing + ) + else: + if dataset_name is None: + raise ValueError( + "Either `dataset_dir` or `dataset_name` must be provided." + ) + self.dataset = LuxonisDataset( + dataset_name=dataset_name, + team_id=team_id, + bucket_type=BucketType(bucket_type), + bucket_storage=BucketStorage(bucket_storage), + ) self.base_loader = LuxonisLoader( dataset=self.dataset, view=self.view, @@ -57,3 +125,46 @@ def get_classes(self) -> dict[str, list[str]]: def get_n_keypoints(self) -> dict[str, int]: skeletons = self.dataset.get_skeletons() return {task: len(skeletons[task][0]) for task in skeletons} + + def _parse_dataset( + self, + dataset_dir: str, + dataset_name: str | None, + dataset_type: DatasetType | None, + delete_existing: bool, + ) -> LuxonisDataset: + if dataset_name is None: + dataset_name = Path(dataset_dir).stem + if dataset_type is not None: + dataset_name += f"_{dataset_type.value}" + + if LuxonisDataset.exists(dataset_name): + if not delete_existing: + return LuxonisDataset(dataset_name=dataset_name) + else: + logger.warning( + f"Dataset {dataset_name} already exists. " + "The dataset will be generated again to ensure the latest data are used. " + "If you don't want to regenerate the dataset every time, set `delete_existing=False`'" + ) + + if dataset_type is None: + logger.warning( + "Dataset type is not set. " + "Attempting to infer it from the directory structure. " + "If this fails, please set the dataset type manually. " + f"Supported types are: {', '.join(DatasetType.__members__)}." 
+ ) + + logger.info(f"Parsing dataset from {dataset_dir} with name '{dataset_name}'") + + return cast( + LuxonisDataset, + LuxonisParser( + dataset_dir, + dataset_name=dataset_name, + dataset_type=dataset_type, + save_dir="data", + delete_existing=True, + ).parse(), + ) diff --git a/tests/configs/segmentation_parse_loader.yaml b/tests/configs/segmentation_parse_loader.yaml new file mode 100644 index 00000000..60f7a30d --- /dev/null +++ b/tests/configs/segmentation_parse_loader.yaml @@ -0,0 +1,27 @@ +# Example configuration for training a predefined segmentation model + +model: + name: parse_loader_test + predefined_model: + name: SegmentationModel + params: + backbone: MicroNet + task: multiclass + +loader: + params: + dataset_dir: gs://luxonis-test-bucket/luxonis-ml-test-data/D2_Tile.png-mask-semantic.zip + dataset_name: _parse_loader_test_dataset + +trainer: + preprocessing: + train_image_size: [&height 128, &width 128] + keep_aspect_ratio: False + normalize: + active: True + + batch_size: 4 + epochs: &epochs 1 + num_workers: 4 + validation_interval: 1 + num_log_images: 8 diff --git a/tests/integration/test_sanity.py b/tests/integration/test_sanity.py index c6d3bf09..b94da85c 100644 --- a/tests/integration/test_sanity.py +++ b/tests/integration/test_sanity.py @@ -42,6 +42,8 @@ def clear_output(): "detection_model", "keypoint_bbox_model", "resnet_model", + "coco_model", + "efficient_coco_model", ], ) def test_simple_models(config_file: str): @@ -99,6 +101,12 @@ def test_custom_tasks(parking_lot_dataset: LuxonisDataset): del model +def test_parsing_loader(): + model = LuxonisModel("tests/configs/segmentation_parse_loader.yaml") + model.train() + del model + + @pytest.mark.skipif(sys.platform == "win32", reason="Tuning not supported on Windows") def test_tuner(): model = LuxonisModel("configs/example_tuning.yaml", opts=OPTS) From 3c2b2ae083e28ea2d832621b441b672ae5fa92ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Thu, 22 Aug 2024 18:29:15 +0200 Subject: [PATCH 54/75] Fix Archiver (#64) --- .github/workflows/tests.yaml | 3 +- luxonis_train/core/core.py | 20 +- luxonis_train/core/utils/archive_utils.py | 153 ++++++---- requirements-dev.txt | 1 + tests/configs/parking_lot_config.yaml | 7 +- tests/integration/conftest.py | 24 +- tests/integration/parking_lot.json | 351 ++++++++++++++++++++++ tests/integration/test_sanity.py | 47 ++- 8 files changed, 503 insertions(+), 103 deletions(-) create mode 100644 tests/integration/parking_lot.json diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 8b176add..a0999d9b 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -14,6 +14,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest] + version: ['3.10'] runs-on: ${{ matrix.os }} @@ -26,7 +27,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: ${{ matrix.version }} cache: pip - name: Install dependencies [Ubuntu] diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index d3b00a45..d2e4b7b4 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -561,7 +561,6 @@ def archive(self, path: str | Path | None = None) -> Path: archive_save_directory.mkdir(parents=True, exist_ok=True) inputs = [] outputs = [] - heads = [] if path is None: if "onnx" not in self._exported_models: @@ -585,32 +584,33 @@ def _mult(lst: list[float | int]) -> list[float]: } inputs_dict = get_inputs(path) - for input_name in inputs_dict: + for 
input_name, metadata in inputs_dict.items(): inputs.append( { "name": input_name, - "dtype": inputs_dict[input_name]["dtype"], - "shape": inputs_dict[input_name]["shape"], - "layout": inputs_dict[input_name]["layout"], + "dtype": metadata["dtype"], + "shape": metadata["shape"], "preprocessing": preprocessing, "input_type": "image", } ) outputs_dict = get_outputs(path) - for output_name in outputs_dict: + for output_name, metadata in outputs_dict.items(): outputs.append( - {"name": output_name, "dtype": outputs_dict[output_name]["dtype"]} + { + "name": output_name, + "dtype": metadata["dtype"], + "shape": metadata["shape"], + } ) - heads_dict = get_heads( + heads = get_heads( self.cfg, outputs, self.loaders["train"].get_classes(), self.lightning_module.nodes, # type: ignore ) - for head_name in heads_dict: - heads.append(heads_dict[head_name]) model = { "metadata": { diff --git a/luxonis_train/core/utils/archive_utils.py b/luxonis_train/core/utils/archive_utils.py index 6f85f4a3..72cdefc7 100644 --- a/luxonis_train/core/utils/archive_utils.py +++ b/luxonis_train/core/utils/archive_utils.py @@ -1,7 +1,14 @@ +import logging +from collections import defaultdict from pathlib import Path +from typing import TypedDict import onnx -from luxonis_ml.nn_archive.config_building_blocks import ObjectDetectionSubtypeYOLO +from luxonis_ml.nn_archive.config_building_blocks import ( + DataType, + ObjectDetectionSubtypeYOLO, +) +from onnx.onnx_pb import TensorProto from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.enums.head_categorization import ( @@ -10,8 +17,15 @@ ) from luxonis_train.utils.config import Config +logger = logging.getLogger(__name__) + + +class MetadataDict(TypedDict): + shape: list[int] + dtype: DataType + -def get_inputs(path: Path): +def get_inputs(path: Path) -> dict[str, MetadataDict]: """Get inputs of a model executable. @type path: Path @@ -19,43 +33,14 @@ def get_inputs(path: Path): """ if path.suffix == ".onnx": - return _get_onnx_inputs(str(path)) + return _get_onnx_inputs(path) else: raise NotImplementedError( f"Missing input reading function for {path.suffix} models." ) -def _get_onnx_inputs(path: str) -> dict: - """Get inputs of an ONNX model executable. - - @type path: str - @param path: Path to model executable file. - """ - - inputs_dict = {} - model = onnx.load(path) - for input in model.graph.input: - tensor_type = input.type.tensor_type - dtype_idx = tensor_type.elem_type - dtype = str(onnx.helper.tensor_dtype_to_np_dtype(dtype_idx)) - shape = [] - for d in tensor_type.shape.dim: - if d.HasField("dim_value"): - shape.append(d.dim_value) - else: - raise ValueError("Unsupported input dimension identifier type") - if shape[1] == 3: - layout = "NCHW" - elif shape[3] == 3: - layout = "NHWC" - else: - raise ValueError("Unknown input layout") - inputs_dict[input.name] = {"dtype": dtype, "shape": shape, "layout": layout} - return inputs_dict - - -def get_outputs(path: Path) -> dict: +def get_outputs(path: Path) -> dict[str, MetadataDict]: """Get outputs of a model executable. @type path: Path @@ -63,28 +48,59 @@ def get_outputs(path: Path) -> dict: """ if path.suffix == ".onnx": - return _get_onnx_outputs(str(path)) + return _get_onnx_outputs(path) else: raise NotImplementedError( f"Missing input reading function for {path.suffix} models." ) -def _get_onnx_outputs(path: str) -> dict: - """Get outputs of an ONNX model executable. 
+def _from_onnx_dtype(dtype: int) -> DataType: + dtype_map = { + TensorProto.INT8: "int8", + TensorProto.INT32: "int32", + TensorProto.UINT8: "uint8", + TensorProto.FLOAT: "float32", + TensorProto.FLOAT16: "float16", + } + if dtype not in dtype_map: + raise ValueError(f"Unsupported ONNX data type: `{dtype}`") + + return DataType(dtype_map[dtype]) + + +def _load_onnx_model(onnx_path: Path) -> onnx.ModelProto: + try: + return onnx.load(str(onnx_path)) + except Exception as e: + raise ValueError(f"Failed to load ONNX model: `{onnx_path}`") from e - @type executable_path: str - @param executable_path: Path to model executable file. - """ - outputs_dict = {} - model = onnx.load(path) +def _get_onnx_outputs(onnx_path: Path) -> dict[str, MetadataDict]: + model = _load_onnx_model(onnx_path) + outputs: dict[str, MetadataDict] = defaultdict(dict) # type: ignore + for output in model.graph.output: - tensor_type = output.type.tensor_type - dtype_idx = tensor_type.elem_type - dtype = str(onnx.helper.tensor_dtype_to_np_dtype(dtype_idx)) - outputs_dict[output.name] = {"dtype": dtype} - return outputs_dict + shape = [dim.dim_value for dim in output.type.tensor_type.shape.dim] + outputs[output.name]["shape"] = shape + outputs[output.name]["dtype"] = _from_onnx_dtype( + output.type.tensor_type.elem_type + ) + + return outputs + + +def _get_onnx_inputs(onnx_path: Path) -> dict[str, MetadataDict]: + model = _load_onnx_model(onnx_path) + + inputs: dict[str, MetadataDict] = defaultdict(dict) # type: ignore + + for inp in model.graph.input: + shape = [dim.dim_value for dim in inp.type.tensor_type.shape.dim] + inputs[inp.name]["shape"] = shape + inputs[inp.name]["dtype"] = _from_onnx_dtype(inp.type.tensor_type.elem_type) + + return inputs def _get_classes( @@ -150,24 +166,43 @@ def _get_head_specific_parameters( return parameters -def _get_head_outputs(outputs: list[dict], head_name: str) -> list[str]: +def _get_head_outputs(outputs: list[dict], head_name: str, head_type: str) -> list[str]: """Get model outputs in a head-specific format. + @type outputs: list[dict] + @param outputs: List of NN Archive outputs. @type head_name: str - @param head_name: Name of the head (e.g. 'EfficientBBoxHead'). + @param head_name: Type of the head (e.g. 'EfficientBBoxHead') or its custom alias. + @type head_type: str + @param head_name: Type of the head (e.g. 'EfficientBBoxHead'). @rtype: list[str] @return: List of output names. """ - if head_name == "ClassificationHead": + output_names = [] + for output in outputs: + name = output["name"].split("/")[0] + if name == head_name: + output_names.append(output["name"]) + + if output_names: + return output_names + + # TODO: Fix this, will require refactoring custom ONNX output names + logger.error( + "ONNX model uses custom output names, trying to determine outputs based on the head type. " + "This will likely result in incorrect archive for multi-head models." 
+ ) + + if head_type == "ClassificationHead": return [outputs[0]["name"]] - elif head_name == "EfficientBBoxHead": + elif head_type == "EfficientBBoxHead": return [output["name"] for output in outputs] - elif head_name in ["SegmentationHead", "BiSeNetHead"]: + elif head_type in ["SegmentationHead", "BiSeNetHead"]: return [outputs[0]["name"]] - elif head_name == "ImplicitKeypointBBoxHead": + elif head_type == "ImplicitKeypointBBoxHead": return [outputs[0]["name"]] - elif head_name == "EfficientKeypointBBoxHead": + elif head_type == "EfficientKeypointBBoxHead": return [outputs[0]["name"]] else: raise ValueError("Unknown head name") @@ -178,7 +213,7 @@ def get_heads( outputs: list[dict], class_dict: dict[str, list[str]], nodes: dict[str, BaseNode], -) -> dict[str, dict]: +) -> list[dict]: """Get model heads. @type cfg: Config @@ -190,7 +225,7 @@ def get_heads( @type nodes: dict[str, BaseNode] @param nodes: Dictionary of nodes. """ - heads_dict = {} + heads = [] for node in cfg.model.nodes: node_name = node.name @@ -200,10 +235,10 @@ def get_heads( parser = getattr(ImplementedHeads, node_name).value task = node.task if isinstance(task, dict): - task = str(next(iter(task))) + task = str(next(iter(task.values()))) classes = _get_classes(node_name, task, class_dict) - head_outputs = _get_head_outputs(outputs, node_name) + head_outputs = _get_head_outputs(outputs, node_alias, node_name) head_dict = { "parser": parser, "metadata": { @@ -215,5 +250,5 @@ def get_heads( head_dict["metadata"].update( _get_head_specific_parameters(nodes, node_name, node_alias) ) - heads_dict[node_name] = head_dict - return heads_dict + heads.append(head_dict) + return heads diff --git a/requirements-dev.txt b/requirements-dev.txt index a919d265..7f915575 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,3 +3,4 @@ gdown>=4.2.0 pre-commit>=3.2.1 opencv-stubs>=0.0.8 pytest-cov>=4.1.0 +pytest-subtests>=0.12.1 diff --git a/tests/configs/parking_lot_config.yaml b/tests/configs/parking_lot_config.yaml index 53760045..ae9f8069 100644 --- a/tests/configs/parking_lot_config.yaml +++ b/tests/configs/parking_lot_config.yaml @@ -213,12 +213,7 @@ trainer: p: 0.1 callbacks: - - name: LearningRateMonitor - params: - logging_interval: step - - name: MetadataLogger - params: - hyperparams: ["trainer.epochs", trainer.batch_size] - name: ExportOnTrainEnd - name: TestOnTrainEnd + - name: ArchiveOnTrainEnd diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 22a3e108..9b24271b 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,6 +1,4 @@ import json -import os -import os.path as osp from collections import defaultdict from pathlib import Path @@ -164,26 +162,22 @@ def generator(): def create_coco_dataset(): dataset_name = "coco_test" url = "https://drive.google.com/uc?id=1XlvFK7aRmt8op6-hHkWVKIJQeDtOwoRT" - output_folder = "../data/" - output_zip = osp.join(output_folder, "COCO_people_subset.zip") + output_zip = WORK_DIR / "COCO_people_subset.zip" - if not osp.exists(output_folder): - os.makedirs(output_folder) + if not output_zip.exists() and not (WORK_DIR / "COCO_people_subset").exists(): + gdown.download(url, str(output_zip), quiet=False) - if not osp.exists(output_zip) and not osp.exists( - osp.join(output_folder, "COCO_people_subset") - ): - gdown.download(url, output_zip, quiet=False) - - parser = LuxonisParser(output_zip, dataset_name=dataset_name, delete_existing=True) + parser = LuxonisParser( + str(output_zip), dataset_name=dataset_name, 
delete_existing=True + ) parser.parse(random_split=True) @pytest.fixture(scope="session", autouse=True) def create_cifar10_dataset(): dataset = LuxonisDataset("cifar10_test", delete_existing=True) - output_folder = "../data/cifar10" - os.makedirs(output_folder, exist_ok=True) + output_folder = WORK_DIR / "cifar10" + output_folder.mkdir(parents=True, exist_ok=True) cifar10_torch = torchvision.datasets.CIFAR10( root=output_folder, train=False, download=True ) @@ -204,7 +198,7 @@ def CIFAR10_subset_generator(): for i, (image, label) in enumerate(cifar10_torch): # type: ignore if i == 1000: break - path = osp.join(output_folder, f"cifar_{i}.png") + path = output_folder / f"cifar_{i}.png" image.save(path) yield { "file": path, diff --git a/tests/integration/parking_lot.json b/tests/integration/parking_lot.json new file mode 100644 index 00000000..d9599642 --- /dev/null +++ b/tests/integration/parking_lot.json @@ -0,0 +1,351 @@ +{ + "config_version": "1.0", + "model": { + "metadata": { + "name": "parking_lot_model", + "path": "parking_lot_model.onnx", + "precision": "float32" + }, + "inputs": [ + { + "name": "image", + "dtype": "float32", + "input_type": "image", + "shape": [ + 1, + 3, + 256, + 320 + ], + "layout": "NCHW", + "preprocessing": { + "mean": [ + 123.675, + 116.28, + 103.53 + ], + "scale": [ + 58.395, + 57.12, + 57.375 + ], + "reverse_channels": true, + "interleaved_to_planar": false + } + } + ], + "outputs": [ + { + "name": "any-vehicle-segmentation-head/vehicle_segmentation/0", + "dtype": "float32", + "shape": [ + 1, + 1, + 256, + 320 + ], + "layout": "NCHW" + }, + { + "name": "bbox-head/boundingbox/0", + "dtype": "float32", + "shape": [ + 1, + 7, + 32, + 40 + ], + "layout": "NCHW" + }, + { + "name": "bbox-head/boundingbox/1", + "dtype": "float32", + "shape": [ + 1, + 7, + 16, + 20 + ], + "layout": "NCHW" + }, + { + "name": "bbox-head/boundingbox/2", + "dtype": "float32", + "shape": [ + 1, + 7, + 8, + 10 + ], + "layout": "NCHW" + }, + { + "name": "brand-segmentation-head/brand_segmentation/0", + "dtype": "float32", + "shape": [ + 1, + 23, + 256, + 320 + ], + "layout": "NCHW" + }, + { + "name": "car-detection-head/boxes_and_keypoints/0", + "dtype": "float32", + "shape": [ + 1, + 66240, + 24 + ], + "layout": "NCD" + }, + { + "name": "color-segmentation-head/color_segmentation/0", + "dtype": "float32", + "shape": [ + 1, + 4, + 256, + 320 + ], + "layout": "NCHW" + }, + { + "name": "context-brand-segmentation-head/brand_segmentation/0", + "dtype": "float32", + "shape": [ + 1, + 23, + 256, + 320 + ], + "layout": "NCHW" + }, + { + "name": "motorbike-detection-head/outputs/0", + "dtype": "float32", + "shape": [ + 1, + 14, + 32, + 40 + ], + "layout": "NCHW" + }, + { + "name": "motorbike-detection-head/outputs/1", + "dtype": "float32", + "shape": [ + 1, + 14, + 16, + 20 + ], + "layout": "NCHW" + }, + { + "name": "motorbike-detection-head/outputs/2", + "dtype": "float32", + "shape": [ + 1, + 14, + 8, + 10 + ], + "layout": "NCDE" + }, + { + "name": "vehicle-type-segmentation-head/vehicle_type_segmentation/0", + "dtype": "float32", + "shape": [ + 1, + 3, + 256, + 320 + ], + "layout": "NCHW" + } + ], + "heads": [ + { + "parser": "YoloDetectionNetwork", + "metadata": { + "postprocessor_path": null, + "classes": [ + "motorbike", + "car" + ], + "n_classes": 2, + "iou_threshold": 0.45, + "conf_threshold": 0.25, + "max_det": 300, + "anchors": null, + "subtype": "yolov6" + }, + "outputs": [ + "bbox-head/boundingbox/0", + "bbox-head/boundingbox/1", + "bbox-head/boundingbox/2" + ] + }, + { + "parser": 
"YoloDetectionNetwork", + "metadata": { + "postprocessor_path": null, + "classes": [ + "car" + ], + "n_classes": 1, + "iou_threshold": 0.45, + "conf_threshold": 0.25, + "max_det": 300, + "subtype": "yolov7", + "n_keypoints": 6 + }, + "outputs": [ + "car-detection-head/boxes_and_keypoints/0" + ] + }, + { + "parser": "YoloDetectionNetwork", + "metadata": { + "postprocessor_path": null, + "classes": [ + "motorbike" + ], + "n_classes": 1, + "iou_threshold": 0.45, + "conf_threshold": 0.25, + "max_det": 300, + "anchors": null, + "n_keypoints": 3 + }, + "outputs": [ + "motorbike-detection-head/outputs/0", + "motorbike-detection-head/outputs/1", + "motorbike-detection-head/outputs/2" + ] + }, + { + "parser": "SegmentationParser", + "metadata": { + "postprocessor_path": null, + "classes": [ + "background", + "chrysler", + "bmw", + "ducati", + "dodge", + "ferrari", + "infiniti", + "land-rover", + "roll-royce", + "saab", + "Kawasaki", + "moto", + "truimph", + "alfa-romeo", + "harley", + "honda", + "jeep", + "aprilia", + "piaggio", + "yamaha", + "buick", + "pontiac", + "isuzu" + ], + "n_classes": 23, + "is_softmax": false + }, + "outputs": [ + "context-brand-segmentation-head/brand_segmentation/0" + ] + }, + { + "parser": "SegmentationParser", + "metadata": { + "postprocessor_path": null, + "classes": [ + "background", + "blue", + "green", + "red" + ], + "n_classes": 4, + "is_softmax": false + }, + "outputs": [ + "color-segmentation-head/color_segmentation/0" + ] + }, + { + "parser": "SegmentationParser", + "metadata": { + "postprocessor_path": null, + "classes": [ + "vehicle" + ], + "n_classes": 1, + "is_softmax": false + }, + "outputs": [ + "any-vehicle-segmentation-head/vehicle_segmentation/0" + ] + }, + { + "parser": "SegmentationParser", + "metadata": { + "postprocessor_path": null, + "classes": [ + "background", + "chrysler", + "bmw", + "ducati", + "dodge", + "ferrari", + "infiniti", + "land-rover", + "roll-royce", + "saab", + "Kawasaki", + "moto", + "truimph", + "alfa-romeo", + "harley", + "honda", + "jeep", + "aprilia", + "piaggio", + "yamaha", + "buick", + "pontiac", + "isuzu" + ], + "n_classes": 23, + "is_softmax": false + }, + "outputs": [ + "brand-segmentation-head/brand_segmentation/0" + ] + }, + { + "parser": "SegmentationParser", + "metadata": { + "postprocessor_path": null, + "classes": [ + "background", + "car", + "motorbike" + ], + "n_classes": 3, + "is_softmax": false + }, + "outputs": [ + "vehicle-type-segmentation-head/vehicle_type_segmentation/0" + ] + } + ] + } +} diff --git a/tests/integration/test_sanity.py b/tests/integration/test_sanity.py index b94da85c..cf7af8aa 100644 --- a/tests/integration/test_sanity.py +++ b/tests/integration/test_sanity.py @@ -1,5 +1,8 @@ +import json import shutil import sys +import tarfile +from copy import deepcopy from pathlib import Path import pytest @@ -24,10 +27,15 @@ } +@pytest.fixture(scope="session", autouse=True) +def manage_out_dir(): + shutil.rmtree(TEST_OUTPUT, ignore_errors=True) + TEST_OUTPUT.mkdir(exist_ok=True) + + @pytest.fixture(scope="function", autouse=True) -def clear_output(): +def clear_files(): Config.clear_instance() - shutil.rmtree(TEST_OUTPUT, ignore_errors=True) yield STUDY_PATH.unlink(missing_ok=True) ONNX_PATH.unlink(missing_ok=True) @@ -86,18 +94,33 @@ def test_multi_input(): del model -def test_custom_tasks(parking_lot_dataset: LuxonisDataset): +def test_custom_tasks(parking_lot_dataset: LuxonisDataset, subtests): config_file = "tests/configs/parking_lot_config.yaml" - model = LuxonisModel( - config_file, - 
opts=OPTS - | { - "loader.params.dataset_name": parking_lot_dataset.dataset_name, - "trainer.batch_size": 2, - }, - ) + opts = deepcopy(OPTS) | { + "loader.params.dataset_name": parking_lot_dataset.dataset_name, + "trainer.batch_size": 2, + } + del opts["trainer.callbacks"] + model = LuxonisModel(config_file, opts=opts) model.train() - assert model.archive().exists() + archive_path = Path( + model.run_save_dir, "archive", model.cfg.model.name + ).with_suffix(".onnx.tar.xz") + correct_archive_config = json.loads( + Path("tests/integration/parking_lot.json").read_text() + ) + + with subtests.test("test_archive"): + assert archive_path.exists() + with tarfile.open(archive_path) as tar: + extracted_cfg = tar.extractfile("config.json") + + assert extracted_cfg is not None, "Config JSON not found in the archive." + generated_config = json.loads(extracted_cfg.read().decode()) + + del generated_config["model"]["heads"][1]["metadata"]["anchors"] + assert generated_config == correct_archive_config + del model From 54fc1449b69c978b1bb2afdaca963d161b166434 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Tue, 27 Aug 2024 10:46:12 -0400 Subject: [PATCH 55/75] Generalized Progress Bar (#66) --- luxonis_train/callbacks/__init__.py | 10 +- .../callbacks/luxonis_progress_bar.py | 190 ++++++++++-------- luxonis_train/core/core.py | 12 +- luxonis_train/models/luxonis_lightning.py | 9 +- luxonis_train/utils/config.py | 1 + requirements.txt | 3 +- 6 files changed, 136 insertions(+), 89 deletions(-) diff --git a/luxonis_train/callbacks/__init__.py b/luxonis_train/callbacks/__init__.py index 84d2d1cf..4c7f7824 100644 --- a/luxonis_train/callbacks/__init__.py +++ b/luxonis_train/callbacks/__init__.py @@ -11,7 +11,11 @@ from .archive_on_train_end import ArchiveOnTrainEnd from .export_on_train_end import ExportOnTrainEnd from .gpu_stats_monitor import GPUStatsMonitor -from .luxonis_progress_bar import LuxonisProgressBar +from .luxonis_progress_bar import ( + BaseLuxonisProgressBar, + LuxonisRichProgressBar, + LuxonisTQDMProgressBar, +) from .metadata_logger import MetadataLogger from .module_freezer import ModuleFreezer from .test_on_train_end import TestOnTrainEnd @@ -27,7 +31,9 @@ __all__ = [ "ArchiveOnTrainEnd", "ExportOnTrainEnd", - "LuxonisProgressBar", + "LuxonisTQDMProgressBar", + "LuxonisRichProgressBar", + "BaseLuxonisProgressBar", "MetadataLogger", "ModuleFreezer", "TestOnTrainEnd", diff --git a/luxonis_train/callbacks/luxonis_progress_bar.py b/luxonis_train/callbacks/luxonis_progress_bar.py index bed449f2..d14fcf08 100644 --- a/luxonis_train/callbacks/luxonis_progress_bar.py +++ b/luxonis_train/callbacks/luxonis_progress_bar.py @@ -1,35 +1,16 @@ +from abc import ABC, abstractmethod from collections.abc import Mapping import lightning.pytorch as pl -from lightning.pytorch.callbacks import RichProgressBar +import tabulate +from lightning.pytorch.callbacks import ProgressBar, RichProgressBar, TQDMProgressBar from rich.console import Console from rich.table import Table from luxonis_train.utils.registry import CALLBACKS -@CALLBACKS.register_module() -class LuxonisProgressBar(RichProgressBar): - """Custom rich text progress bar based on RichProgressBar from Pytorch Lightning.""" - - _console: Console - - def __init__(self): - super().__init__(leave=True) - - def print_single_line(self, text: str, style: str = "magenta") -> None: - """Prints single line of text to the console. - - @type text: str - @param text: Text to print. - @type style: str - @param style: Style of the text. 
Defaults to C{"magenta"}. - """ - if self._check_console(): - self._console.print(f"[{style}]{text}[/{style}]") - else: - print(text) - +class BaseLuxonisProgressBar(ABC, ProgressBar): def get_metrics( self, trainer: pl.Trainer, pl_module: pl.LightningModule ) -> dict[str, int | str | float | dict[str, float]]: @@ -40,22 +21,48 @@ def get_metrics( items["Loss"] = pl_module.training_step_outputs[-1]["loss"].item() return items - def _check_console(self) -> bool: - """Checks if console is set. + @abstractmethod + def print_results( + self, + stage: str, + loss: float, + metrics: Mapping[str, Mapping[str, int | str | float]], + ) -> None: + """Prints results to the console. + + This includes the stage name, loss value, and tables with metrics. - @rtype: bool - @return: True if console is set, False otherwise. + @type stage: str + @param stage: Stage name. + @type loss: float + @param loss: Loss value. + @type metrics: Mapping[str, Mapping[str, int | str | float]] + @param metrics: Metrics in format {table_name: table}. """ - return self._console is not None + pass - def print_table( + +@CALLBACKS.register_module() +class LuxonisTQDMProgressBar(TQDMProgressBar, BaseLuxonisProgressBar): + """Custom text progress bar based on TQDMProgressBar from Pytorch Lightning.""" + + def __init__(self): + super().__init__(leave=True) + + def _rule(self, title: str | None = None) -> None: + if title is not None: + print(f"------{title}-----") + else: + print("-----------------") + + def _print_table( self, title: str, table: Mapping[str, int | str | float], key_name: str = "Name", value_name: str = "Value", ) -> None: - """Prints table to the console using rich text. + """Prints table to the console using tabulate. @type title: str @param title: Title of the table @@ -66,35 +73,78 @@ def print_table( @type value_name: str @param value_name: Name of the value column. Defaults to C{"Value"}. """ - if self._check_console(): - rich_table = Table( - title=title, - show_header=True, - header_style="bold magenta", + self._rule(title) + print( + tabulate.tabulate( + table.items(), + headers=[key_name, value_name], + tablefmt="fancy_grid", + numalign="right", ) - rich_table.add_column(key_name, style="magenta") - rich_table.add_column(value_name, style="white") - for name, value in table.items(): - if isinstance(value, float): - rich_table.add_row(name, f"{value:.5f}") - else: - rich_table.add_row(name, str(value)) - self._console.print(rich_table) - else: - print(f"------{title}-----") - for name, value in table.items(): - print(f"{name}: {value}") + ) + print() + + def print_results( + self, + stage: str, + loss: float, + metrics: Mapping[str, Mapping[str, int | str | float]], + ) -> None: + self._rule(stage) + print(f"Loss: {loss}") + print("Metrics:") + for table_name, table in metrics.items(): + self._print_table(table_name, table) + self._rule() + + +@CALLBACKS.register_module() +class LuxonisRichProgressBar(RichProgressBar, BaseLuxonisProgressBar): + """Custom rich text progress bar based on RichProgressBar from Pytorch Lightning.""" + + def __init__(self): + super().__init__(leave=True) + + @property + def console(self) -> Console: + if self._console is None: + raise RuntimeError( + "Console is not initialized for the `LuxonisRichProgressBar`. " + "Consider setting `tracker.use_rich_progress_bar` to `False` in the configuration." 
+ ) + return self._console - def print_tables( - self, tables: Mapping[str, Mapping[str, int | str | float]] + def print_table( + self, + title: str, + table: Mapping[str, int | str | float], + key_name: str = "Name", + value_name: str = "Value", ) -> None: - """Prints multiple tables to the console using rich text. + """Prints table to the console using rich text. - @type tables: Mapping[str, Mapping[str, int | str | float]] - @param tables: Tables to print in format {table_name: table}. + @type title: str + @param title: Title of the table + @type table: Mapping[str, int | str | float] + @param table: Table to print + @type key_name: str + @param key_name: Name of the key column. Defaults to C{"Name"}. + @type value_name: str + @param value_name: Name of the value column. Defaults to C{"Value"}. """ - for table_name, table in tables.items(): - self.print_table(table_name, table) + rich_table = Table( + title=title, + show_header=True, + header_style="bold magenta", + ) + rich_table.add_column(key_name, style="magenta") + rich_table.add_column(value_name, style="white") + for name, value in table.items(): + if isinstance(value, float): + rich_table.add_row(name, f"{value:.5f}") + else: + rich_table.add_row(name, str(value)) + self.console.print(rich_table) def print_results( self, @@ -102,29 +152,9 @@ def print_results( loss: float, metrics: Mapping[str, Mapping[str, int | str | float]], ) -> None: - """Prints results to the console using rich text. - - @type stage: str - @param stage: Stage name. - @type loss: float - @param loss: Loss value. - @type metrics: Mapping[str, Mapping[str, int | str | float]] - @param metrics: Metrics in format {table_name: table}. - """ - if self._check_console(): - self._console.rule(f"{stage}", style="bold magenta") - self._console.print( - f"[bold magenta]Loss:[/bold magenta] [white]{loss}[/white]" - ) - self._console.print("[bold magenta]Metrics:[/bold magenta]") - self.print_tables(metrics) - self._console.rule(style="bold magenta") - else: - print(f"------{stage}-----") - print(f"Loss: {loss}") - - for node_name, node_metrics in metrics.items(): - for metric_name, metric_value in node_metrics.items(): - print( - f"{stage} metric: {node_name}/{metric_name}: {metric_value:.4f}" - ) + self.console.rule(f"{stage}", style="bold magenta") + self.console.print(f"[bold magenta]Loss:[/bold magenta] [white]{loss}[/white]") + self.console.print("[bold magenta]Metrics:[/bold magenta]") + for table_name, table in metrics.items(): + self.print_table(table_name, table) + self.console.rule(style="bold magenta") diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index d2e4b7b4..e7bf35a2 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -18,7 +18,7 @@ from luxonis_ml.utils import LuxonisFileSystem, reset_logging, setup_logging from luxonis_train.attached_modules.visualizers import get_unnormalized_images -from luxonis_train.callbacks import LuxonisProgressBar +from luxonis_train.callbacks import LuxonisRichProgressBar, LuxonisTQDMProgressBar from luxonis_train.models import LuxonisLightningModule from luxonis_train.utils.config import Config from luxonis_train.utils.general import DatasetMetadata @@ -118,7 +118,9 @@ def __init__( self.cfg, logger=self.tracker, deterministic=deterministic, - callbacks=LuxonisProgressBar(), + callbacks=LuxonisRichProgressBar() + if self.cfg.trainer.use_rich_progress_bar + else LuxonisTQDMProgressBar(), ) self.loaders: dict[str, BaseLoaderTorch] = {} @@ -441,7 +443,11 @@ def _objective(trial: 
optuna.trial.Trial) -> float: input_shapes=self.loaders["train"].input_shapes, _core=self, ) - callbacks = [LuxonisProgressBar()] + callbacks = [ + LuxonisRichProgressBar() + if cfg.trainer.use_rich_progress_bar + else LuxonisTQDMProgressBar() + ] pruner_callback = PyTorchLightningPruningCallback(trial, monitor="val/loss") callbacks.append(pruner_callback) diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index 3147ffe1..a3671dac 100644 --- a/luxonis_train/models/luxonis_lightning.py +++ b/luxonis_train/models/luxonis_lightning.py @@ -22,7 +22,10 @@ combine_visualizations, get_unnormalized_images, ) -from luxonis_train.callbacks import LuxonisProgressBar, ModuleFreezer +from luxonis_train.callbacks import ( + BaseLuxonisProgressBar, + ModuleFreezer, +) from luxonis_train.nodes import BaseNode from luxonis_train.utils.config import AttachedModuleConfig, Config from luxonis_train.utils.general import DatasetMetadata, to_shape_packet, traverse_graph @@ -815,8 +818,8 @@ def _to_module_dict(modules: dict[str, dict[str, nn.Module]]) -> nn.ModuleDict: ) @property - def _progress_bar(self) -> LuxonisProgressBar: - return cast(LuxonisProgressBar, self._trainer.progress_bar_callback) + def _progress_bar(self) -> BaseLuxonisProgressBar: + return cast(BaseLuxonisProgressBar, self._trainer.progress_bar_callback) @rank_zero_only def _print_results( diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index c1dffe47..3739c61e 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -242,6 +242,7 @@ class SchedulerConfig(BaseModelExtraForbid): class TrainerConfig(BaseModelExtraForbid): preprocessing: PreprocessingConfig = PreprocessingConfig() + use_rich_progress_bar: bool = True accelerator: Literal["auto", "cpu", "gpu", "tpu"] = "auto" devices: int | list[int] | str = "auto" diff --git a/requirements.txt b/requirements.txt index 42834ae6..8bec2286 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ blobconverter>=1.4.2 -lightning>=2.0.0 +lightning>=2.4.0 #luxonis-ml[all]>=0.1.0 luxonis-ml[all]@git+https://github.com/luxonis/luxonis-ml.git@dev onnx>=1.12.0 @@ -17,3 +17,4 @@ torchvision>=0.16.0 typer>=0.9.0 mlflow>=2.10.0 psutil>=5.0.0 +tabulate>=0.9.0 From 19a5d2be26fa02039b2b69029ddf40a6f578de5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Tue, 27 Aug 2024 10:51:27 -0400 Subject: [PATCH 56/75] Fail-Proof Checkpoint Usage in Callbacks (#65) --- luxonis_train/__main__.py | 3 + .../callbacks/archive_on_train_end.py | 16 ++---- .../callbacks/export_on_train_end.py | 19 +++---- luxonis_train/callbacks/needs_checkpoint.py | 56 +++++++++++++++++++ luxonis_train/core/core.py | 4 ++ luxonis_train/utils/config.py | 33 +++++++++-- 6 files changed, 103 insertions(+), 28 deletions(-) create mode 100644 luxonis_train/callbacks/needs_checkpoint.py diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index cdc66954..454e9525 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -6,6 +6,9 @@ import typer import yaml +from luxonis_ml.utils import setup_logging + +setup_logging(use_rich=True) class _ViewType(str, Enum): diff --git a/luxonis_train/callbacks/archive_on_train_end.py b/luxonis_train/callbacks/archive_on_train_end.py index 7d6da67f..d9e7b298 100644 --- a/luxonis_train/callbacks/archive_on_train_end.py +++ b/luxonis_train/callbacks/archive_on_train_end.py @@ -5,11 +5,13 @@ import luxonis_train from luxonis_train.utils.registry import 
CALLBACKS +from .needs_checkpoint import NeedsCheckpoint + logger = logging.getLogger(__name__) @CALLBACKS.register_module() -class ArchiveOnTrainEnd(pl.Callback): +class ArchiveOnTrainEnd(NeedsCheckpoint): def on_train_end( self, _: pl.Trainer, @@ -21,17 +23,11 @@ def on_train_end( @param trainer: Pytorch Lightning trainer. @type pl_module: L{pl.LightningModule} @param pl_module: Pytorch Lightning module. - @raises RuntimeError: If no best model path is found. """ - best_model_path = pl_module.core.get_min_loss_checkpoint_path() - if not best_model_path: - logger.error( - "No best model path found. " - "Please make sure that ModelCheckpoint callback is present " - "and at least one validation epoch has been performed. " - "Skipping model archiving." - ) + path = self.get_checkpoint(pl_module) + if path is None: + logger.warning("Skipping model archiving.") return onnx_path = pl_module.core._exported_models.get("onnx") diff --git a/luxonis_train/callbacks/export_on_train_end.py b/luxonis_train/callbacks/export_on_train_end.py index 7e8f8a71..261c4ef6 100644 --- a/luxonis_train/callbacks/export_on_train_end.py +++ b/luxonis_train/callbacks/export_on_train_end.py @@ -5,11 +5,13 @@ import luxonis_train from luxonis_train.utils.registry import CALLBACKS +from .needs_checkpoint import NeedsCheckpoint + logger = logging.getLogger(__name__) @CALLBACKS.register_module() -class ExportOnTrainEnd(pl.Callback): +class ExportOnTrainEnd(NeedsCheckpoint): def on_train_end( self, _: pl.Trainer, @@ -21,17 +23,10 @@ def on_train_end( @param trainer: Pytorch Lightning trainer. @type pl_module: L{pl.LightningModule} @param pl_module: Pytorch Lightning module. - @raises RuntimeError: If no best model path is found. """ - - best_model_path = pl_module.core.get_best_metric_checkpoint_path() - if not best_model_path: - logger.error( - "No model checkpoint found. " - "Make sure that `ModelCheckpoint` callback is present " - "and at least one validation epoch has been performed. " - "Skipping model export." - ) + path = self.get_checkpoint(pl_module) + if path is None: + logger.warning("Skipping model export.") return - pl_module.core.export(weights=best_model_path) + pl_module.core.export(weights=self.get_checkpoint(pl_module)) diff --git a/luxonis_train/callbacks/needs_checkpoint.py b/luxonis_train/callbacks/needs_checkpoint.py new file mode 100644 index 00000000..30355e82 --- /dev/null +++ b/luxonis_train/callbacks/needs_checkpoint.py @@ -0,0 +1,56 @@ +import logging +from typing import Literal + +import lightning.pytorch as pl + +import luxonis_train + +logger = logging.getLogger(__name__) + + +class NeedsCheckpoint(pl.Callback): + def __init__( + self, preferred_checkpoint: Literal["metric", "loss"] = "metric", **kwargs + ): + super().__init__(**kwargs) + self.preferred_checkpoint = preferred_checkpoint + + @staticmethod + def _get_checkpoint( + checkpoint_type: str, + pl_module: "luxonis_train.models.LuxonisLightningModule", + ) -> str | None: + if checkpoint_type == "loss": + path = pl_module.core.get_min_loss_checkpoint_path() + if not path: + logger.error( + "No checkpoint for minimum loss found. " + "Make sure that `ModelCheckpoint` callback is present " + "and at least one validation epoch has been performed." + ) + return path + else: + path = pl_module.core.get_best_metric_checkpoint_path() + if not path: + logger.error( + "No checkpoint for best metric found. 
" + "Make sure that `ModelCheckpoint` callback is present, " + "at least one validation epoch has been performed and " + "the model has at least one metric." + ) + return path + + def _get_other_type(self, checkpoint_type: str) -> str: + if checkpoint_type == "loss": + return "metric" + return "loss" + + def get_checkpoint( + self, pl_module: "luxonis_train.models.LuxonisLightningModule" + ) -> str | None: + path = self._get_checkpoint(self.preferred_checkpoint, pl_module) + if path is not None: + return path + other_checkpoint = self._get_other_type(self.preferred_checkpoint) + logger.info(f"Attempting to use {other_checkpoint} checkpoint.") + return self._get_checkpoint(other_checkpoint, pl_module) diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index e7bf35a2..2b1607ad 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -685,6 +685,8 @@ def get_min_loss_checkpoint_path(self) -> str | None: @rtype: str @return: Path to best checkpoint with respect to minimal validation loss """ + if not self.pl_trainer.checkpoint_callbacks: + return None return self.pl_trainer.checkpoint_callbacks[0].best_model_path # type: ignore @rank_zero_only @@ -694,4 +696,6 @@ def get_best_metric_checkpoint_path(self) -> str | None: @rtype: str @return: Path to best checkpoint with respect to best validation metric """ + if len(self.pl_trainer.checkpoint_callbacks) < 2: + return None return self.pl_trainer.checkpoint_callbacks[1].best_model_path # type: ignore diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 3739c61e..44c00637 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -77,6 +77,27 @@ class ModelConfig(BaseModelExtraForbid): visualizers: list[AttachedModuleConfig] = [] outputs: list[str] = [] + @model_validator(mode="after") + def check_main_metric(self) -> Self: + for metric in self.metrics: + if metric.is_main_metric: + logger.info(f"Main metric: `{metric.name}`") + return self + + logger.warning("No main metric specified.") + if self.metrics: + metric = self.metrics[0] + metric.is_main_metric = True + name = metric.alias or metric.name + logger.info(f"Setting '{name}' as main metric.") + else: + logger.error( + "No metrics specified. " + "This is likely unintended unless " + "the configuration is not used for training." 
+ ) + return self + @model_validator(mode="after") def check_predefined_model(self) -> Self: from luxonis_train.utils.registry import MODELS @@ -351,12 +372,12 @@ class TunerConfig(BaseModelExtraForbid): class Config(LuxonisConfig): - model: ModelConfig = ModelConfig() - loader: LoaderConfig = LoaderConfig() - tracker: TrackerConfig = TrackerConfig() - trainer: TrainerConfig = TrainerConfig() - exporter: ExportConfig = ExportConfig() - archiver: ArchiveConfig = ArchiveConfig() + model: Annotated[ModelConfig, Field(default_factory=ModelConfig)] + loader: Annotated[LoaderConfig, Field(default_factory=LoaderConfig)] + tracker: Annotated[TrackerConfig, Field(default_factory=TrackerConfig)] + trainer: Annotated[TrainerConfig, Field(default_factory=TrainerConfig)] + exporter: Annotated[ExportConfig, Field(default_factory=ExportConfig)] + archiver: Annotated[ArchiveConfig, Field(default_factory=ArchiveConfig)] tuner: TunerConfig | None = None ENVIRON: Environ = Field(Environ(), exclude=True) From 8870968f8106e90ef085108f5dd5c3da48deb5d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Wed, 28 Aug 2024 15:20:39 -0400 Subject: [PATCH 57/75] LuxonisML 0.3.0 Updates (#68) --- luxonis_train/core/core.py | 3 +-- luxonis_train/utils/loaders/base_loader.py | 2 +- .../utils/loaders/luxonis_loader_torch.py | 19 ++++++++----------- luxonis_train/utils/types.py | 10 +--------- tests/integration/test_sanity.py | 2 -- 5 files changed, 11 insertions(+), 25 deletions(-) diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index 2b1607ad..16953062 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -268,7 +268,7 @@ def thread_exception_hook(args): self.thread.start() def export( - self, onnx_save_path: str | None = None, *, weights: str | None = None + self, onnx_save_path: str | None = None, *, weights: str | Path | None = None ) -> None: """Runs export. @@ -429,7 +429,6 @@ def _objective(trial: optuna.trial.Trial) -> float: for a in cfg_copy.trainer.preprocessing.augmentations if a.name != "Normalize" ] # manually remove Normalize so it doesn't duplicate it when creating new cfg instance - Config.clear_instance() cfg = Config.get_config(cfg_copy.model_dump(), curr_params) child_tracker.log_hyperparams(curr_params) diff --git a/luxonis_train/utils/loaders/base_loader.py b/luxonis_train/utils/loaders/base_loader.py index e18d7f5e..5e884955 100644 --- a/luxonis_train/utils/loaders/base_loader.py +++ b/luxonis_train/utils/loaders/base_loader.py @@ -103,7 +103,7 @@ def get_classes(self) -> dict[str, list[str]]: @rtype: dict[LabelType, list[str]] @return: A dictionary mapping tasks to their classes. """ - pass + ... 
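For context, `get_classes` is the hook a custom loader overrides; a hypothetical stand-in class (task and class names are invented for illustration) could look like:

```python
# Stand-in for a BaseLoaderTorch subclass; only the get_classes hook is sketched.
class MyLoader:
    def get_classes(self) -> dict[str, list[str]]:
        # Map each task to the class names it predicts.
        return {"segmentation": ["background", "vehicle"]}

print(MyLoader().get_classes())
```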
def get_n_keypoints(self) -> dict[str, int] | None: """Returns the dictionary defining the semantic skeleton for each class using diff --git a/luxonis_train/utils/loaders/luxonis_loader_torch.py b/luxonis_train/utils/loaders/luxonis_loader_torch.py index 4a8b505e..328f87be 100644 --- a/luxonis_train/utils/loaders/luxonis_loader_torch.py +++ b/luxonis_train/utils/loaders/luxonis_loader_torch.py @@ -1,6 +1,6 @@ import logging from pathlib import Path -from typing import Literal, cast +from typing import Literal import numpy as np from luxonis_ml.data import ( @@ -158,13 +158,10 @@ def _parse_dataset( logger.info(f"Parsing dataset from {dataset_dir} with name '{dataset_name}'") - return cast( - LuxonisDataset, - LuxonisParser( - dataset_dir, - dataset_name=dataset_name, - dataset_type=dataset_type, - save_dir="data", - delete_existing=True, - ).parse(), - ) + return LuxonisParser( + dataset_dir, + dataset_name=dataset_name, + dataset_type=dataset_type, + save_dir="data", + delete_existing=True, + ).parse() diff --git a/luxonis_train/utils/types.py b/luxonis_train/utils/types.py index 375ab565..84b8e019 100644 --- a/luxonis_train/utils/types.py +++ b/luxonis_train/utils/types.py @@ -45,15 +45,7 @@ def from_missing_task(cls, task: str, present_tasks: list[str], class_name: str) class BaseProtocol(BaseModel): class Config: arbitrary_types_allowed = True - - @classmethod - def get_task(cls) -> str: - if len(cls.__annotations__) == 1: - return list(cls.__annotations__)[0] - raise ValueError( - "Protocol must have exactly one field for automatic task inference. " - "Implement custom `prepare` method in your attached module." - ) + extra = "forbid" class FeaturesProtocol(BaseProtocol): diff --git a/tests/integration/test_sanity.py b/tests/integration/test_sanity.py index cf7af8aa..5afa385b 100644 --- a/tests/integration/test_sanity.py +++ b/tests/integration/test_sanity.py @@ -10,7 +10,6 @@ from multi_input_modules import * from luxonis_train.core import LuxonisModel -from luxonis_train.utils.config import Config TEST_OUTPUT = Path("tests/integration/_test-output") INFER_PATH = Path("tests/integration/_infer_save_dir") @@ -35,7 +34,6 @@ def manage_out_dir(): @pytest.fixture(scope="function", autouse=True) def clear_files(): - Config.clear_instance() yield STUDY_PATH.unlink(missing_ok=True) ONNX_PATH.unlink(missing_ok=True) From 723dff190c27533baf64b11c4501df09858ff1f3 Mon Sep 17 00:00:00 2001 From: Anton Makoveev Date: Sun, 1 Sep 2024 21:29:04 +0200 Subject: [PATCH 58/75] [Feat]: add obb head --- configs/obb_detection_model.yaml | 40 +++ .../attached_modules/losses/__init__.py | 2 + .../losses/obb_detection_loss.py | 334 ++++++++++++++++++ .../metrics/mean_average_precision.py | 2 +- .../visualizers/bbox_visualizer.py | 2 +- .../models/predefined_models/__init__.py | 2 + .../predefined_models/detection_model_obb.py | 89 +++++ luxonis_train/nodes/blocks/__init__.py | 2 + luxonis_train/nodes/blocks/blocks.py | 34 ++ luxonis_train/nodes/heads/__init__.py | 2 + .../nodes/heads/efficient_obbox_head.py | 160 +++++++++ luxonis_train/utils/assigners/__init__.py | 4 +- luxonis_train/utils/assigners/tal_assigner.py | 141 +++++++- luxonis_train/utils/assigners/utils.py | 54 ++- luxonis_train/utils/boxutils.py | 149 ++++++++ test_models.py | 25 ++ 16 files changed, 1033 insertions(+), 9 deletions(-) create mode 100644 configs/obb_detection_model.yaml create mode 100644 luxonis_train/attached_modules/losses/obb_detection_loss.py create mode 100644 luxonis_train/models/predefined_models/detection_model_obb.py 
create mode 100644 luxonis_train/nodes/heads/efficient_obbox_head.py create mode 100644 test_models.py diff --git a/configs/obb_detection_model.yaml b/configs/obb_detection_model.yaml new file mode 100644 index 00000000..719b3569 --- /dev/null +++ b/configs/obb_detection_model.yaml @@ -0,0 +1,40 @@ +# Example configuration for training a predefined obb (oriented bounding box) detection model + +model: + name: obb_detection + predefined_model: + name: OBBDetectionModel + params: + use_neck: True + +loader: + params: + dataset_name: obb_test + dataset_dir: "../dota8" + dataset_type: YOLOV6OBB + +trainer: + preprocessing: + train_image_size: [&height 256, &width 320] + keep_aspect_ratio: False + normalize: + active: True + + batch_size: 4 + epochs: &epochs 200 + num_workers: 4 + validation_interval: 10 + num_log_images: 8 + + callbacks: + - name: ExportOnTrainEnd + - name: TestOnTrainEnd + + optimizer: + name: SGD + params: + lr: 0.02 + + scheduler: + name: ConstantLR + diff --git a/luxonis_train/attached_modules/losses/__init__.py b/luxonis_train/attached_modules/losses/__init__.py index 28585504..25f0b9e9 100644 --- a/luxonis_train/attached_modules/losses/__init__.py +++ b/luxonis_train/attached_modules/losses/__init__.py @@ -5,12 +5,14 @@ from .efficient_keypoint_bbox_loss import EfficientKeypointBBoxLoss from .implicit_keypoint_bbox_loss import ImplicitKeypointBBoxLoss from .keypoint_loss import KeypointLoss +from .obb_detection_loss import OBBDetectionLoss from .sigmoid_focal_loss import SigmoidFocalLoss from .smooth_bce_with_logits import SmoothBCEWithLogitsLoss from .softmax_focal_loss import SoftmaxFocalLoss __all__ = [ "AdaptiveDetectionLoss", + "OBBDetectionLoss", "BCEWithLogitsLoss", "CrossEntropyLoss", "EfficientKeypointBBoxLoss", diff --git a/luxonis_train/attached_modules/losses/obb_detection_loss.py b/luxonis_train/attached_modules/losses/obb_detection_loss.py new file mode 100644 index 00000000..9eec76b1 --- /dev/null +++ b/luxonis_train/attached_modules/losses/obb_detection_loss.py @@ -0,0 +1,334 @@ +from typing import Literal, cast + +import torch +import torch.nn.functional as F +from torch import Tensor, nn + +from luxonis_train.nodes.heads import EfficientOBBoxHead +from luxonis_train.utils.assigners import RotatedTaskAlignedAssigner +from luxonis_train.utils.boxutils import ( + IoUType, + anchors_for_fpn_features, + bbox2dist, + dist2rbbox, + probiou, + xywh2xyxy, +) +from luxonis_train.utils.types import IncompatibleException, Labels, LabelType, Packet + +from .base_loss import BaseLoss + + +class OBBDetectionLoss(BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]): + node: EfficientOBBoxHead + supported_labels = [LabelType.OBOUNDINGBOX] + + class NodePacket(Packet[Tensor]): + features: list[Tensor] + class_scores: Tensor + distributions: Tensor + angles: Tensor + + def __init__( + self, + iou_type: IoUType = "giou", + reduction: Literal["sum", "mean"] = "mean", + class_loss_weight: float = 1.0, + iou_loss_weight: float = 2.5, + dfl_loss_weight: float = 1.0, + reg_max: int = 16, + **kwargs, + ): + """BBox loss adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial Applications + }. It combines IoU based bbox regression loss and varifocal loss + for classification. + Code is adapted from U{https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/models}. + + @type n_warmup_epochs: int + @param n_warmup_epochs: Number of epochs where ATSS assigner is used, after that we switch to TAL assigner. 
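Before the loss internals, a minimal entry point for training with the new OBB config; this mirrors the `test_models.py` helper added at the end of this patch (the dataset path in the YAML is a placeholder and must point to a real dataset):

```python
from luxonis_train.core import LuxonisModel

model = LuxonisModel("configs/obb_detection_model.yaml")
model.train()
model.test()
```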
+ @type iou_type: L{IoUType} + @param iou_type: IoU type used for bbox regression loss. + @type reduction: Literal["sum", "mean"] + @param reduction: Reduction type for loss. + @type class_loss_weight: float + @param class_loss_weight: Weight of classification loss. + @type iou_loss_weight: float + @param iou_loss_weight: Weight of IoU loss. + @type kwargs: dict + @param kwargs: Additional arguments to pass to L{BaseLoss}. + """ + super().__init__(**kwargs) + + if not isinstance(self.node, EfficientOBBoxHead): + raise IncompatibleException( + f"Loss `{self.name}` is only " + "compatible with nodes of type `EfficientOBBoxHead`." + ) + self.iou_type: IoUType = iou_type + self.reduction = reduction + self.n_classes = self.node.n_classes + self.stride = self.node.stride + self.grid_cell_size = self.node.grid_cell_size + self.grid_cell_offset = self.node.grid_cell_offset + self.original_img_size = self.node.original_in_shape[1:] + self.reg_max = reg_max + + self.assigner = RotatedTaskAlignedAssigner( + n_classes=self.n_classes, topk=10, alpha=0.5, beta=6.0 + ) + # Bounding box loss + self.bbox_loss = RotatedBboxLoss(self.reg_max) + # Class loss + self.varifocal_loss = VarifocalLoss() + # self.bce = nn.BCEWithLogitsLoss(reduction="none") + + # self.n_warmup_epochs = n_warmup_epochs + # self.atts_assigner = ATSSAssigner(topk=9, n_classes=self.n_classes) + # self.tal_assigner = TaskAlignedAssigner( + # topk=13, n_classes=self.n_classes, alpha=1.0, beta=6.0 + # ) + + self.class_loss_weight = class_loss_weight + self.iou_loss_weight = iou_loss_weight + self.dfl_loss_weight = dfl_loss_weight + + self.anchors = None + self.anchor_points = None + self.n_anchors_list = None + self.stride_tensor = None + self.gt_bboxes_scale = None + + def prepare( + self, outputs: Packet[Tensor], labels: Labels + ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: + feats = self.get_input_tensors(outputs, "features") + pred_scores = self.get_input_tensors(outputs, "class_scores")[0] + self.pred_distri = self.get_input_tensors(outputs, "distributions")[0] + pred_angles = self.get_input_tensors(outputs, "angles")[0] + batch_size = pred_scores.shape[0] + device = pred_scores.device + + target = self.get_label(labels)[0] + if self.gt_bboxes_scale is None: + self.gt_bboxes_scale = torch.tensor( + [ + self.original_img_size[1], + self.original_img_size[0], + self.original_img_size[1], + self.original_img_size[0], + ], + device=device, + ) + ( + self.anchors, + self.anchor_points, + self.n_anchors_list, + self.stride_tensor, + ) = anchors_for_fpn_features( + feats, + self.stride, + self.grid_cell_size, + self.grid_cell_offset, + multiply_with_stride=True, + ) + self.anchor_points_strided = self.anchor_points / self.stride_tensor + + target = self._preprocess_target(target, batch_size) + pred_bboxes = torch.cat( + ( + dist2rbbox(self.pred_distri, pred_angles, self.anchor_points_strided), + pred_angles, + ), + dim=-1, + ) + + gt_labels = target[:, :, :1] + gt_cxcywh = target[:, :, 1:] + mask_gt = (gt_cxcywh.sum(-1, keepdim=True) > 0).float() + + # TODO: log change of assigner (once common Logger) + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + mask_positive, + _, + ) = self.assigner( + pred_scores.detach(), + pred_bboxes.detach() * self.stride_tensor, + self.anchor_points, + gt_labels, + gt_cxcywh, + mask_gt, + ) + + return ( + pred_bboxes, + pred_scores, + assigned_bboxes / self.stride_tensor, + assigned_labels, + assigned_scores, + mask_positive, + ) + + def forward( + self, + pred_bboxes: Tensor, + 
pred_scores: Tensor, + assigned_bboxes: Tensor, + assigned_labels: Tensor, + assigned_scores: Tensor, + mask_positive: Tensor, + ): + one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[..., :-1] + + # CLS loss + loss_cls = self.varifocal_loss(pred_scores, assigned_scores, one_hot_label) + if assigned_scores.sum() > 1: + loss_cls /= assigned_scores.sum() + + assigned_scores_sum = max(assigned_scores.sum(), 1) + # Bbox loss + self.bbox_loss = self.bbox_loss.to(self.pred_distri.device) + loss_iou, loss_dfl = self.bbox_loss( + self.pred_distri, + pred_bboxes, + self.anchor_points, + assigned_bboxes, + assigned_scores, + assigned_scores_sum, + mask_positive, + ) + + loss = ( + self.class_loss_weight * loss_cls + + self.iou_loss_weight * loss_iou + + self.dfl_loss_weight * loss_dfl + ) + + sub_losses = { + "class": loss_cls.detach(), + "iou": loss_iou.detach(), + "dfl": loss_dfl.detach(), + } + + return loss, sub_losses + + def _preprocess_target(self, target: Tensor, batch_size: int): + """Preprocess target in shape [batch_size, N, 6] where N is maximum number of + instances in one image.""" + sample_ids, counts = cast( + tuple[Tensor, Tensor], torch.unique(target[:, 0].int(), return_counts=True) + ) + c_max = int(counts.max()) if counts.numel() > 0 else 0 + out_target = torch.zeros(batch_size, c_max, 6, device=target.device) + out_target[:, :, 0] = -1 + for id, count in zip(sample_ids, counts): + out_target[id, :count] = target[target[:, 0] == id][:, 1:] + + scaled_target = out_target[:, :, 1:5] * self.gt_bboxes_scale + # out_target[..., 1:] = box_convert(scaled_target, "xywh", "xyxy") + out_target[..., 1:] = scaled_target + return out_target + + +class VarifocalLoss(nn.Module): + def __init__(self, alpha: float = 0.75, gamma: float = 2.0): + """Varifocal Loss is a loss function for training a dense object detector to predict + the IoU-aware classification score, inspired by focal loss. + Code is adapted from: U{https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/models/losses.py} + + @type alpha: float + @param alpha: alpha parameter in focal loss, default is 0.75. + @type gamma: float + @param gamma: gamma parameter in focal loss, default is 2.0. + """ + + super().__init__() + + self.alpha = alpha + self.gamma = gamma + + def forward( + self, pred_score: Tensor, target_score: Tensor, label: Tensor + ) -> Tensor: + weight = ( + self.alpha * pred_score.pow(self.gamma) * (1 - label) + target_score * label + ) + ce_loss = F.binary_cross_entropy( + pred_score.float(), target_score.float(), reduction="none" + ) + loss = (ce_loss * weight).sum() + return loss + + +class DFLoss(nn.Module): + """Criterion class for computing DFL losses during training.""" + + def __init__(self, reg_max=16) -> None: + """Initialize the DFL module.""" + super().__init__() + self.reg_max = reg_max + + def __call__(self, pred_dist, target): + """Return sum of left and right DFL losses. 
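A toy illustration of the left/right binning this loss uses (plain tensor arithmetic; the target value is made up):

```python
import torch

target = torch.tensor([2.7])  # continuous target distance, in bin units
tl = target.long()            # left bin  -> 2
tr = tl + 1                   # right bin -> 3
wl = tr - target              # weight of the left bin  -> 0.3
wr = 1 - wl                   # weight of the right bin -> 0.7
```

The loss then computes cross-entropy against both neighbouring bins and combines the two terms with these weights.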
+ + Distribution Focal Loss (DFL) proposed in Generalized Focal Loss + https://ieeexplore.ieee.org/document/9792391 + """ + target = target.clamp_(0, self.reg_max - 1 - 0.01) + tl = target.long() # target left + tr = tl + 1 # target right + wl = tr - target # weight left + wr = 1 - wl # weight right + return ( + F.cross_entropy(pred_dist, tl.view(-1), reduction="none").view(tl.shape) + * wl + + F.cross_entropy(pred_dist, tr.view(-1), reduction="none").view(tl.shape) + * wr + ).mean(-1, keepdim=True) + + +class RotatedBboxLoss(nn.Module): + """Criterion class for computing training losses during training.""" + + def __init__(self, reg_max): + """Initialize the BboxLoss module with regularization maximum and DFL + settings.""" + super().__init__() + self.dfl_loss = DFLoss(reg_max) if reg_max > 1 else None + + def forward( + self, + pred_dist, + pred_bboxes, + anchor_points, + target_bboxes, + target_scores, + target_scores_sum, + fg_mask, + ): + """IoU loss.""" + weight = target_scores.sum(-1)[fg_mask].unsqueeze(-1) + iou = probiou(pred_bboxes[fg_mask], target_bboxes[fg_mask]) + loss_iou = ((1.0 - iou) * weight).sum() / target_scores_sum + + # DFL loss + if self.dfl_loss: + target_ltrb = bbox2dist( + xywh2xyxy(target_bboxes[..., :4]), + anchor_points, + self.dfl_loss.reg_max - 1, + ) + loss_dfl = ( + self.dfl_loss( + pred_dist[fg_mask].view(-1, self.dfl_loss.reg_max), + target_ltrb[fg_mask], + ) + * weight + ) + loss_dfl = loss_dfl.sum() / target_scores_sum + else: + loss_dfl = torch.tensor(0.0).to(pred_dist.device) + + return loss_iou, loss_dfl diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision.py b/luxonis_train/attached_modules/metrics/mean_average_precision.py index ffdf5e22..d0642bff 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision.py @@ -15,7 +15,7 @@ class MeanAveragePrecision(BaseMetric): }. 
""" - supported_labels = [LabelType.BOUNDINGBOX] + supported_labels = [LabelType.BOUNDINGBOX, LabelType.OBOUNDINGBOX] def __init__(self, **kwargs): super().__init__(**kwargs) diff --git a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py index df3ac933..d3c67dd6 100644 --- a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py @@ -10,7 +10,7 @@ class BBoxVisualizer(BaseVisualizer[list[Tensor], Tensor]): - supported_labels = [LabelType.BOUNDINGBOX] + supported_labels = [LabelType.BOUNDINGBOX, LabelType.OBOUNDINGBOX] def __init__( self, diff --git a/luxonis_train/models/predefined_models/__init__.py b/luxonis_train/models/predefined_models/__init__.py index 0e8fe8c0..deb235e6 100644 --- a/luxonis_train/models/predefined_models/__init__.py +++ b/luxonis_train/models/predefined_models/__init__.py @@ -1,6 +1,7 @@ from .base_predefined_model import BasePredefinedModel from .classification_model import ClassificationModel from .detection_model import DetectionModel +from .detection_model_obb import OBBDetectionModel from .keypoint_detection_model import KeypointDetectionModel from .segmentation_model import SegmentationModel @@ -8,6 +9,7 @@ "BasePredefinedModel", "SegmentationModel", "DetectionModel", + "OBBDetectionModel", "KeypointDetectionModel", "ClassificationModel", ] diff --git a/luxonis_train/models/predefined_models/detection_model_obb.py b/luxonis_train/models/predefined_models/detection_model_obb.py new file mode 100644 index 00000000..e2b220f9 --- /dev/null +++ b/luxonis_train/models/predefined_models/detection_model_obb.py @@ -0,0 +1,89 @@ +from dataclasses import dataclass, field + +from luxonis_train.utils.config import ( + AttachedModuleConfig, + LossModuleConfig, + MetricModuleConfig, + ModelNodeConfig, +) +from luxonis_train.utils.types import Kwargs + +from .base_predefined_model import BasePredefinedModel + + +@dataclass +class OBBDetectionModel(BasePredefinedModel): + use_neck: bool = True + backbone_params: Kwargs = field(default_factory=dict) + neck_params: Kwargs = field(default_factory=dict) + head_params: Kwargs = field(default_factory=dict) + loss_params: Kwargs = field(default_factory=dict) + visualizer_params: Kwargs = field(default_factory=dict) + task_name: str | None = None + + @property + def nodes(self) -> list[ModelNodeConfig]: + nodes = [ + ModelNodeConfig( + name="EfficientRep", + alias="detection_backbone", + freezing=self.backbone_params.pop("freezing", {}), + params=self.backbone_params, + ), + ] + if self.use_neck: + nodes.append( + ModelNodeConfig( + name="RepPANNeck", + alias="detection_neck", + inputs=["detection_backbone"], + freezing=self.neck_params.pop("freezing", {}), + params=self.neck_params, + ) + ) + + nodes.append( + ModelNodeConfig( + name="EfficientOBBoxHead", + alias="detection_obb_head", + freezing=self.head_params.pop("freezing", {}), + inputs=["detection_neck"] if self.use_neck else ["detection_backbone"], + params=self.head_params, + task=self.task_name, + ) + ) + return nodes + + @property + def losses(self) -> list[LossModuleConfig]: + return [ + LossModuleConfig( + name="OBBDetectionLoss", + alias="detection_obb_loss", + attached_to="detection_obb_head", + params=self.loss_params, + weight=1.0, + ) + ] + + @property + def metrics(self) -> list[MetricModuleConfig]: + return [ + MetricModuleConfig( + name="MeanAveragePrecision", + alias="detection_map", + 
attached_to="detection_obb_head", + is_main_metric=True, + ), + ] + + @property + def visualizers(self) -> list[AttachedModuleConfig]: + return [ + AttachedModuleConfig( + name="BBoxVisualizer", + alias="detection_visualizer", + attached_to="detection_obb_head", + params=self.visualizer_params, + ) + ] diff --git a/luxonis_train/nodes/blocks/__init__.py b/luxonis_train/nodes/blocks/__init__.py index a87c336e..ec0f75ef 100644 --- a/luxonis_train/nodes/blocks/__init__.py +++ b/luxonis_train/nodes/blocks/__init__.py @@ -3,6 +3,7 @@ BlockRepeater, ConvModule, EfficientDecoupledBlock, + EfficientOBBDecoupledBlock, FeatureFusionBlock, KeypointBlock, LearnableAdd, @@ -20,6 +21,7 @@ __all__ = [ "autopad", "EfficientDecoupledBlock", + "EfficientOBBDecoupledBlock", "ConvModule", "UpBlock", "RepDownBlock", diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index 0e0a4ad2..54d1d8ed 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -80,6 +80,40 @@ def _initialize_weights_and_biases(self, prior_prob: float) -> None: module.weight = nn.Parameter(w, requires_grad=True) +class EfficientOBBDecoupledBlock(EfficientDecoupledBlock): + def __init__(self, n_classes: int, in_channels: int): + """Efficient Decoupled block used for angle, class and regression predictions in + OBB (oriented bounding box) tasks. + + @type n_classes: int + @param n_classes: Number of classes. + @type in_channels: int + @param in_channels: Number of input channels. + """ + super().__init__(n_classes, in_channels) + + self.angle_branch = nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=3, + stride=1, + padding=1, + activation=nn.SiLU(), + ), + nn.Conv2d(in_channels=in_channels, out_channels=1, kernel_size=1), + ) + + def forward(self, x: Tensor) -> tuple[Tensor, Tensor, Tensor, Tensor]: + out_feature = self.decoder(x) + + out_cls = self.class_branch(out_feature) + out_reg = self.regression_branch(out_feature) + out_angle = self.angle_branch(out_feature) + + return out_feature, out_cls, out_reg, out_angle + + class ConvModule(nn.Sequential): def __init__( self, diff --git a/luxonis_train/nodes/heads/__init__.py b/luxonis_train/nodes/heads/__init__.py index 28b5e8ca..10c4e515 100644 --- a/luxonis_train/nodes/heads/__init__.py +++ b/luxonis_train/nodes/heads/__init__.py @@ -2,6 +2,7 @@ from .classification_head import ClassificationHead from .efficient_bbox_head import EfficientBBoxHead from .efficient_keypoint_bbox_head import EfficientKeypointBBoxHead +from .efficient_obbox_head import EfficientOBBoxHead from .implicit_keypoint_bbox_head import ImplicitKeypointBBoxHead from .segmentation_head import SegmentationHead @@ -9,6 +10,7 @@ "BiSeNetHead", "ClassificationHead", "EfficientBBoxHead", + "EfficientOBBoxHead", "EfficientKeypointBBoxHead", "ImplicitKeypointBBoxHead", "SegmentationHead", diff --git a/luxonis_train/nodes/heads/efficient_obbox_head.py b/luxonis_train/nodes/heads/efficient_obbox_head.py new file mode 100644 index 00000000..c476101f --- /dev/null +++ b/luxonis_train/nodes/heads/efficient_obbox_head.py @@ -0,0 +1,160 @@ +import math +from typing import Literal + +import torch +from torch import Tensor, nn + +from luxonis_train.nodes.blocks import EfficientOBBDecoupledBlock +from luxonis_train.nodes.heads import EfficientBBoxHead +from luxonis_train.utils.boxutils import ( + anchors_for_fpn_features, + dist2rbbox, + non_max_suppression, +) +from luxonis_train.utils.types import LabelType, Packet 
+ + +class EfficientOBBoxHead(EfficientBBoxHead): + tasks: list[LabelType] = [LabelType.OBOUNDINGBOX] + + def __init__( + self, + n_heads: Literal[2, 3, 4] = 3, + conf_thres: float = 0.25, + iou_thres: float = 0.45, + max_det: int = 300, + **kwargs, + ): + """Head for object detection. + + TODO: add more documentation + + @type n_heads: Literal[2,3,4] + @param n_heads: Number of output heads. Defaults to 3. + ***Note:*** Should be same also on neck in most cases. + + @type conf_thres: float + @param conf_thres: Threshold for confidence. Defaults to C{0.25}. + + @type iou_thres: float + @param iou_thres: Threshold for IoU. Defaults to C{0.45}. + + @type max_det: int + @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}. + """ + super().__init__(n_heads, conf_thres, iou_thres, max_det, **kwargs) + + self.heads = nn.ModuleList() + for i in range(self.n_heads): + curr_head = EfficientOBBDecoupledBlock( + n_classes=self.n_classes, + in_channels=self.in_channels[i], + ) + self.heads.append(curr_head) + + def forward( + self, inputs: list[Tensor] + ) -> tuple[list[Tensor], list[Tensor], list[Tensor], list[Tensor]]: + features: list[Tensor] = [] + cls_score_list: list[Tensor] = [] + reg_distri_list: list[Tensor] = [] + angles_list: list[Tensor] = [] + + for i, module in enumerate(self.heads): + out_feature, out_cls, out_reg, out_angle = module(inputs[i]) + features.append(out_feature) + + out_cls = torch.sigmoid(out_cls) + cls_score_list.append(out_cls) + + reg_distri_list.append(out_reg) + + out_angle = (out_angle.sigmoid() - 0.25) * math.pi # [-pi/4, 3pi/4] + # out_angle = out_angle.sigmoid() * math.pi / 2 # [0, pi/2] + angles_list.append(out_angle) + + return features, cls_score_list, reg_distri_list, angles_list + + def wrap( + self, output: tuple[list[Tensor], list[Tensor], list[Tensor], list[Tensor]] + ) -> Packet[Tensor]: + features, cls_score_list, reg_distri_list, angles_list = output + + if self.export: + outputs = [] + for out_cls, out_reg, out_angles in zip( + cls_score_list, reg_distri_list, angles_list, strict=True + ): + conf, _ = out_cls.max(1, keepdim=True) + out = torch.cat([out_reg, conf, out_cls, out_angles], dim=1) + outputs.append(out) + return {self.task: outputs} + + angle_tensor = torch.cat( + [angles_list[i].flatten(2) for i in range(len(angles_list))], dim=2 + ).permute(0, 2, 1) + cls_tensor = torch.cat( + [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], dim=2 + ).permute(0, 2, 1) + reg_tensor = torch.cat( + [reg_distri_list[i].flatten(2) for i in range(len(reg_distri_list))], dim=2 + ).permute(0, 2, 1) + + if self.training: + return { + "features": features, + "class_scores": [cls_tensor], + "distributions": [reg_tensor], + "angles": [angle_tensor], + } + + else: + boxes = self._process_to_bbox( + (features, cls_tensor, reg_tensor, angle_tensor) + ) + return { + "boundingbox": boxes, + "features": features, + "class_scores": [cls_tensor], + "distributions": [reg_tensor], + "angles": [angle_tensor], + } + + def _process_to_bbox( + self, output: tuple[list[Tensor], Tensor, Tensor, Tensor] + ) -> list[Tensor]: + """Performs post-processing of the output and returns bboxs after NMS.""" + features, cls_score_list, reg_dist_list, angles_list = output + _, anchor_points, _, stride_tensor = anchors_for_fpn_features( + features, + self.stride, + self.grid_cell_size, + self.grid_cell_offset, + multiply_with_stride=False, + ) + + pred_bboxes = dist2rbbox(reg_dist_list, angles_list, anchor_points) + + pred_bboxes *= stride_tensor + 
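        # The concatenation below merges the decoded cxcywh boxes, a constant
        # confidence column, and the per-class scores into a single tensor with the
        # layout expected by non_max_suppression (which is run with
        # bbox_format="cxcywh" and predicts_objectness=False).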
output_merged = torch.cat( + [ + pred_bboxes, + torch.ones( + (features[-1].shape[0], pred_bboxes.shape[1], 1), + dtype=pred_bboxes.dtype, + device=pred_bboxes.device, + ), + cls_score_list, + ], + dim=-1, + ) + + return non_max_suppression( + output_merged, + n_classes=self.n_classes, + conf_thres=self.conf_thres, + iou_thres=self.iou_thres, + bbox_format="cxcywh", + max_det=self.max_det, + predicts_objectness=False, + ) diff --git a/luxonis_train/utils/assigners/__init__.py b/luxonis_train/utils/assigners/__init__.py index 4d9bec9f..fb7d5fdd 100644 --- a/luxonis_train/utils/assigners/__init__.py +++ b/luxonis_train/utils/assigners/__init__.py @@ -1,4 +1,4 @@ from .atts_assigner import ATSSAssigner -from .tal_assigner import TaskAlignedAssigner +from .tal_assigner import RotatedTaskAlignedAssigner, TaskAlignedAssigner -__all__ = ["ATSSAssigner", "TaskAlignedAssigner"] +__all__ = ["ATSSAssigner", "TaskAlignedAssigner", "RotatedTaskAlignedAssigner"] diff --git a/luxonis_train/utils/assigners/tal_assigner.py b/luxonis_train/utils/assigners/tal_assigner.py index 08b5b461..768e879d 100644 --- a/luxonis_train/utils/assigners/tal_assigner.py +++ b/luxonis_train/utils/assigners/tal_assigner.py @@ -2,7 +2,13 @@ import torch.nn.functional as F from torch import Tensor, nn -from .utils import batch_iou, candidates_in_gt, fix_collisions +from .utils import ( + batch_iou, + batch_iou_obb, + candidates_in_gt, + candidates_in_gt_obb, + fix_collisions, +) class TaskAlignedAssigner(nn.Module): @@ -204,14 +210,16 @@ def _get_final_assignments( @type gt_labels: Tensor @param gt_labels: Initial GT labels [bs, n_max_boxes, 1] @type gt_bboxes: Tensor - @param gt_bboxes: Initial GT bboxes [bs, n_max_boxes, 4] + @param gt_bboxes: Initial GT bboxes [bs, n_max_boxes, 4] or [bs, n_max_boxes, 5] + for obb @type assigned_gt_idx: Tensor @param assigned_gt_idx: Indices of matched GTs [bs, n_anchors] @type mask_pos_sum: Tensor @param mask_pos_sum: Mask of matched GTs [bs, n_anchors] @rtype: tuple[Tensor, Tensor, Tensor] @return: Assigned labels of shape [bs, n_anchors], assigned bboxes of shape [bs, - n_anchors, 4], assigned scores of shape [bs, n_anchors, n_classes]. + n_anchors, 4] or [bs, n_max_boxes, 5] for obb, assigned scores of shape [bs, + n_anchors, n_classes]. """ # assigned target labels batch_ind = torch.arange( @@ -221,7 +229,7 @@ def _get_final_assignments( assigned_labels = gt_labels.long().flatten()[assigned_gt_idx] # assigned target boxes - assigned_bboxes = gt_bboxes.reshape([-1, 4])[assigned_gt_idx] + assigned_bboxes = gt_bboxes.reshape([-1, gt_bboxes.shape[-1]])[assigned_gt_idx] # assigned target scores assigned_labels[assigned_labels < 0] = 0 @@ -238,3 +246,128 @@ def _get_final_assignments( ) return assigned_labels, assigned_bboxes, assigned_scores + + +class RotatedTaskAlignedAssigner(TaskAlignedAssigner): + """Assigns ground-truth objects to rotated bounding boxes using a task-aligned + metric.""" + + @torch.no_grad() + def forward( + self, + pred_scores: Tensor, + pred_bboxes: Tensor, + anchor_points: Tensor, + gt_labels: Tensor, + gt_bboxes: Tensor, + mask_gt: Tensor, + ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: + """Assigner's forward method which generates final assignments. 
+ + @type pred_scores: Tensor + @param pred_scores: Predicted scores [bs, n_anchors, 1] + @type pred_bboxes: Tensor + @param pred_bboxes: Predicted bboxes [bs, n_anchors, 5] + @type anchor_points: Tensor + @param anchor_points: Anchor points [n_anchors, 2] + @type gt_labels: Tensor + @param gt_labels: Initial GT labels [bs, n_max_boxes, 1] + @type gt_bboxes: Tensor + @param gt_bboxes: Initial GT bboxes [bs, n_max_boxes, 5] + @type mask_gt: Tensor + @param mask_gt: Mask for valid GTs [bs, n_max_boxes, 1] + @rtype: tuple[Tensor, Tensor, Tensor, Tensor, Tensor] + @return: Assigned labels of shape [bs, n_anchors], assigned bboxes of shape [bs, + n_anchors, 5], assigned scores of shape [bs, n_anchors, n_classes] and + output mask of shape [bs, n_anchors] + """ + self.bs = pred_scores.size(0) + self.n_max_boxes = gt_bboxes.size(1) + + if self.n_max_boxes == 0: + device = gt_bboxes.device + return ( + torch.full_like(pred_scores[..., 0], self.n_classes).to(device), + torch.zeros_like(pred_bboxes).to(device), + torch.zeros_like(pred_scores).to(device), + torch.zeros_like(pred_scores[..., 0]).to(device), + torch.zeros_like(pred_scores[..., 0]).to(device), + ) + + # Compute alignment metric between all bboxes (bboxes of all pyramid levels) and GT + align_metric, overlaps = self._get_alignment_metric( + pred_scores, pred_bboxes, gt_labels, gt_bboxes + ) + + # Select top-k bboxes as candidates for each GT + is_in_gts = candidates_in_gt_obb(anchor_points, gt_bboxes) + is_in_gts = torch.reshape(is_in_gts, (self.bs, self.n_max_boxes, -1)) + is_in_topk = self._select_topk_candidates( + align_metric * is_in_gts, + topk_mask=mask_gt.repeat([1, 1, self.topk]).bool(), + ) + + # Final positive candidates + mask_pos = is_in_topk * is_in_gts * mask_gt + + # If an anchor box is assigned to multiple gts, the one with the highest IoU is selected + assigned_gt_idx, mask_pos_sum, mask_pos = fix_collisions( + mask_pos, overlaps, self.n_max_boxes + ) + + # Generate final targets based on masks + assigned_labels, assigned_bboxes, assigned_scores = self._get_final_assignments( + gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum + ) + + # normalize + align_metric *= mask_pos + pos_align_metrics = align_metric.max(dim=-1, keepdim=True)[0] + pos_overlaps = (overlaps * mask_pos).max(dim=-1, keepdim=True)[0] + norm_align_metric = ( + (align_metric * pos_overlaps / (pos_align_metrics + self.eps)) + .max(-2)[0] + .unsqueeze(-1) + ) + assigned_scores = assigned_scores * norm_align_metric + + out_mask_positive = mask_pos_sum.bool() + + return ( + assigned_labels, + assigned_bboxes, + assigned_scores, + out_mask_positive, + assigned_gt_idx, + ) + + def _get_alignment_metric( + self, + pred_scores: Tensor, + pred_bboxes: Tensor, + gt_labels: Tensor, + gt_bboxes: Tensor, + ): + """Calculates anchor alignment metric and IoU between GTs and predicted oriented + bboxes. 
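The task-aligned score combines classification confidence and rotated IoU; a toy evaluation with the values OBBDetectionLoss passes to this assigner (alpha=0.5, beta=6.0):

```python
import torch

cls_score = torch.tensor(0.8)  # predicted score for the GT class
overlap = torch.tensor(0.6)    # probiou between prediction and GT
align_metric = cls_score.pow(0.5) * overlap.pow(6.0)  # ~0.042
```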
+ + @type pred_scores: Tensor + @param pred_scores: Predicted scores [bs, n_anchors, 1] + @type pred_bboxes: Tensor + @param pred_bboxes: Predicted bboxes [bs, n_anchors, 5] + @type gt_labels: Tensor + @param gt_labels: Initial GT labels [bs, n_max_boxes, 1] + @type gt_bboxes: Tensor + @param gt_bboxes: Initial GT bboxes [bs, n_max_boxes, 5] + """ + pred_scores = pred_scores.permute(0, 2, 1) + gt_labels = gt_labels.to(torch.long) + ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long) + ind[0] = torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes) + ind[1] = gt_labels.squeeze(-1) + bbox_scores = pred_scores[ind[0], ind[1]] + + overlaps = batch_iou_obb(gt_bboxes, pred_bboxes) + align_metric = bbox_scores.pow(self.alpha) * overlaps.pow(self.beta) + + return align_metric, overlaps diff --git a/luxonis_train/utils/assigners/utils.py b/luxonis_train/utils/assigners/utils.py index fadf5f8e..6467c62b 100644 --- a/luxonis_train/utils/assigners/utils.py +++ b/luxonis_train/utils/assigners/utils.py @@ -2,7 +2,7 @@ import torch.nn.functional as F from torch import Tensor -from luxonis_train.utils.boxutils import bbox_iou +from luxonis_train.utils.boxutils import bbox_iou, probiou, xywhr2xyxyxyxy def candidates_in_gt( @@ -30,6 +30,37 @@ def candidates_in_gt( return candidates +def candidates_in_gt_obb(xy_centers, gt_bboxes): + """Select the positive anchor center in gt for rotated bounding boxes. + + Args: + xy_centers (Tensor): shape(h*w, 2) + gt_bboxes (Tensor): shape(b, n_boxes, 5) + + Returns: + (Tensor): shape(b, n_boxes, h*w) + """ + # (b, n_boxes, 5) --> (b, n_boxes, 4, 2) + corners = xywhr2xyxyxyxy(gt_bboxes) + # (b, n_boxes, 1, 2) + a, b, _, d = corners.split(1, dim=-2) + ab = b - a + ad = d - a + + # (b, n_boxes, h*w, 2) + ap = xy_centers - a + norm_ab = (ab * ab).sum(dim=-1) + norm_ad = (ad * ad).sum(dim=-1) + ap_dot_ab = (ap * ab).sum(dim=-1) + ap_dot_ad = (ap * ad).sum(dim=-1) + return ( + (ap_dot_ab >= 0) + & (ap_dot_ab <= norm_ab) + & (ap_dot_ad >= 0) + & (ap_dot_ad <= norm_ad) + ) # is_in_box + + def fix_collisions( mask_pos: Tensor, overlaps: Tensor, n_max_boxes: int ) -> tuple[Tensor, Tensor, Tensor]: @@ -71,3 +102,24 @@ def batch_iou(batch1: Tensor, batch2: Tensor) -> Tensor: [bbox_iou(batch1[i], batch2[i]) for i in range(batch1.size(0))], dim=0 ) return ious + + +def batch_iou_obb(batch1: Tensor, batch2: Tensor) -> Tensor: + """Calculates IoU for each pair of oriented bboxes in the batch. Bboxes must be in + xcycwhr format. + + @type batch1: Tensor + @param batch1: Tensor of shape C{[bs, N, 5]} + @type batch2: Tensor + @param batch2: Tensor of shape C{[bs, M, 5]} + @rtype: Tensor + @return: Per image box IoU of shape C{[bs, N]} + """ + ious = torch.stack( + [ + probiou(batch1[i], batch2[i]).squeeze(-1).clamp_(0) + for i in range(batch1.size(0)) + ], + dim=0, + ) + return ious diff --git a/luxonis_train/utils/boxutils.py b/luxonis_train/utils/boxutils.py index 3a206c75..755cce11 100644 --- a/luxonis_train/utils/boxutils.py +++ b/luxonis_train/utils/boxutils.py @@ -3,6 +3,7 @@ import math from typing import Literal, TypeAlias +import numpy as np import torch from scipy.cluster.vq import kmeans from torch import Tensor @@ -144,6 +145,28 @@ def dist2bbox( return bbox +def dist2rbbox( + distance: Tensor, + pred_angles: Tensor, + anchor_points: Tensor, +) -> Tensor: + """Transform distance (ltrb) to a rotated bounding box in "cxcywh" format. 
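A tiny sanity check of this decode for the axis-aligned case (angle = 0), assuming the package is installed so the function can be imported:

```python
import torch

from luxonis_train.utils.boxutils import dist2rbbox

anchor = torch.tensor([[10.0, 10.0]])
dist = torch.tensor([[2.0, 2.0, 4.0, 4.0]])  # l, t, r, b
angle = torch.tensor([[0.0]])
print(dist2rbbox(dist, angle, anchor))  # tensor([[11., 11., 6., 6.]]) -> cx, cy, w, h
```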
+ + @type distance: Tensor + @param distance: Distance predictions + @type anchor_points: Tensor + @param anchor_points: Head's anchor points + @rtype: Tensor + @return: BBoxes in "cxcywh" format + """ + lt, rb = torch.split(distance, 2, -1) + cos, sin = torch.cos(pred_angles), torch.sin(pred_angles) + xf, yf = ((rb - lt) / 2).split(1, dim=-1) + x, y = xf * cos - yf * sin, xf * sin + yf * cos + xy = torch.cat([x, y], dim=-1) + anchor_points + return torch.cat([xy, lt + rb], dim=-1) + + def bbox2dist(bbox: Tensor, anchor_points: Tensor, reg_max: float) -> Tensor: """Transform bbox(xyxy) to distance(ltrb). @@ -163,6 +186,60 @@ def bbox2dist(bbox: Tensor, anchor_points: Tensor, reg_max: float) -> Tensor: return dist +def xywhr2xyxyxyxy(x): + """Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2, + xy3, xy4]. Rotation values should be in radians from 0 to pi/2. + + Args: + x (numpy.ndarray | torch.Tensor): Boxes in [cx, cy, w, h, rotation] format of shape (n, 5) or (b, n, 5). + + Returns: + (numpy.ndarray | torch.Tensor): Converted corner points of shape (n, 4, 2) or (b, n, 4, 2). + """ + cos, sin, cat, stack = ( + (torch.cos, torch.sin, torch.cat, torch.stack) + if isinstance(x, torch.Tensor) + else (np.cos, np.sin, np.concatenate, np.stack) + ) + + ctr = x[..., :2] + w, h, angle = (x[..., i : i + 1] for i in range(2, 5)) + cos_value, sin_value = cos(angle), sin(angle) + vec1 = [w / 2 * cos_value, w / 2 * sin_value] + vec2 = [-h / 2 * sin_value, h / 2 * cos_value] + vec1 = cat(vec1, -1) + vec2 = cat(vec2, -1) + pt1 = ctr + vec1 + vec2 + pt2 = ctr + vec1 - vec2 + pt3 = ctr - vec1 - vec2 + pt4 = ctr - vec1 + vec2 + return stack([pt1, pt2, pt3, pt4], -2) + + +def xywh2xyxy(x): + """Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, + x2, y2) format where (x1, y1) is the top-left corner and (x2, y2) is the bottom- + right corner. Note: ops per 2 channels faster than per channel. + + Args: + x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x, y, width, height) format. + + Returns: + y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format. + """ + assert ( + x.shape[-1] == 4 + ), f"input shape last dimension expected 4 but input shape is {x.shape}" + y = ( + torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) + ) # faster than clone/copy + xy = x[..., :2] # centers + wh = x[..., 2:] / 2 # half width-height + y[..., :2] = xy - wh # top left xy + y[..., 2:] = xy + wh # bottom right xy + return y + + def bbox_iou( bbox1: Tensor, bbox2: Tensor, @@ -275,6 +352,78 @@ def bbox_iou( return iou +def probiou(obb1, obb2, CIoU=False, eps=1e-7): + """Calculate probabilistic IoU between oriented bounding boxes. + + Implements the algorithm from https://arxiv.org/pdf/2106.06072v1.pdf. + + Args: + obb1 (torch.Tensor): Ground truth OBBs, shape (N, 5), format xywhr. + obb2 (torch.Tensor): Predicted OBBs, shape (N, 5), format xywhr. + CIoU (bool, optional): If True, calculate CIoU. Defaults to False. + eps (float, optional): Small value to avoid division by zero. Defaults to 1e-7. + + Returns: + (torch.Tensor): OBB similarities, shape (N,). + + Note: + OBB format: [center_x, center_y, width, height, rotation_angle]. + If CIoU is True, returns CIoU instead of IoU. 
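+        Each box is modelled as a 2D Gaussian whose covariance is derived from its
+        width, height and rotation; the Bhattacharyya distance bd between the two
+        Gaussians is clamped to [eps, 100], the Hellinger distance is
+        hd = sqrt(1 - exp(-bd)), and the returned similarity is 1 - hd.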
+ """ + x1, y1 = obb1[..., :2].split(1, dim=-1) + x2, y2 = obb2[..., :2].split(1, dim=-1) + a1, b1, c1 = _get_covariance_matrix(obb1) + a2, b2, c2 = _get_covariance_matrix(obb2) + + t1 = ( + ((a1 + a2) * (y1 - y2).pow(2) + (b1 + b2) * (x1 - x2).pow(2)) + / ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2) + eps) + ) * 0.25 + t2 = ( + ((c1 + c2) * (x2 - x1) * (y1 - y2)) + / ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2) + eps) + ) * 0.5 + t3 = ( + ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2)) + / ( + 4 + * ((a1 * b1 - c1.pow(2)).clamp_(0) * (a2 * b2 - c2.pow(2)).clamp_(0)).sqrt() + + eps + ) + + eps + ).log() * 0.5 + bd = (t1 + t2 + t3).clamp(eps, 100.0) + hd = (1.0 - (-bd).exp() + eps).sqrt() + iou = 1 - hd + if CIoU: # only include the wh aspect ratio part + w1, h1 = obb1[..., 2:4].split(1, dim=-1) + w2, h2 = obb2[..., 2:4].split(1, dim=-1) + v = (4 / math.pi**2) * ((w2 / h2).atan() - (w1 / h1).atan()).pow(2) + with torch.no_grad(): + alpha = v / (v - iou + (1 + eps)) + return iou - v * alpha # CIoU + return iou + + +def _get_covariance_matrix(boxes): + """Generating covariance matrix from obbs. + + Args: + boxes (torch.Tensor): A tensor of shape (N, 5) representing rotated bounding boxes, with xywhr format. + + Returns: + (torch.Tensor): Covariance matrices corresponding to original rotated bounding boxes. + """ + # Gaussian bounding boxes, ignore the center points (the first two columns) because they are not needed here. + gbbs = torch.cat((boxes[:, 2:4].pow(2) / 12, boxes[:, 4:]), dim=-1) + a, b, c = gbbs.split(1, dim=-1) + cos = c.cos() + sin = c.sin() + cos2 = cos.pow(2) + sin2 = sin.pow(2) + return a * cos2 + b * sin2, a * sin2 + b * cos2, (a - b) * cos * sin + + def non_max_suppression( preds: Tensor, n_classes: int, diff --git a/test_models.py b/test_models.py new file mode 100644 index 00000000..064f57f4 --- /dev/null +++ b/test_models.py @@ -0,0 +1,25 @@ +from pathlib import Path + +from luxonis_train.core import LuxonisModel + +TEST_OUTPUT = Path("./probe") +OPTS = { + "trainer.epochs": 1, + "trainer.batch_size": 1, + "trainer.validation_interval": 1, + "trainer.callbacks": "[]", + "tracker.save_directory": str(TEST_OUTPUT), + "tuner.n_trials": 4, +} + + +def main(): + config_file = "obb_detection_model" + config_file = f"configs/{config_file}.yaml" + model = LuxonisModel(config_file, opts=OPTS) + model.train() + model.test() + + +if __name__ == "__main__": + main() From 5acabfc4c3ba5c4352e53d5b82205697324cb667 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin <116955183+JSabadin@users.noreply.github.com> Date: Tue, 3 Sep 2024 21:32:48 +0200 Subject: [PATCH 59/75] Config `pin_memory` Option (#67) --- luxonis_train/core/core.py | 1 + luxonis_train/utils/config.py | 1 + 2 files changed, 2 insertions(+) diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index 16953062..c683773c 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -167,6 +167,7 @@ def __init__( drop_last=( self.cfg.trainer.skip_last_batch if view == "train" else False ), + pin_memory=self.cfg.trainer.pin_memory, sampler=sampler if view == "train" else None, ) for view in ["train", "val", "test"] diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 44c00637..e3e4c0fb 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -283,6 +283,7 @@ class TrainerConfig(BaseModelExtraForbid): validation_interval: Literal[-1] | PositiveInt = 1 num_log_images: NonNegativeInt = 4 skip_last_batch: bool = True + pin_memory: bool = True log_sub_losses: bool = 
True save_top_k: Literal[-1] | NonNegativeInt = 3 From 8a99628e4a9e789553f351418164c713a5a2b797 Mon Sep 17 00:00:00 2001 From: Anton Makoveev Date: Tue, 3 Sep 2024 23:45:36 +0200 Subject: [PATCH 60/75] [Fix]: change target dimensions in _preprocess_target --- configs/obb_detection_model.yaml | 10 +++---- .../losses/obb_detection_loss.py | 10 ++++++- luxonis_train/utils/boxutils.py | 27 +++++++++++++++++++ luxonis_train/utils/loaders/base_loader.py | 6 ++++- 4 files changed, 46 insertions(+), 7 deletions(-) diff --git a/configs/obb_detection_model.yaml b/configs/obb_detection_model.yaml index 719b3569..a1f7f42d 100644 --- a/configs/obb_detection_model.yaml +++ b/configs/obb_detection_model.yaml @@ -14,11 +14,11 @@ loader: dataset_type: YOLOV6OBB trainer: - preprocessing: - train_image_size: [&height 256, &width 320] - keep_aspect_ratio: False - normalize: - active: True + # preprocessing: + # train_image_size: [&height 256, &width 320] + # keep_aspect_ratio: False + # normalize: + # active: True batch_size: 4 epochs: &epochs 200 diff --git a/luxonis_train/attached_modules/losses/obb_detection_loss.py b/luxonis_train/attached_modules/losses/obb_detection_loss.py index 9eec76b1..e87c1347 100644 --- a/luxonis_train/attached_modules/losses/obb_detection_loss.py +++ b/luxonis_train/attached_modules/losses/obb_detection_loss.py @@ -13,6 +13,7 @@ dist2rbbox, probiou, xywh2xyxy, + xyxyxyxy2xywhr, ) from luxonis_train.utils.types import IncompatibleException, Labels, LabelType, Packet @@ -217,6 +218,10 @@ def forward( def _preprocess_target(self, target: Tensor, batch_size: int): """Preprocess target in shape [batch_size, N, 6] where N is maximum number of instances in one image.""" + idx_cls = target[:, :2] + xyxyxyxy = target[:, 2:] + cxcywhr = xyxyxyxy2xywhr(xyxyxyxy) + target = torch.cat([idx_cls, torch.tensor(cxcywhr)], dim=-1) sample_ids, counts = cast( tuple[Tensor, Tensor], torch.unique(target[:, 0].int(), return_counts=True) ) @@ -227,8 +232,11 @@ def _preprocess_target(self, target: Tensor, batch_size: int): out_target[id, :count] = target[target[:, 0] == id][:, 1:] scaled_target = out_target[:, :, 1:5] * self.gt_bboxes_scale + scaled_target_angle = torch.cat( + [scaled_target, out_target[:, :, 5].transpose(0, 1).unsqueeze(0)], dim=-1 + ) # out_target[..., 1:] = box_convert(scaled_target, "xywh", "xyxy") - out_target[..., 1:] = scaled_target + out_target[..., 1:] = scaled_target_angle return out_target diff --git a/luxonis_train/utils/boxutils.py b/luxonis_train/utils/boxutils.py index 755cce11..0e5f8433 100644 --- a/luxonis_train/utils/boxutils.py +++ b/luxonis_train/utils/boxutils.py @@ -3,6 +3,7 @@ import math from typing import Literal, TypeAlias +import cv2 import numpy as np import torch from scipy.cluster.vq import kmeans @@ -186,6 +187,32 @@ def bbox2dist(bbox: Tensor, anchor_points: Tensor, reg_max: float) -> Tensor: return dist +def xyxyxyxy2xywhr(x): + """Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, xy3, xy4] to [xywh, + rotation]. Rotation values are returned in radians from 0 to pi/2. + + Args: + x (numpy.ndarray | torch.Tensor): Input box corners [xy1, xy2, xy3, xy4] of shape (n, 8). + + Returns: + (numpy.ndarray | torch.Tensor): Converted data in [cx, cy, w, h, rotation] format of shape (n, 5). 
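+
+    Example:
+        >>> # illustrative axis-aligned 4x2 box given as its four corners
+        >>> corners = torch.tensor([[0.0, 0.0, 4.0, 0.0, 4.0, 2.0, 0.0, 2.0]])
+        >>> xyxyxyxy2xywhr(corners).shape
+        torch.Size([1, 5])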
+ """ + is_torch = isinstance(x, torch.Tensor) + points = x.cpu().numpy() if is_torch else x + points = points.reshape(len(x), -1, 2) + rboxes = [] + for pts in points: + # NOTE: Use cv2.minAreaRect to get accurate xywhr, + # especially some objects are cut off by augmentations in dataloader. + (cx, cy), (w, h), angle = cv2.minAreaRect(pts) + rboxes.append([cx, cy, w, h, angle / 180 * np.pi]) + return ( + torch.tensor(rboxes, device=x.device, dtype=x.dtype) + if is_torch + else np.asarray(rboxes) + ) + + def xywhr2xyxyxyxy(x): """Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2, xy3, xy4]. Rotation values should be in radians from 0 to pi/2. diff --git a/luxonis_train/utils/loaders/base_loader.py b/luxonis_train/utils/loaders/base_loader.py index 5e884955..9aaa6f53 100644 --- a/luxonis_train/utils/loaders/base_loader.py +++ b/luxonis_train/utils/loaders/base_loader.py @@ -141,7 +141,11 @@ def collate_fn( if label_type in [LabelType.CLASSIFICATION, LabelType.SEGMENTATION]: out_labels[task] = torch.stack(annos, 0), label_type - elif label_type in [LabelType.KEYPOINTS, LabelType.BOUNDINGBOX]: + elif label_type in [ + LabelType.KEYPOINTS, + LabelType.BOUNDINGBOX, + LabelType.OBOUNDINGBOX, + ]: label_box: list[Tensor] = [] for i, box in enumerate(annos): l_box = torch.zeros((box.shape[0], box.shape[1] + 1)) From 8026126bc3e57e9b602373fb8aec3d5356443cd3 Mon Sep 17 00:00:00 2001 From: Anton Makoveev Date: Wed, 4 Sep 2024 22:57:28 +0200 Subject: [PATCH 61/75] [Fix]: change obb decoupled block dimensions for DFL --- .../losses/obb_detection_loss.py | 15 ++-- luxonis_train/nodes/blocks/blocks.py | 17 ++++- .../nodes/heads/efficient_obbox_head.py | 43 ++++++++++-- luxonis_train/utils/assigners/utils.py | 10 ++- luxonis_train/utils/boxutils.py | 70 ++++++++++++++++++- 5 files changed, 140 insertions(+), 15 deletions(-) diff --git a/luxonis_train/attached_modules/losses/obb_detection_loss.py b/luxonis_train/attached_modules/losses/obb_detection_loss.py index e87c1347..20f5d08a 100644 --- a/luxonis_train/attached_modules/losses/obb_detection_loss.py +++ b/luxonis_train/attached_modules/losses/obb_detection_loss.py @@ -144,8 +144,8 @@ def prepare( ) gt_labels = target[:, :, :1] - gt_cxcywh = target[:, :, 1:] - mask_gt = (gt_cxcywh.sum(-1, keepdim=True) > 0).float() + gt_cxcywhr = target[:, :, 1:] + mask_gt = (gt_cxcywhr.sum(-1, keepdim=True) > 0).float() # TODO: log change of assigner (once common Logger) ( @@ -159,7 +159,7 @@ def prepare( pred_bboxes.detach() * self.stride_tensor, self.anchor_points, gt_labels, - gt_cxcywh, + gt_cxcywhr, mask_gt, ) @@ -221,7 +221,10 @@ def _preprocess_target(self, target: Tensor, batch_size: int): idx_cls = target[:, :2] xyxyxyxy = target[:, 2:] cxcywhr = xyxyxyxy2xywhr(xyxyxyxy) - target = torch.cat([idx_cls, torch.tensor(cxcywhr)], dim=-1) + if isinstance(cxcywhr, Tensor): + target = torch.cat([idx_cls, cxcywhr.clone().detach()], dim=-1) + else: + target = torch.cat([idx_cls, torch.tensor(cxcywhr)], dim=-1) sample_ids, counts = cast( tuple[Tensor, Tensor], torch.unique(target[:, 0].int(), return_counts=True) ) @@ -286,6 +289,7 @@ def __call__(self, pred_dist, target): """ target = target.clamp_(0, self.reg_max - 1 - 0.01) tl = target.long() # target left + # tl = target # target left tr = tl + 1 # target right wl = tr - target # weight left wr = 1 - wl # weight right @@ -330,7 +334,8 @@ def forward( ) loss_dfl = ( self.dfl_loss( - pred_dist[fg_mask].view(-1, self.dfl_loss.reg_max), + # pred_dist[fg_mask].view(-1, self.dfl_loss.reg_max), 
+ pred_dist[fg_mask], target_ltrb[fg_mask], ) * weight diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index 54d1d8ed..3b191a34 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -81,7 +81,7 @@ def _initialize_weights_and_biases(self, prior_prob: float) -> None: class EfficientOBBDecoupledBlock(EfficientDecoupledBlock): - def __init__(self, n_classes: int, in_channels: int): + def __init__(self, n_classes: int, in_channels: int, reg_max: int = 16): """Efficient Decoupled block used for angle, class and regression predictions in OBB (oriented bounding box) tasks. @@ -89,9 +89,24 @@ def __init__(self, n_classes: int, in_channels: int): @param n_classes: Number of classes. @type in_channels: int @param in_channels: Number of input channels. + @type reg_max: int + @param reg_max: Number of bins for predicting the distributions of bounding box + coordinates. """ super().__init__(n_classes, in_channels) + self.regression_branch = nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=3, + stride=1, + padding=1, + activation=nn.SiLU(), + ), + nn.Conv2d(in_channels=in_channels, out_channels=4 * reg_max, kernel_size=1), + ) + self.angle_branch = nn.Sequential( ConvModule( in_channels=in_channels, diff --git a/luxonis_train/nodes/heads/efficient_obbox_head.py b/luxonis_train/nodes/heads/efficient_obbox_head.py index c476101f..93a59e00 100644 --- a/luxonis_train/nodes/heads/efficient_obbox_head.py +++ b/luxonis_train/nodes/heads/efficient_obbox_head.py @@ -23,6 +23,7 @@ def __init__( conf_thres: float = 0.25, iou_thres: float = 0.45, max_det: int = 300, + reg_max: int = 16, **kwargs, ): """Head for object detection. @@ -41,9 +42,14 @@ def __init__( @type max_det: int @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}. + + @type reg_max: int + @param reg_max: Number of bins for predicting the distributions of bounding box coordinates. 
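+            The regression branch outputs C{4 * reg_max} logits per anchor; at decode
+            time each group of C{reg_max} logits is reduced to an expected l/t/r/b
+            distance by a softmax-weighted sum over the bin indices (Distribution
+            Focal Loss decoding).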
""" super().__init__(n_heads, conf_thres, iou_thres, max_det, **kwargs) + self.reg_max = reg_max + self.heads = nn.ModuleList() for i in range(self.n_heads): curr_head = EfficientOBBDecoupledBlock( @@ -69,8 +75,8 @@ def forward( reg_distri_list.append(out_reg) - out_angle = (out_angle.sigmoid() - 0.25) * math.pi # [-pi/4, 3pi/4] - # out_angle = out_angle.sigmoid() * math.pi / 2 # [0, pi/2] + # out_angle = (out_angle.sigmoid() - 0.25) * math.pi # [-pi/4, 3pi/4] + out_angle = out_angle.sigmoid() * math.pi / 2 # [0, pi/2] angles_list.append(out_angle) return features, cls_score_list, reg_distri_list, angles_list @@ -133,9 +139,37 @@ def _process_to_bbox( multiply_with_stride=False, ) - pred_bboxes = dist2rbbox(reg_dist_list, angles_list, anchor_points) + # The following block below is implied for the distributed predictions of the regression + # branch (used in DFL) + # if self.use_dfl: # consider adding this as a parameter + proj = torch.arange( + self.reg_max, dtype=torch.float, device=reg_dist_list.device + ) + b, a, c = reg_dist_list.shape # batch, anchors, channels + reg_dist_tensor = ( # we get a tensor of the expected values (mean) of the regression predictions + reg_dist_list.view(b, a, 4, c // 4) + .softmax(3) + .matmul(proj.type(reg_dist_list.dtype)) + ) + # pred_bboxes = dist2rbbox(reg_dist_tensor, angles_list, anchor_points) # xywh + pred_bboxes = torch.cat( + ( + dist2rbbox(reg_dist_tensor, angles_list, anchor_points), + angles_list, + ), + dim=-1, + ) # xywhr + + # pred_bboxes = xywh2xyxy(pred_bboxes) + # pred_bboxes = xywhr2xyxyxyxy( + # pred_bboxes + # ) # format: [xy1, xy2, xy3, xy4], shape: (b, n, 4, 2) + + xy_strided = pred_bboxes[..., :2] * stride_tensor + pred_bboxes = torch.cat( + [xy_strided, pred_bboxes[..., 2:]], dim=-1 + ) # xywhr with xy strided - pred_bboxes *= stride_tensor output_merged = torch.cat( [ pred_bboxes, @@ -149,6 +183,7 @@ def _process_to_bbox( dim=-1, ) + # NOTE: change non_max_suppression for obb return non_max_suppression( output_merged, n_classes=self.n_classes, diff --git a/luxonis_train/utils/assigners/utils.py b/luxonis_train/utils/assigners/utils.py index 6467c62b..b9911a0b 100644 --- a/luxonis_train/utils/assigners/utils.py +++ b/luxonis_train/utils/assigners/utils.py @@ -2,7 +2,11 @@ import torch.nn.functional as F from torch import Tensor -from luxonis_train.utils.boxutils import bbox_iou, probiou, xywhr2xyxyxyxy +from luxonis_train.utils.boxutils import ( + batch_probiou, + bbox_iou, + xywhr2xyxyxyxy, +) def candidates_in_gt( @@ -113,11 +117,11 @@ def batch_iou_obb(batch1: Tensor, batch2: Tensor) -> Tensor: @type batch2: Tensor @param batch2: Tensor of shape C{[bs, M, 5]} @rtype: Tensor - @return: Per image box IoU of shape C{[bs, N]} + @return: Per image box IoU of shape C{[bs, N, M]} """ ious = torch.stack( [ - probiou(batch1[i], batch2[i]).squeeze(-1).clamp_(0) + batch_probiou(batch1[i], batch2[i]).squeeze(-1).clamp_(0) for i in range(batch1.size(0)) ], dim=0, diff --git a/luxonis_train/utils/boxutils.py b/luxonis_train/utils/boxutils.py index 0e5f8433..997545b2 100644 --- a/luxonis_train/utils/boxutils.py +++ b/luxonis_train/utils/boxutils.py @@ -151,14 +151,14 @@ def dist2rbbox( pred_angles: Tensor, anchor_points: Tensor, ) -> Tensor: - """Transform distance (ltrb) to a rotated bounding box in "cxcywh" format. + """Transform distance (ltrb) to a rotated bounding box in "xcycwh" format. 
@type distance: Tensor @param distance: Distance predictions @type anchor_points: Tensor @param anchor_points: Head's anchor points @rtype: Tensor - @return: BBoxes in "cxcywh" format + @return: BBoxes in "xcycwh" format """ lt, rb = torch.split(distance, 2, -1) cos, sin = torch.cos(pred_angles), torch.sin(pred_angles) @@ -243,6 +243,30 @@ def xywhr2xyxyxyxy(x): return stack([pt1, pt2, pt3, pt4], -2) +def xyxy2xywh(x): + """Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, + height) format where (x1, y1) is the top-left corner and (x2, y2) is the bottom- + right corner. + + Args: + x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format. + + Returns: + y (np.ndarray | torch.Tensor): The bounding box coordinates in (x, y, width, height) format. + """ + assert ( + x.shape[-1] == 4 + ), f"input shape last dimension expected 4 but input shape is {x.shape}" + y = ( + torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) + ) # faster than clone/copy + y[..., 0] = (x[..., 0] + x[..., 2]) / 2 # x center + y[..., 1] = (x[..., 1] + x[..., 3]) / 2 # y center + y[..., 2] = x[..., 2] - x[..., 0] # width + y[..., 3] = x[..., 3] - x[..., 1] # height + return y + + def xywh2xyxy(x): """Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the top-left corner and (x2, y2) is the bottom- @@ -432,6 +456,48 @@ def probiou(obb1, obb2, CIoU=False, eps=1e-7): return iou +def batch_probiou(obb1, obb2, eps=1e-7): + """ + Calculate the prob IoU between oriented bounding boxes, https://arxiv.org/pdf/2106.06072v1.pdf. + + Args: + obb1 (torch.Tensor | np.ndarray): A tensor of shape (N, 5) representing ground truth obbs, with xywhr format. + obb2 (torch.Tensor | np.ndarray): A tensor of shape (M, 5) representing predicted obbs, with xywhr format. + eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7. + + Returns: + (torch.Tensor): A tensor of shape (N, M) representing obb similarities. + """ + obb1 = torch.from_numpy(obb1) if isinstance(obb1, np.ndarray) else obb1 + obb2 = torch.from_numpy(obb2) if isinstance(obb2, np.ndarray) else obb2 + + x1, y1 = obb1[..., :2].split(1, dim=-1) + x2, y2 = (x.squeeze(-1)[None] for x in obb2[..., :2].split(1, dim=-1)) + a1, b1, c1 = _get_covariance_matrix(obb1) + a2, b2, c2 = (x.squeeze(-1)[None] for x in _get_covariance_matrix(obb2)) + + t1 = ( + ((a1 + a2) * (y1 - y2).pow(2) + (b1 + b2) * (x1 - x2).pow(2)) + / ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2) + eps) + ) * 0.25 + t2 = ( + ((c1 + c2) * (x2 - x1) * (y1 - y2)) + / ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2) + eps) + ) * 0.5 + t3 = ( + ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2)) + / ( + 4 + * ((a1 * b1 - c1.pow(2)).clamp_(0) * (a2 * b2 - c2.pow(2)).clamp_(0)).sqrt() + + eps + ) + + eps + ).log() * 0.5 + bd = (t1 + t2 + t3).clamp(eps, 100.0) + hd = (1.0 - (-bd).exp() + eps).sqrt() + return 1 - hd + + def _get_covariance_matrix(boxes): """Generating covariance matrix from obbs. 
From 7b5adf55207e803a61dd2bab62965d044012c436 Mon Sep 17 00:00:00 2001 From: Anton Makoveev Date: Thu, 5 Sep 2024 23:48:03 +0200 Subject: [PATCH 62/75] [WiP]: add nms for obb --- .../losses/obb_detection_loss.py | 30 +++- .../nodes/heads/efficient_obbox_head.py | 15 +- luxonis_train/utils/assigners/utils.py | 9 +- luxonis_train/utils/boxutils.py | 167 ++++++++++++++++++ 4 files changed, 199 insertions(+), 22 deletions(-) diff --git a/luxonis_train/attached_modules/losses/obb_detection_loss.py b/luxonis_train/attached_modules/losses/obb_detection_loss.py index 20f5d08a..d3ff2070 100644 --- a/luxonis_train/attached_modules/losses/obb_detection_loss.py +++ b/luxonis_train/attached_modules/losses/obb_detection_loss.py @@ -134,16 +134,33 @@ def prepare( ) self.anchor_points_strided = self.anchor_points / self.stride_tensor - target = self._preprocess_target(target, batch_size) + target = self._preprocess_target( + target, batch_size + ) # [cls, x, y, w, h, r] unnormalized + + proj = torch.arange( + self.reg_max, dtype=torch.float, device=self.pred_distri.device + ) + b, a, c = self.pred_distri.shape # batch, anchors, channels + pred_distri_tensor = ( # we get a tensor of the expected values (mean) of the regression predictions + self.pred_distri.view(b, a, 4, c // 4) + .softmax(3) + .matmul(proj.type(self.pred_distri.dtype)) + ) pred_bboxes = torch.cat( ( - dist2rbbox(self.pred_distri, pred_angles, self.anchor_points_strided), + dist2rbbox(pred_distri_tensor, pred_angles, self.anchor_points_strided), pred_angles, ), dim=-1, - ) + ) # xywhr unnormalized + + xy_strided = pred_bboxes[..., :2] * self.stride_tensor + pred_bboxes_strided = torch.cat( + [xy_strided, pred_bboxes[..., 2:]], dim=-1 + ) # xywhr unnormalized with xy strided - gt_labels = target[:, :, :1] + gt_cls = target[:, :, :1] gt_cxcywhr = target[:, :, 1:] mask_gt = (gt_cxcywhr.sum(-1, keepdim=True) > 0).float() @@ -156,13 +173,14 @@ def prepare( _, ) = self.assigner( pred_scores.detach(), - pred_bboxes.detach() * self.stride_tensor, + pred_bboxes_strided.detach(), self.anchor_points, - gt_labels, + gt_cls, gt_cxcywhr, mask_gt, ) + # NOTE: make assigned_bboxes_strided and retern it instead of assigned_bboxes return ( pred_bboxes, pred_scores, diff --git a/luxonis_train/nodes/heads/efficient_obbox_head.py b/luxonis_train/nodes/heads/efficient_obbox_head.py index 93a59e00..69f075aa 100644 --- a/luxonis_train/nodes/heads/efficient_obbox_head.py +++ b/luxonis_train/nodes/heads/efficient_obbox_head.py @@ -9,7 +9,7 @@ from luxonis_train.utils.boxutils import ( anchors_for_fpn_features, dist2rbbox, - non_max_suppression, + non_max_suppression_obb, ) from luxonis_train.utils.types import LabelType, Packet @@ -151,7 +151,6 @@ def _process_to_bbox( .softmax(3) .matmul(proj.type(reg_dist_list.dtype)) ) - # pred_bboxes = dist2rbbox(reg_dist_tensor, angles_list, anchor_points) # xywh pred_bboxes = torch.cat( ( dist2rbbox(reg_dist_tensor, angles_list, anchor_points), @@ -160,11 +159,6 @@ def _process_to_bbox( dim=-1, ) # xywhr - # pred_bboxes = xywh2xyxy(pred_bboxes) - # pred_bboxes = xywhr2xyxyxyxy( - # pred_bboxes - # ) # format: [xy1, xy2, xy3, xy4], shape: (b, n, 4, 2) - xy_strided = pred_bboxes[..., :2] * stride_tensor pred_bboxes = torch.cat( [xy_strided, pred_bboxes[..., 2:]], dim=-1 @@ -183,13 +177,14 @@ def _process_to_bbox( dim=-1, ) - # NOTE: change non_max_suppression for obb - return non_max_suppression( + # pred = torch.rand((1, 1344, 15)) + # pred[..., 5] = 1 + + return non_max_suppression_obb( output_merged, 
n_classes=self.n_classes, conf_thres=self.conf_thres, iou_thres=self.iou_thres, - bbox_format="cxcywh", max_det=self.max_det, predicts_objectness=False, ) diff --git a/luxonis_train/utils/assigners/utils.py b/luxonis_train/utils/assigners/utils.py index b9911a0b..0dddee3e 100644 --- a/luxonis_train/utils/assigners/utils.py +++ b/luxonis_train/utils/assigners/utils.py @@ -44,15 +44,12 @@ def candidates_in_gt_obb(xy_centers, gt_bboxes): Returns: (Tensor): shape(b, n_boxes, h*w) """ - # (b, n_boxes, 5) --> (b, n_boxes, 4, 2) - corners = xywhr2xyxyxyxy(gt_bboxes) - # (b, n_boxes, 1, 2) - a, b, _, d = corners.split(1, dim=-2) + corners = xywhr2xyxyxyxy(gt_bboxes) # (b, n_boxes, 5) --> (b, n_boxes, 4, 2) + a, b, _, d = corners.split(1, dim=-2) # (b, n_boxes, 1, 2) ab = b - a ad = d - a - # (b, n_boxes, h*w, 2) - ap = xy_centers - a + ap = xy_centers - a # (b, n_boxes, h*w, 2) norm_ab = (ab * ab).sum(dim=-1) norm_ad = (ad * ad).sum(dim=-1) ap_dot_ab = (ap * ab).sum(dim=-1) diff --git a/luxonis_train/utils/boxutils.py b/luxonis_train/utils/boxutils.py index 997545b2..f6eebdb7 100644 --- a/luxonis_train/utils/boxutils.py +++ b/luxonis_train/utils/boxutils.py @@ -650,6 +650,173 @@ def non_max_suppression( return output +def non_max_suppression_obb( + preds: Tensor, + n_classes: int, + conf_thres: float = 0.25, + iou_thres: float = 0.45, + keep_classes: list[int] | None = None, + agnostic: bool = False, + multi_label: bool = False, + max_det: int = 300, + predicts_objectness: bool = True, +) -> list[Tensor]: + """Non-maximum suppression on model's predictions to keep only best instances for + oriented bounding boxes (obb). + + @type preds: Tensor + @param preds: Model's prediction tensor of shape [bs, N, M]. Bounding boxes are in xywhr format. + @type n_classes: int + @param n_classes: Number of model's classes. + @type conf_thres: float + @param conf_thres: Boxes with confidence higher than this will be kept. Defaults to + 0.25. + @type iou_thres: float + @param iou_thres: Boxes with IoU higher than this will be discarded. Defaults to + 0.45. + @type keep_classes: list[int] | None + @param keep_classes: Subset of classes to keep, if None then keep all of them. + Defaults to None. + @type agnostic: bool + @param agnostic: Whether perform NMS per class or treat all classes the same. + Defaults to False. + @type multi_label: bool + @param multi_label: Whether one prediction can have multiple labels. Defaults to + False. + @type bbox_format: BBoxFormatType + @param bbox_format: Input bbox format. Defaults to "xyxy". + @type max_det: int + @param max_det: Number of maximum output detections. Defaults to 300. + @type predicts_objectness: bool + @param predicts_objectness: Whether head predicts objectness confidence. Defaults to + True. + @rtype: list[Tensor] + @return: list of kept detections for each image, boxes in "xywhr" format. Tensors + with shape [n_kept, M] + """ + if not (0 <= conf_thres <= 1): + raise ValueError( + f"Confidence threshold must be in range [0,1] but set to {conf_thres}." + ) + if not (0 <= iou_thres <= 1): + raise ValueError( + f"IoU threshold must be in range [0,1] but set to {iou_thres}." 
+ ) + + multi_label &= n_classes > 1 + + candidate_mask = preds[..., 5] > conf_thres # all True + if not predicts_objectness: + candidate_mask = torch.logical_and( + candidate_mask, + torch.max(preds[..., 6 : 6 + n_classes], dim=-1)[0] > conf_thres, + ) + + output = [torch.zeros((0, preds.size(-1)), device=preds.device)] * preds.size(0) + + for i, x in enumerate(preds): + curr_out = x[candidate_mask[i]] + + if curr_out.size(0) == 0: + continue + + if predicts_objectness: + if n_classes == 1: + curr_out[:, 5 : 5 + n_classes] = curr_out[ + :, 4:5 + ] # not changed (non_max_suppression) + else: + curr_out[:, 5 : 5 + n_classes] *= curr_out[ + :, 4:5 + ] # not changed (non_max_suppression) + + else: + curr_out[:, 6 : 6 + n_classes] *= curr_out[:, 5:6] + + bboxes = curr_out[:, :5] + keep_mask = torch.zeros(bboxes.size(0)).bool() + + if multi_label: + box_idx, class_idx = ( + (curr_out[:, 6 : 6 + n_classes] > conf_thres).nonzero(as_tuple=False).T + ) + keep_mask[box_idx] = True + curr_out = torch.cat( + ( + bboxes[keep_mask], + curr_out[keep_mask, class_idx + 5, None], # why 5? + class_idx[:, None].float(), + ), + 1, + ) + else: + conf, class_idx = curr_out[:, 6 : 6 + n_classes].max(1, keepdim=True) + keep_mask[conf.view(-1) > conf_thres] = True + curr_out = torch.cat((bboxes, conf, class_idx.float()), 1)[keep_mask] + + if keep_classes is not None: + curr_out = curr_out[ + ( + curr_out[:, 6:7] + == torch.tensor(keep_classes, device=curr_out.device) + ).any(1) + ] + + if not curr_out.size(0): + continue + + keep_indices = batched_nms_obb( + boxes=curr_out[:, :5], + scores=curr_out[:, 5], + idxs=curr_out[:, 6].int() * (0 if agnostic else 1), + iou_threshold=iou_thres, + ) + + keep_indices = keep_indices[:max_det] + + output[i] = curr_out[keep_indices] + + return output + + +def batched_nms_obb( + boxes: Tensor, + scores: Tensor, + idxs: Tensor, + iou_threshold: float, +) -> Tensor: + # Based on Detectron2 implementation, just manually call nms() on each class independently + keep_mask = torch.zeros_like(scores, dtype=torch.bool) + for class_id in torch.unique(idxs): + curr_indices = torch.where(idxs == class_id)[0] + curr_keep_indices = batched_nms_rotated( + boxes[curr_indices], scores[curr_indices], iou_threshold + ) + keep_mask[curr_indices[curr_keep_indices]] = True + keep_indices = torch.where(keep_mask)[0] + return keep_indices[scores[keep_indices].sort(descending=True)[1]] + + +def batched_nms_rotated(boxes, scores, threshold=0.45): + """NMS for oriented bounding boxes using probiou and fast-nms. + + Args: + boxes (torch.Tensor): Rotated bounding boxes, shape (N, 5), format xywhr. + scores (torch.Tensor): Confidence scores, shape (N,). + threshold (float, optional): IoU threshold. Defaults to 0.45. + + Returns: + (torch.Tensor): Indices of boxes to keep after NMS. 
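+
+    Note:
+        Boxes are sorted by score and the pairwise probabilistic IoU matrix is
+        computed once; a box is dropped when its IoU with any higher-scoring box
+        reaches the threshold (fast-NMS, so suppression is not re-evaluated in
+        cascade).
+
+    Example:
+        >>> boxes = torch.rand(10, 5)   # xywhr, illustrative values only
+        >>> scores = torch.rand(10)
+        >>> keep = batched_nms_rotated(boxes, scores, threshold=0.45)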
+ """ + if len(boxes) == 0: + return np.empty((0,), dtype=np.int8) + sorted_idx = torch.argsort(scores, descending=True) + boxes = boxes[sorted_idx] + ious = batch_probiou(boxes, boxes).triu_(diagonal=1) + pick = torch.nonzero(ious.max(dim=0)[0] < threshold).squeeze_(-1) + return sorted_idx[pick] + + def anchors_from_dataset( loader: DataLoader, n_anchors: int = 9, From 25cb8efdb83b21e07d2b5a7699d14959baed43f3 Mon Sep 17 00:00:00 2001 From: Anton Makoveev Date: Sun, 8 Sep 2024 21:32:35 +0200 Subject: [PATCH 63/75] [WiP]: add a metrics module for obb --- .../losses/obb_detection_loss.py | 11 +- .../attached_modules/metrics/__init__.py | 2 + .../metrics/mean_average_precision.py | 2 +- .../metrics/mean_average_precision_obb.py | 445 ++++++++++++++++++ .../visualizers/bbox_visualizer.py | 5 + .../predefined_models/detection_model_obb.py | 8 +- .../nodes/heads/efficient_obbox_head.py | 3 +- luxonis_train/utils/boxutils.py | 7 +- 8 files changed, 470 insertions(+), 13 deletions(-) create mode 100644 luxonis_train/attached_modules/metrics/mean_average_precision_obb.py diff --git a/luxonis_train/attached_modules/losses/obb_detection_loss.py b/luxonis_train/attached_modules/losses/obb_detection_loss.py index d3ff2070..c46985ea 100644 --- a/luxonis_train/attached_modules/losses/obb_detection_loss.py +++ b/luxonis_train/attached_modules/losses/obb_detection_loss.py @@ -180,11 +180,15 @@ def prepare( mask_gt, ) - # NOTE: make assigned_bboxes_strided and retern it instead of assigned_bboxes + xy_unstrided = assigned_bboxes[..., :2] / self.stride_tensor + assigned_bboxes_unstrided = torch.cat( + [xy_unstrided, assigned_bboxes[..., 2:]], dim=-1 + ) # xywhr unnormalized with xy strided + return ( pred_bboxes, pred_scores, - assigned_bboxes / self.stride_tensor, + assigned_bboxes_unstrided, assigned_labels, assigned_scores, mask_positive, @@ -352,8 +356,7 @@ def forward( ) loss_dfl = ( self.dfl_loss( - # pred_dist[fg_mask].view(-1, self.dfl_loss.reg_max), - pred_dist[fg_mask], + pred_dist[fg_mask].view(-1, self.dfl_loss.reg_max), target_ltrb[fg_mask], ) * weight diff --git a/luxonis_train/attached_modules/metrics/__init__.py b/luxonis_train/attached_modules/metrics/__init__.py index 9e73e4ac..7357709a 100644 --- a/luxonis_train/attached_modules/metrics/__init__.py +++ b/luxonis_train/attached_modules/metrics/__init__.py @@ -2,6 +2,7 @@ from .common import Accuracy, F1Score, JaccardIndex, Precision, Recall from .mean_average_precision import MeanAveragePrecision from .mean_average_precision_keypoints import MeanAveragePrecisionKeypoints +from .mean_average_precision_obb import MeanAveragePrecisionOBB from .object_keypoint_similarity import ObjectKeypointSimilarity __all__ = [ @@ -10,6 +11,7 @@ "JaccardIndex", "BaseMetric", "MeanAveragePrecision", + "MeanAveragePrecisionOBB", "MeanAveragePrecisionKeypoints", "ObjectKeypointSimilarity", "Precision", diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision.py b/luxonis_train/attached_modules/metrics/mean_average_precision.py index d0642bff..ffdf5e22 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision.py @@ -15,7 +15,7 @@ class MeanAveragePrecision(BaseMetric): }. 
""" - supported_labels = [LabelType.BOUNDINGBOX, LabelType.OBOUNDINGBOX] + supported_labels = [LabelType.BOUNDINGBOX] def __init__(self, **kwargs): super().__init__(**kwargs) diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py b/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py new file mode 100644 index 00000000..a46a26b5 --- /dev/null +++ b/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py @@ -0,0 +1,445 @@ +import numpy as np +import torch +from torch import Tensor +from torchvision.ops import box_convert + +from luxonis_train.utils.boxutils import batch_probiou +from luxonis_train.utils.types import Labels, LabelType, Packet + +from .base_metric import BaseMetric + + +class MeanAveragePrecisionOBB(BaseMetric): + """Compute the Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR) for + oriented object detection predictions. + + Adapted from U{Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR) + }. + """ + + supported_labels = [LabelType.OBOUNDINGBOX] + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + self.p = [] # precision for each class. Shape: (nc,) + self.r = [] # recall for each class. Shape: (nc,) + self.f1 = [] # F1 score for each class. Shape: (nc,) + self.all_ap = [] # AP scores for all classes and all IoU thresholds. Shape: (nc, 10) + self.ap_class_index = [] # index of class for each AP score. Shape: (nc,) + self.nc = 0 # number of classes + + self.stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[], target_img=[]) + + def update( + self, + preds: list[dict[str, Tensor]], # outputs + batch: list[dict[str, Tensor]], # labels + ): + """Metrics.""" + for si, pred in enumerate(preds): + self.seen += 1 + npr = len(pred) + stat = dict( + conf=torch.zeros(0, device=self.device), + pred_cls=torch.zeros(0, device=self.device), + tp=torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device), + ) + pbatch = self._prepare_batch(si, batch) + cls, bbox = pbatch.pop("cls"), pbatch.pop("bbox") + nl = len(cls) + stat["target_cls"] = cls + stat["target_img"] = cls.unique() + if npr == 0: + if nl: + for k in self.stats.keys(): + self.stats[k].append(stat[k]) + if self.args.plots: + self.confusion_matrix.process_batch( + detections=None, gt_bboxes=bbox, gt_cls=cls + ) + continue + + # Predictions + if self.args.single_cls: + pred[:, 5] = 0 + predn = self._prepare_pred(pred, pbatch) + stat["conf"] = predn[:, 4] + stat["pred_cls"] = predn[:, 5] + + # Evaluate + if nl: + stat["tp"] = self._process_batch(predn, bbox, cls) + if self.args.plots: + self.confusion_matrix.process_batch(predn, bbox, cls) + for k in self.stats.keys(): + self.stats[k].append(stat[k]) + + # # Save + # if self.args.save_json: + # self.pred_to_json(predn, batch["im_file"][si]) + # if self.args.save_txt: + # self.save_one_txt( + # predn, + # self.args.save_conf, + # pbatch["ori_shape"], + # self.save_dir / "labels" / f'{Path(batch["im_file"][si]).stem}.txt', + # ) + + def prepare( + self, outputs: Packet[Tensor], labels: Labels + ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]: + box_label = self.get_label(labels)[0] + output_nms = self.get_input_tensors(outputs) + + image_size = self.node.original_in_shape[1:] + + output_list: list[dict[str, Tensor]] = [] + label_list: list[dict[str, Tensor]] = [] + for i in range(len(output_nms)): + output_list.append( + { + "boxes": output_nms[i][:, :4], + "scores": output_nms[i][:, 4], + "labels": output_nms[i][:, 5].int(), + } + ) + + curr_label = box_label[box_label[:, 
0] == i] + curr_bboxs = box_convert(curr_label[:, 2:], "xywh", "xyxy") + curr_bboxs[:, 0::2] *= image_size[1] + curr_bboxs[:, 1::2] *= image_size[0] + label_list.append({"boxes": curr_bboxs, "labels": curr_label[:, 1].int()}) + + return output_list, label_list + + def _prepare_batch(self, si, batch): + """Prepares and returns a batch for OBB validation.""" + idx = batch["batch_idx"] == si + cls = batch["cls"][idx].squeeze(-1) + bbox = batch["bboxes"][idx] + ori_shape = batch["ori_shape"][si] + imgsz = batch["img"].shape[2:] + ratio_pad = batch["ratio_pad"][si] + if len(cls): + bbox[..., :4].mul_( + torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]] + ) # target boxes + # ops.scale_boxes( + # imgsz, bbox, ori_shape, ratio_pad=ratio_pad, xywh=True + # ) # native-space labels + return { + "cls": cls, + "bbox": bbox, + "ori_shape": ori_shape, + "imgsz": imgsz, + "ratio_pad": ratio_pad, + } + + def _prepare_pred(self, pred, pbatch): + """Prepares and returns a batch for OBB validation with scaled and padded + bounding boxes.""" + predn = pred.clone() + # ops.scale_boxes( + # pbatch["imgsz"], + # predn[:, :4], + # pbatch["ori_shape"], + # ratio_pad=pbatch["ratio_pad"], + # xywh=True, + # ) # native-space pred + return predn + + def reset(self) -> None: + self.metric.reset() + + def compute(self) -> tuple[Tensor, dict[str, Tensor]]: + pass + # metric_dict = self.metric.compute() + + # del metric_dict["classes"] + # del metric_dict["map_per_class"] + # del metric_dict["mar_100_per_class"] + # map = metric_dict.pop("map") + + # mat = self._process_batch() + + # return map, metric_dict + + def _process_batch(self, detections, gt_bboxes, gt_cls): + """Perform computation of the correct prediction matrix for a batch of + detections and ground truth bounding boxes. + + Args: + detections (torch.Tensor): A tensor of shape (N, 7) representing the detected bounding boxes and associated + data. Each detection is represented as (x1, y1, x2, y2, conf, class, angle). + gt_bboxes (torch.Tensor): A tensor of shape (M, 5) representing the ground truth bounding boxes. Each box is + represented as (x1, y1, x2, y2, angle). + gt_cls (torch.Tensor): A tensor of shape (M,) representing class labels for the ground truth bounding boxes. + + Returns: + (torch.Tensor): The correct prediction matrix with shape (N, 10), which includes 10 IoU (Intersection over + Union) levels for each detection, indicating the accuracy of predictions compared to the ground truth. + + Example: + ```python + detections = torch.rand(100, 7) # 100 sample detections + gt_bboxes = torch.rand(50, 5) # 50 sample ground truth boxes + gt_cls = torch.randint(0, 5, (50,)) # 50 ground truth class labels + correct_matrix = OBBValidator._process_batch(detections, gt_bboxes, gt_cls) + ``` + + Note: + This method relies on `batch_probiou` to calculate IoU between detections and ground truth bounding boxes. + """ + iou = batch_probiou( + gt_bboxes, torch.cat([detections[:, :4], detections[:, -1:]], dim=-1) + ) + return self.match_predictions(detections[:, 5], gt_cls, iou) + + def match_predictions(self, pred_classes, true_classes, iou, use_scipy=False): + """Matches predictions to ground truth objects (pred_classes, true_classes) + using IoU. + + Args: + pred_classes (torch.Tensor): Predicted class indices of shape(N,). + true_classes (torch.Tensor): Target class indices of shape(M,). 
+ iou (torch.Tensor): An NxM tensor containing the pairwise IoU values for predictions and ground of truth + use_scipy (bool): Whether to use scipy for matching (more precise). + + Returns: + (torch.Tensor): Correct tensor of shape(N,10) for 10 IoU thresholds. + """ + # Dx10 matrix, where D - detections, 10 - IoU thresholds + correct = np.zeros((pred_classes.shape[0], self.iouv.shape[0])).astype(bool) + # LxD matrix where L - labels (rows), D - detections (columns) + correct_class = true_classes[:, None] == pred_classes + iou = iou * correct_class # zero out the wrong classes + iou = iou.cpu().numpy() + for i, threshold in enumerate(self.iouv.cpu().tolist()): + if use_scipy: + # WARNING: known issue that reduces mAP in https://github.com/ultralytics/ultralytics/pull/4708 + import scipy # scope import to avoid importing for all commands + + cost_matrix = iou * (iou >= threshold) + if cost_matrix.any(): + labels_idx, detections_idx = scipy.optimize.linear_sum_assignment( + cost_matrix, maximize=True + ) + valid = cost_matrix[labels_idx, detections_idx] > 0 + if valid.any(): + correct[detections_idx[valid], i] = True + else: + matches = np.nonzero( + iou >= threshold + ) # IoU > threshold and classes match + matches = np.array(matches).T + if matches.shape[0]: + if matches.shape[0] > 1: + matches = matches[ + iou[matches[:, 0], matches[:, 1]].argsort()[::-1] + ] + matches = matches[ + np.unique(matches[:, 1], return_index=True)[1] + ] + # matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[ + np.unique(matches[:, 0], return_index=True)[1] + ] + correct[matches[:, 1].astype(int), i] = True + return torch.tensor(correct, dtype=torch.bool, device=pred_classes.device) + + def update_metric(self, results): + """Updates the evaluation metrics of the model with a new set of results. + + Args: + results (tuple): A tuple containing the following evaluation metrics: + - p (list): Precision for each class. Shape: (nc,). + - r (list): Recall for each class. Shape: (nc,). + - f1 (list): F1 score for each class. Shape: (nc,). + - all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10). + - ap_class_index (list): Index of class for each AP score. Shape: (nc,). + + Side Effects: + Updates the class attributes `self.p`, `self.r`, `self.f1`, `self.all_ap`, and `self.ap_class_index` based + on the values provided in the `results` tuple. + """ + ( + self.p, + self.r, + self.f1, + self.all_ap, + self.ap_class_index, + self.p_curve, + self.r_curve, + self.f1_curve, + self.px, + self.prec_values, + ) = results + + def process(self, tp, conf, pred_cls, target_cls): + """Process predicted results for object detection and update metrics.""" + results = MeanAveragePrecisionOBB.ap_per_class( + tp, + conf, + pred_cls, + target_cls, + # plot=self.plot, + # save_dir=self.save_dir, + # names=self.names, + on_plot=self.on_plot, + )[2:] + self.box.nc = len(self.names) + self.box.update(results) + + @staticmethod + def ap_per_class( + tp, + conf, + pred_cls, + target_cls, + plot=False, + on_plot=None, + # save_dir=Path(), + # names={}, + eps=1e-16, + prefix="", + ): + """Computes the average precision per class for object detection evaluation. + + Args: + tp (np.ndarray): Binary array indicating whether the detection is correct (True) or not (False). + conf (np.ndarray): Array of confidence scores of the detections. + pred_cls (np.ndarray): Array of predicted classes of the detections. + target_cls (np.ndarray): Array of true classes of the detections. 
+ plot (bool, optional): Whether to plot PR curves or not. Defaults to False. + on_plot (func, optional): A callback to pass plots path and data when they are rendered. Defaults to None. + save_dir (Path, optional): Directory to save the PR curves. Defaults to an empty path. + names (dict, optional): Dict of class names to plot PR curves. Defaults to an empty tuple. + eps (float, optional): A small value to avoid division by zero. Defaults to 1e-16. + prefix (str, optional): A prefix string for saving the plot files. Defaults to an empty string. + + Returns: + (tuple): A tuple of six arrays and one array of unique classes, where: + tp (np.ndarray): True positive counts at threshold given by max F1 metric for each class.Shape: (nc,). + fp (np.ndarray): False positive counts at threshold given by max F1 metric for each class. Shape: (nc,). + p (np.ndarray): Precision values at threshold given by max F1 metric for each class. Shape: (nc,). + r (np.ndarray): Recall values at threshold given by max F1 metric for each class. Shape: (nc,). + f1 (np.ndarray): F1-score values at threshold given by max F1 metric for each class. Shape: (nc,). + ap (np.ndarray): Average precision for each class at different IoU thresholds. Shape: (nc, 10). + unique_classes (np.ndarray): An array of unique classes that have data. Shape: (nc,). + p_curve (np.ndarray): Precision curves for each class. Shape: (nc, 1000). + r_curve (np.ndarray): Recall curves for each class. Shape: (nc, 1000). + f1_curve (np.ndarray): F1-score curves for each class. Shape: (nc, 1000). + x (np.ndarray): X-axis values for the curves. Shape: (1000,). + prec_values: Precision values at mAP@0.5 for each class. Shape: (nc, 1000). + """ + # Sort by objectness + i = np.argsort(-conf) + tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] + + # Find unique classes + unique_classes, nt = np.unique(target_cls, return_counts=True) + nc = unique_classes.shape[0] # number of classes, number of detections + + # Create Precision-Recall curve and compute AP for each class + x, prec_values = np.linspace(0, 1, 1000), [] + + # Average precision, precision and recall curves + ap, p_curve, r_curve = ( + np.zeros((nc, tp.shape[1])), + np.zeros((nc, 1000)), + np.zeros((nc, 1000)), + ) + for ci, c in enumerate(unique_classes): + i = pred_cls == c + n_l = nt[ci] # number of labels + n_p = i.sum() # number of predictions + if n_p == 0 or n_l == 0: + continue + + # Accumulate FPs and TPs + fpc = (1 - tp[i]).cumsum(0) + tpc = tp[i].cumsum(0) + + # Recall + recall = tpc / (n_l + eps) # recall curve + r_curve[ci] = np.interp( + -x, -conf[i], recall[:, 0], left=0 + ) # negative x, xp because xp decreases + + # Precision + precision = tpc / (tpc + fpc) # precision curve + p_curve[ci] = np.interp( + -x, -conf[i], precision[:, 0], left=1 + ) # p at pr_score + + # AP from recall-precision curve + # for j in range(tp.shape[1]): + # ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) + # if plot and j == 0: + # prec_values.append(np.interp(x, mrec, mpre)) # precision at mAP@0.5 + + prec_values = np.array(prec_values) # (nc, 1000) + + # Compute F1 (harmonic mean of precision and recall) + # f1_curve = 2 * p_curve * r_curve / (p_curve + r_curve + eps) + # names = [ + # v for k, v in names.items() if k in unique_classes + # ] # list: only classes that have data + # names = dict(enumerate(names)) # to dict + # if plot: + # plot_pr_curve( + # x, + # prec_values, + # ap, + # save_dir / f"{prefix}PR_curve.png", + # names, + # on_plot=on_plot, + # ) + # plot_mc_curve( + # x, 
+ # f1_curve, + # save_dir / f"{prefix}F1_curve.png", + # names, + # ylabel="F1", + # on_plot=on_plot, + # ) + # plot_mc_curve( + # x, + # p_curve, + # save_dir / f"{prefix}P_curve.png", + # names, + # ylabel="Precision", + # on_plot=on_plot, + # ) + # plot_mc_curve( + # x, + # r_curve, + # save_dir / f"{prefix}R_curve.png", + # names, + # ylabel="Recall", + # on_plot=on_plot, + # ) + + # i = smooth(f1_curve.mean(0), 0.1).argmax() # max F1 index + # p, r, f1 = ( + # p_curve[:, i], + # r_curve[:, i], + # f1_curve[:, i], + # ) # max-F1 precision, recall, F1 values + # tp = (r * nt).round() # true positives + # fp = (tp / (p + eps) - tp).round() # false positives + return ( + # tp, + # fp, + # p, + # r, + # f1, + ap, + unique_classes.astype(int), + p_curve, + r_curve, + # f1_curve, + x, + prec_values, + ) diff --git a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py index d3c67dd6..980580ce 100644 --- a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py @@ -195,3 +195,8 @@ def forward( width=self.width, ) return targets_viz, predictions_viz.to(targets_viz.device) + + +class OBBoxVisualizer(BaseVisualizer[list[Tensor], Tensor]): + def forward(self): + pass diff --git a/luxonis_train/models/predefined_models/detection_model_obb.py b/luxonis_train/models/predefined_models/detection_model_obb.py index e2b220f9..9ba44e02 100644 --- a/luxonis_train/models/predefined_models/detection_model_obb.py +++ b/luxonis_train/models/predefined_models/detection_model_obb.py @@ -70,8 +70,8 @@ def losses(self) -> list[LossModuleConfig]: def metrics(self) -> list[MetricModuleConfig]: return [ MetricModuleConfig( - name="MeanAveragePrecision", - alias="detection_map", + name="MeanAveragePrecisionOBB", + alias="detection_map_obb", attached_to="detection_obb_head", is_main_metric=True, ), @@ -81,8 +81,8 @@ def metrics(self) -> list[MetricModuleConfig]: def visualizers(self) -> list[AttachedModuleConfig]: return [ AttachedModuleConfig( - name="BBoxVisualizer", - alias="detection_visualizer", + name="OBBoxVisualizer", + alias="detection_visualizer_obb", attached_to="detection_obb_head", params=self.visualizer_params, ) diff --git a/luxonis_train/nodes/heads/efficient_obbox_head.py b/luxonis_train/nodes/heads/efficient_obbox_head.py index 69f075aa..fea0814e 100644 --- a/luxonis_train/nodes/heads/efficient_obbox_head.py +++ b/luxonis_train/nodes/heads/efficient_obbox_head.py @@ -119,7 +119,7 @@ def wrap( (features, cls_tensor, reg_tensor, angle_tensor) ) return { - "boundingbox": boxes, + "oboundingbox": boxes, "features": features, "class_scores": [cls_tensor], "distributions": [reg_tensor], @@ -182,6 +182,7 @@ def _process_to_bbox( return non_max_suppression_obb( output_merged, + # pred, # for debugging n_classes=self.n_classes, conf_thres=self.conf_thres, iou_thres=self.iou_thres, diff --git a/luxonis_train/utils/boxutils.py b/luxonis_train/utils/boxutils.py index f6eebdb7..0a3f0b82 100644 --- a/luxonis_train/utils/boxutils.py +++ b/luxonis_train/utils/boxutils.py @@ -683,8 +683,6 @@ def non_max_suppression_obb( @type multi_label: bool @param multi_label: Whether one prediction can have multiple labels. Defaults to False. - @type bbox_format: BBoxFormatType - @param bbox_format: Input bbox format. Defaults to "xyxy". @type max_det: int @param max_det: Number of maximum output detections. Defaults to 300. 
@type predicts_objectness: bool @@ -712,7 +710,10 @@ def non_max_suppression_obb( torch.max(preds[..., 6 : 6 + n_classes], dim=-1)[0] > conf_thres, ) - output = [torch.zeros((0, preds.size(-1)), device=preds.device)] * preds.size(0) + # output = [torch.zeros((0, preds.size(-1)), device=preds.device)] * preds.size(0) + output = [torch.zeros((0, 7), device=preds.device)] * preds.size( + 0 + ) # [x, y, w, h, conf, cls_idx] for i, x in enumerate(preds): curr_out = x[candidate_mask[i]] From c911a915171c3e504a2056602d5360f5a98bd3c1 Mon Sep 17 00:00:00 2001 From: Anton Makoveev Date: Tue, 10 Sep 2024 23:05:00 +0200 Subject: [PATCH 64/75] [WiP]: implement an obb visualizer --- .../losses/obb_detection_loss.py | 12 +- .../metrics/mean_average_precision_obb.py | 395 +++++++++--------- .../attached_modules/visualizers/__init__.py | 2 + .../visualizers/bbox_visualizer.py | 7 +- .../visualizers/obbox_visualizer.py | 197 +++++++++ .../attached_modules/visualizers/utils.py | 154 ++++++- .../nodes/heads/efficient_obbox_head.py | 20 +- test_models.py | 5 +- 8 files changed, 572 insertions(+), 220 deletions(-) create mode 100644 luxonis_train/attached_modules/visualizers/obbox_visualizer.py diff --git a/luxonis_train/attached_modules/losses/obb_detection_loss.py b/luxonis_train/attached_modules/losses/obb_detection_loss.py index c46985ea..b3081e61 100644 --- a/luxonis_train/attached_modules/losses/obb_detection_loss.py +++ b/luxonis_train/attached_modules/losses/obb_detection_loss.py @@ -132,7 +132,9 @@ def prepare( self.grid_cell_offset, multiply_with_stride=True, ) - self.anchor_points_strided = self.anchor_points / self.stride_tensor + self.anchor_points_strided = ( + self.anchor_points / self.stride_tensor + ) # NOTE: check later for dimenstions target = self._preprocess_target( target, batch_size @@ -180,7 +182,9 @@ def prepare( mask_gt, ) - xy_unstrided = assigned_bboxes[..., :2] / self.stride_tensor + xy_unstrided = ( + assigned_bboxes[..., :2] / self.stride_tensor + ) # NOTE: check for dimensions during training assigned_bboxes_unstrided = torch.cat( [xy_unstrided, assigned_bboxes[..., 2:]], dim=-1 ) # xywhr unnormalized with xy strided @@ -258,7 +262,9 @@ def _preprocess_target(self, target: Tensor, batch_size: int): scaled_target = out_target[:, :, 1:5] * self.gt_bboxes_scale scaled_target_angle = torch.cat( - [scaled_target, out_target[:, :, 5].transpose(0, 1).unsqueeze(0)], dim=-1 + # [scaled_target, out_target[:, :, 5].transpose(0, 1).unsqueeze(0)], dim=-1 + [scaled_target, out_target[:, :, 5].unsqueeze(-1)], + dim=-1, ) # out_target[..., 1:] = box_convert(scaled_target, "xywh", "xyxy") out_target[..., 1:] = scaled_target_angle diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py b/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py index a46a26b5..b7b38c8d 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py @@ -1,9 +1,8 @@ import numpy as np import torch from torch import Tensor -from torchvision.ops import box_convert -from luxonis_train.utils.boxutils import batch_probiou +from luxonis_train.utils.boxutils import batch_probiou, xyxyxyxy2xywhr from luxonis_train.utils.types import Labels, LabelType, Packet from .base_metric import BaseMetric @@ -29,145 +28,123 @@ def __init__(self, **kwargs): self.ap_class_index = [] # index of class for each AP score. 
Shape: (nc,) self.nc = 0 # number of classes - self.stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[], target_img=[]) + self.stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[]) + + self.iouv = torch.linspace( + 0.5, 0.95, 10 + ) # IoU thresholds from 0.50 to 0.95 in spaces of 0.05 mAP@0.5:0.95 def update( self, - preds: list[dict[str, Tensor]], # outputs - batch: list[dict[str, Tensor]], # labels + outputs: list[Tensor], # preds + labels: Tensor, # batch ): - """Metrics.""" - for si, pred in enumerate(preds): - self.seen += 1 - npr = len(pred) - stat = dict( - conf=torch.zeros(0, device=self.device), - pred_cls=torch.zeros(0, device=self.device), - tp=torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device), + """Update metrics without erasing stats from the previous batch, i.e. the + metrics are calculated cumulatively. + + preds: [x1, y1, x2, y2, conf, cls_idx, r] unnormalized (not in [0, 1] range) [Tensor(n_bboxes, 7)] + batch: [cls_idx, x1, y1, x2, y2, r] unnormalized (not in [0, 1] range) [Tensor(n_bboxes, 6)] + """ + for si, output in enumerate(outputs): + self.stats["conf"].append(output[:, 4]) + self.stats["pred_cls"].append(output[:, 5]) + self.stats["target_cls"].append(labels[si][:, 0]) + gt_cls = labels[si][:, :1] # cls_idx + gt_bboxes = labels[si][:, 1:] # [x1, y1, x2, y2, r] + self.stats["tp"].append( + self._process_batch( + detections=output, gt_bboxes=gt_bboxes, gt_cls=gt_cls + ) ) - pbatch = self._prepare_batch(si, batch) - cls, bbox = pbatch.pop("cls"), pbatch.pop("bbox") - nl = len(cls) - stat["target_cls"] = cls - stat["target_img"] = cls.unique() - if npr == 0: - if nl: - for k in self.stats.keys(): - self.stats[k].append(stat[k]) - if self.args.plots: - self.confusion_matrix.process_batch( - detections=None, gt_bboxes=bbox, gt_cls=cls - ) - continue - # Predictions - if self.args.single_cls: - pred[:, 5] = 0 - predn = self._prepare_pred(pred, pbatch) - stat["conf"] = predn[:, 4] - stat["pred_cls"] = predn[:, 5] - - # Evaluate - if nl: - stat["tp"] = self._process_batch(predn, bbox, cls) - if self.args.plots: - self.confusion_matrix.process_batch(predn, bbox, cls) - for k in self.stats.keys(): - self.stats[k].append(stat[k]) - - # # Save - # if self.args.save_json: - # self.pred_to_json(predn, batch["im_file"][si]) - # if self.args.save_txt: - # self.save_one_txt( - # predn, - # self.args.save_conf, - # pbatch["ori_shape"], - # self.save_dir / "labels" / f'{Path(batch["im_file"][si]).stem}.txt', - # ) + results = self._process( + torch.cat(self.stats["tp"]).cpu().numpy(), + torch.cat(self.stats["conf"]).cpu().numpy(), + torch.cat(self.stats["pred_cls"]).cpu().numpy(), + torch.cat(self.stats["target_cls"]).cpu().numpy(), + ) + + self._update_metrics(results) def prepare( self, outputs: Packet[Tensor], labels: Labels - ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]: - box_label = self.get_label(labels)[0] + ) -> tuple[list[Tensor], list[Tensor]]: + # outputs_nms: [x, y, w, h, r, conf, cls_idx] unnormalized (not in [0, 1] range) [Tensor(n_bboxes, 7)] + # obb_labels: [img_id, cls_idx, x1, y1, x2, y2, x3, y3, x4, y4] normalized (in [0, 1] range) [Tensor(n_bboxes, 10)] + + # preds: [xc, yc, w, h, conf, cls_idx, r] unnormalized (not in [0, 1] range) [Tensor(n_bboxes, 7)] + # batch: [cls_idx, xc, yc, w, h, r] unnormalized (not in [0, 1] range) [Tensor(n_bboxes, 6)] + + obb_labels = self.get_label(labels)[0] output_nms = self.get_input_tensors(outputs) + pred_scores = self.get_input_tensors(outputs, "class_scores")[ + 0 + ] # needed for batch 
size and device - image_size = self.node.original_in_shape[1:] + # device = pred_scores.device + batch_size = pred_scores.shape[0] + img_size = self.node.original_in_shape[1:] - output_list: list[dict[str, Tensor]] = [] - label_list: list[dict[str, Tensor]] = [] + output_labels = [] for i in range(len(output_nms)): - output_list.append( - { - "boxes": output_nms[i][:, :4], - "scores": output_nms[i][:, 4], - "labels": output_nms[i][:, 5].int(), - } + output_nms[i][..., [0, 1, 2, 3, 4, 5, 6]] = output_nms[i][ + ..., [0, 1, 2, 3, 5, 6, 4] + ] # move angle to the end + # output_list.append(output_nms[i]) + + curr_label = obb_labels[obb_labels[:, 0] == i] + output_labels.append( + self._preprocess_target(curr_label, batch_size, img_size) ) - curr_label = box_label[box_label[:, 0] == i] - curr_bboxs = box_convert(curr_label[:, 2:], "xywh", "xyxy") - curr_bboxs[:, 0::2] *= image_size[1] - curr_bboxs[:, 1::2] *= image_size[0] - label_list.append({"boxes": curr_bboxs, "labels": curr_label[:, 1].int()}) - - return output_list, label_list - - def _prepare_batch(self, si, batch): - """Prepares and returns a batch for OBB validation.""" - idx = batch["batch_idx"] == si - cls = batch["cls"][idx].squeeze(-1) - bbox = batch["bboxes"][idx] - ori_shape = batch["ori_shape"][si] - imgsz = batch["img"].shape[2:] - ratio_pad = batch["ratio_pad"][si] - if len(cls): - bbox[..., :4].mul_( - torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]] - ) # target boxes - # ops.scale_boxes( - # imgsz, bbox, ori_shape, ratio_pad=ratio_pad, xywh=True - # ) # native-space labels - return { - "cls": cls, - "bbox": bbox, - "ori_shape": ori_shape, - "imgsz": imgsz, - "ratio_pad": ratio_pad, - } - - def _prepare_pred(self, pred, pbatch): - """Prepares and returns a batch for OBB validation with scaled and padded - bounding boxes.""" - predn = pred.clone() - # ops.scale_boxes( - # pbatch["imgsz"], - # predn[:, :4], - # pbatch["ori_shape"], - # ratio_pad=pbatch["ratio_pad"], - # xywh=True, - # ) # native-space pred - return predn + return output_nms, output_labels + + def _preprocess_target(self, target: Tensor, batch_size: int, img_size): + """Preprocess target in shape [batch_size, N, 6] where N is maximum number of + instances in one image.""" + cls_idx = target[:, 1].unsqueeze(-1) + xyxyxyxy = target[:, 2:] + xyxyxyxy[:, 0::2] *= img_size[1] # scale x + xyxyxyxy[:, 1::2] *= img_size[0] # scale y + xcycwhr = xyxyxyxy2xywhr(xyxyxyxy) + if isinstance(xcycwhr, np.ndarray): + xcycwhr = torch.tensor(xcycwhr) + out_target = torch.cat([cls_idx, xcycwhr], dim=-1) + return out_target def reset(self) -> None: - self.metric.reset() + self.p = [] + self.r = [] + self.f1 = [] + self.all_ap = [] + self.ap_class_index = [] - def compute(self) -> tuple[Tensor, dict[str, Tensor]]: - pass - # metric_dict = self.metric.compute() + def compute( + self, + ) -> tuple[Tensor, dict[str, Tensor]]: # NOTE: change to the appropriate types + """Process predicted results for object detection and update metrics.""" + results = self._process( + torch.cat(self.stats["tp"]).cpu().numpy(), + torch.cat(self.stats["conf"]).cpu().numpy(), + torch.cat(self.stats["pred_cls"]).cpu().numpy(), + torch.cat(self.stats["target_cls"]).cpu().numpy(), + ) - # del metric_dict["classes"] - # del metric_dict["map_per_class"] - # del metric_dict["mar_100_per_class"] - # map = metric_dict.pop("map") + metrics = { + "p": torch.tensor(np.mean(results[0])), + "r": torch.tensor(np.mean(results[1])), + "f1": torch.tensor(np.mean(results[2])), + "all_ap": 
torch.tensor(np.mean(results[3])), + "ap_class_index": torch.tensor(np.mean(results[4])), + } - # mat = self._process_batch() + map = torch.tensor(MeanAveragePrecisionOBB.map(results[5])) # all_ap - # return map, metric_dict + return map, metrics def _process_batch(self, detections, gt_bboxes, gt_cls): - """Perform computation of the correct prediction matrix for a batch of - detections and ground truth bounding boxes. + """Perform computation of the correct prediction matrix for a batch of # "fp": + torch.from_numpy(results[1]), detections and ground truth bounding boxes. Args: detections (torch.Tensor): A tensor of shape (N, 7) representing the detected bounding boxes and associated @@ -192,7 +169,8 @@ def _process_batch(self, detections, gt_bboxes, gt_cls): This method relies on `batch_probiou` to calculate IoU between detections and ground truth bounding boxes. """ iou = batch_probiou( - gt_bboxes, torch.cat([detections[:, :4], detections[:, -1:]], dim=-1) + gt_bboxes, + torch.cat([detections[:, :4], detections[:, -1:]], dim=-1), ) return self.match_predictions(detections[:, 5], gt_cls, iou) @@ -248,7 +226,7 @@ def match_predictions(self, pred_classes, true_classes, iou, use_scipy=False): correct[matches[:, 1].astype(int), i] = True return torch.tensor(correct, dtype=torch.bool, device=pred_classes.device) - def update_metric(self, results): + def _update_metrics(self, results): """Updates the evaluation metrics of the model with a new set of results. Args: @@ -263,20 +241,26 @@ def update_metric(self, results): Updates the class attributes `self.p`, `self.r`, `self.f1`, `self.all_ap`, and `self.ap_class_index` based on the values provided in the `results` tuple. """ - ( - self.p, - self.r, - self.f1, - self.all_ap, - self.ap_class_index, - self.p_curve, - self.r_curve, - self.f1_curve, - self.px, - self.prec_values, - ) = results - - def process(self, tp, conf, pred_cls, target_cls): + # The following logic impies averaging AP over all classes + self.p = torch.tensor(np.mean(results[0])) + self.r = torch.tensor(np.mean(results[1])) + self.f1 = torch.tensor(np.mean(results[2])) + self.all_ap = torch.tensor(np.mean(results[3])) + self.ap_class_index = torch.tensor(np.mean(results[4])) + # ( + # self.p, + # self.r, + # self.f1, + # self.all_ap, + # self.ap_class_index, + # _, # self.p_curve, + # _, # self.r_curve, + # _, # self.f1_curve, + # _, # self.px, + # _, # self.prec_values, + # ) = results + + def _process(self, tp, conf, pred_cls, target_cls) -> tuple[np.ndarray, ...]: """Process predicted results for object detection and update metrics.""" results = MeanAveragePrecisionOBB.ap_per_class( tp, @@ -286,10 +270,9 @@ def process(self, tp, conf, pred_cls, target_cls): # plot=self.plot, # save_dir=self.save_dir, # names=self.names, - on_plot=self.on_plot, + # on_plot=self.on_plot, )[2:] - self.box.nc = len(self.names) - self.box.update(results) + return results @staticmethod def ap_per_class( @@ -297,12 +280,12 @@ def ap_per_class( conf, pred_cls, target_cls, - plot=False, - on_plot=None, + # plot=False, + # on_plot=None, # save_dir=Path(), # names={}, eps=1e-16, - prefix="", + # prefix="", ): """Computes the average precision per class for object detection evaluation. 
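The hunks above feed the cumulative statistics (the `(N, 10)` correct-prediction matrix from `_process_batch`, plus confidences and class indices) into `ap_per_class`. As intuition for what that reduction does, here is a minimal, self-contained sketch; it is not the patch's implementation, and the simplified precision/recall handling (no envelope, no F1 smoothing) is an assumption:

```python
import numpy as np

def toy_map(tp, conf, pred_cls, target_cls, eps=1e-16):
    """Toy mAP@0.5:0.95 from a (N, n_iou) true-positive matrix."""
    order = np.argsort(-conf)                  # rank detections by confidence
    tp, pred_cls = tp[order], pred_cls[order]
    aps = []
    for c in np.unique(target_cls):
        n_gt = int((target_cls == c).sum())
        tpc = tp[pred_cls == c]                # (n_det_c, n_iou)
        if n_gt == 0 or len(tpc) == 0:
            continue
        cum_tp = tpc.cumsum(0)
        cum_fp = (1 - tpc).cumsum(0)
        recall = cum_tp / (n_gt + eps)
        precision = cum_tp / (cum_tp + cum_fp + eps)
        x = np.linspace(0, 1, 101)             # COCO-style 101-point interpolation
        ap_per_iou = [np.trapz(np.interp(x, recall[:, j], precision[:, j]), x)
                      for j in range(tpc.shape[1])]
        aps.append(np.mean(ap_per_iou))
    return float(np.mean(aps)) if aps else 0.0

# 3 detections, 2 ground truths of class 0, 10 IoU thresholds
tp = np.array([[True] * 10, [True] * 7 + [False] * 3, [False] * 10])
print(toy_map(tp, np.array([0.9, 0.8, 0.3]), np.zeros(3), np.zeros(2)))
```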
@@ -374,72 +357,90 @@ def ap_per_class( ) # p at pr_score # AP from recall-precision curve - # for j in range(tp.shape[1]): - # ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) - # if plot and j == 0: - # prec_values.append(np.interp(x, mrec, mpre)) # precision at mAP@0.5 + for j in range(tp.shape[1]): + ap[ci, j], mpre, mrec = MeanAveragePrecisionOBB.compute_ap( + recall[:, j], precision[:, j] + ) + # if plot and j == 0: + # prec_values.append(np.interp(x, mrec, mpre)) # precision at mAP@0.5 prec_values = np.array(prec_values) # (nc, 1000) # Compute F1 (harmonic mean of precision and recall) - # f1_curve = 2 * p_curve * r_curve / (p_curve + r_curve + eps) - # names = [ - # v for k, v in names.items() if k in unique_classes - # ] # list: only classes that have data - # names = dict(enumerate(names)) # to dict - # if plot: - # plot_pr_curve( - # x, - # prec_values, - # ap, - # save_dir / f"{prefix}PR_curve.png", - # names, - # on_plot=on_plot, - # ) - # plot_mc_curve( - # x, - # f1_curve, - # save_dir / f"{prefix}F1_curve.png", - # names, - # ylabel="F1", - # on_plot=on_plot, - # ) - # plot_mc_curve( - # x, - # p_curve, - # save_dir / f"{prefix}P_curve.png", - # names, - # ylabel="Precision", - # on_plot=on_plot, - # ) - # plot_mc_curve( - # x, - # r_curve, - # save_dir / f"{prefix}R_curve.png", - # names, - # ylabel="Recall", - # on_plot=on_plot, - # ) - - # i = smooth(f1_curve.mean(0), 0.1).argmax() # max F1 index - # p, r, f1 = ( - # p_curve[:, i], - # r_curve[:, i], - # f1_curve[:, i], - # ) # max-F1 precision, recall, F1 values - # tp = (r * nt).round() # true positives - # fp = (tp / (p + eps) - tp).round() # false positives + f1_curve = 2 * p_curve * r_curve / (p_curve + r_curve + eps) + + i = MeanAveragePrecisionOBB.smooth( + f1_curve.mean(0), 0.1 + ).argmax() # max F1 index + p, r, f1 = ( + p_curve[:, i], + r_curve[:, i], + f1_curve[:, i], + ) # max-F1 precision, recall, F1 values + tp = (r * nt).round() # true positives + fp = (tp / (p + eps) - tp).round() # false positives return ( - # tp, - # fp, - # p, - # r, - # f1, + tp, + fp, + p, + r, + f1, ap, unique_classes.astype(int), p_curve, r_curve, - # f1_curve, + f1_curve, x, prec_values, ) + + @staticmethod + def compute_ap(recall, precision): + """Compute the average precision (AP) given the recall and precision curves. + + Args: + recall (list): The recall curve. + precision (list): The precision curve. + + Returns: + (float): Average precision. + (np.ndarray): Precision envelope curve. + (np.ndarray): Modified recall curve with sentinel values added at the beginning and end. 
+ """ + # Append sentinel values to beginning and end + mrec = np.concatenate(([0.0], recall, [1.0])) + mpre = np.concatenate(([1.0], precision, [0.0])) + + # Compute the precision envelope + mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) + + # Integrate area under curve + method = "interp" # methods: 'continuous', 'interp' + if method == "interp": + x = np.linspace(0, 1, 101) # 101-point interp (COCO) + ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate + else: # 'continuous' + i = np.where(mrec[1:] != mrec[:-1])[ + 0 + ] # points where x-axis (recall) changes + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve + + return ap, mpre, mrec + + @staticmethod + def smooth(y, f=0.05): + """Box filter of fraction f.""" + nf = round(len(y) * f * 2) // 2 + 1 # number of filter elements (must be odd) + p = np.ones(nf // 2) # ones padding + yp = np.concatenate((p * y[0], y, p * y[-1]), 0) # y padded + return np.convolve(yp, np.ones(nf) / nf, mode="valid") # y-smoothed + + @staticmethod + def map(all_ap): + """ + Returns the mean Average Precision (mAP) over IoU thresholds of 0.5 - 0.95 in steps of 0.05. + + Returns: + (float): The mAP over IoU thresholds of 0.5 - 0.95 in steps of 0.05. + """ + return all_ap.mean() if len(all_ap) else 0.0 diff --git a/luxonis_train/attached_modules/visualizers/__init__.py b/luxonis_train/attached_modules/visualizers/__init__.py index a5652cb4..5f29e744 100644 --- a/luxonis_train/attached_modules/visualizers/__init__.py +++ b/luxonis_train/attached_modules/visualizers/__init__.py @@ -3,6 +3,7 @@ from .classification_visualizer import ClassificationVisualizer from .keypoint_visualizer import KeypointVisualizer from .multi_visualizer import MultiVisualizer +from .obbox_visualizer import OBBoxVisualizer from .segmentation_visualizer import SegmentationVisualizer from .utils import ( combine_visualizations, @@ -18,6 +19,7 @@ __all__ = [ "BBoxVisualizer", + "OBBoxVisualizer", "BaseVisualizer", "ClassificationVisualizer", "KeypointVisualizer", diff --git a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py index 980580ce..df3ac933 100644 --- a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py @@ -10,7 +10,7 @@ class BBoxVisualizer(BaseVisualizer[list[Tensor], Tensor]): - supported_labels = [LabelType.BOUNDINGBOX, LabelType.OBOUNDINGBOX] + supported_labels = [LabelType.BOUNDINGBOX] def __init__( self, @@ -195,8 +195,3 @@ def forward( width=self.width, ) return targets_viz, predictions_viz.to(targets_viz.device) - - -class OBBoxVisualizer(BaseVisualizer[list[Tensor], Tensor]): - def forward(self): - pass diff --git a/luxonis_train/attached_modules/visualizers/obbox_visualizer.py b/luxonis_train/attached_modules/visualizers/obbox_visualizer.py new file mode 100644 index 00000000..3945e1af --- /dev/null +++ b/luxonis_train/attached_modules/visualizers/obbox_visualizer.py @@ -0,0 +1,197 @@ +import logging + +import torch +from torch import Tensor + +from luxonis_train.utils.types import LabelType + +from .base_visualizer import BaseVisualizer +from .utils import Color, draw_obounding_box, get_color + + +class OBBoxVisualizer(BaseVisualizer[list[Tensor], Tensor]): + supported_labels = [LabelType.OBOUNDINGBOX] + + def __init__( + self, + labels: dict[int, str] | list[str] | None = None, + draw_labels: bool = True, + colors: dict[str, Color] | list[Color] | None = None, + fill: bool = False, + width: 
int | None = None, + font: str | None = None, + font_size: int | None = None, + **kwargs, + ): + """Visualizer for bounding box predictions. + + Creates a visualization of the bounding box predictions and labels. + + @type labels: dict[int, str] | list[str] | None + @param labels: Either a dictionary mapping class indices to names, or a list of + names. If list is provided, the label mapping is done by index. By default, + no labels are drawn. + @type draw_labels: bool + @param draw_labels: Whether or not to draw labels. Defaults to C{True}. + @type colors: dict[int, Color] | list[Color] | None + @param colors: Either a dictionary mapping class indices to colors, or a list of + colors. If list is provided, the color mapping is done by index. By default, + random colors are used. + @type fill: bool + @param fill: Whether or not to fill the bounding boxes. Defaults to C{False}. + @type width: int | None + @param width: The width of the bounding box lines. Defaults to C{1}. + @type font: str | None + @param font: A filename containing a TrueType font. Defaults to C{None}. + @type font_size: int | None + @param font_size: The font size to use for the labels. Defaults to C{None}. + """ + super().__init__(**kwargs) + if isinstance(labels, list): + labels = {i: label for i, label in enumerate(labels)} + + self.bbox_labels = labels or { + i: label for i, label in enumerate(self.node.class_names) + } + + if colors is None: + colors = {label: get_color(i) for i, label in self.bbox_labels.items()} + if isinstance(colors, list): + colors = {self.bbox_labels[i]: color for i, color in enumerate(colors)} + self.colors = colors + self.fill = fill + self.width = width + self.font = font + self.font_size = font_size + self.draw_labels = draw_labels + + @staticmethod + def draw_targets( + canvas: Tensor, + targets: Tensor, + width: int | None = None, + colors: list[Color] | None = None, + labels: list[str] | None = None, + label_dict: dict[int, str] | None = None, + color_dict: dict[str, Color] | None = None, + draw_labels: bool = True, + **kwargs, + ) -> Tensor: + viz = torch.zeros_like(canvas) + + for i in range(len(canvas)): + target = targets[targets[:, 0] == i] + target_classes = target[:, 1].int() + cls_labels = labels or ( + [label_dict[int(c)] for c in target_classes] + if draw_labels and label_dict is not None + else None + ) + cls_colors = colors or ( + [color_dict[label_dict[int(c)]] for c in target_classes] + if color_dict is not None and label_dict is not None + else None + ) + + *_, H, W = canvas.shape + width = width or max(1, int(min(H, W) / 100)) + viz[i] = draw_obounding_box( + canvas[i].clone(), + target[:, 2:], + width=width, + labels=cls_labels, + colors=cls_colors, + **kwargs, + ).to(canvas.device) + + return viz + + @staticmethod + def draw_predictions( + canvas: Tensor, + predictions: list[Tensor], + width: int | None = None, + colors: list[Color] | None = None, + labels: list[str] | None = None, + label_dict: dict[int, str] | None = None, + color_dict: dict[str, Color] | None = None, + draw_labels: bool = True, + **kwargs, + ) -> Tensor: + viz = torch.zeros_like(canvas) + + for i in range(len(canvas)): + prediction = predictions[i] + prediction_classes = prediction[..., 5].int() + cls_labels = labels or ( + [label_dict[int(c)] for c in prediction_classes] + if draw_labels and label_dict is not None + else None + ) + cls_colors = colors or ( + [color_dict[label_dict[int(c)]] for c in prediction_classes] + if color_dict is not None and label_dict is not None + else None + ) + + *_, H, 
W = canvas.shape + width = width or max(1, int(min(H, W) / 100)) + try: + viz[i] = draw_obounding_box( + canvas[i].clone(), + prediction[:, :5], + width=width, + labels=cls_labels, + colors=cls_colors, + **kwargs, + ) + except ValueError as e: + logging.getLogger(__name__).warning( + f"Failed to draw bounding boxes: {e}. Skipping visualization." + ) + viz = canvas + return viz + + def forward( + self, + label_canvas: Tensor, + prediction_canvas: Tensor, + predictions: list[Tensor], + targets: Tensor, + ) -> tuple[Tensor, Tensor]: + """Creates a visualization of the bounding box predictions and labels. + + @type label_canvas: Tensor + @param label_canvas: The canvas containing the labels. + @type prediction_canvas: Tensor + @param prediction_canvas: The canvas containing the predictions. + @type prediction: Tensor + @param prediction: The predicted bounding boxes. The shape should be [N, 6], + where N is the number of bounding boxes and the last dimension is [x1, y1, + x2, y2, class, conf]. + @type targets: Tensor + @param targets: The target bounding boxes. + """ + targets_viz = self.draw_targets( + label_canvas, + targets, + color_dict=self.colors, + label_dict=self.bbox_labels, + draw_labels=self.draw_labels, + fill=self.fill, + font=self.font, + font_size=self.font_size, + width=self.width, + ) + predictions_viz = self.draw_predictions( + prediction_canvas, + predictions, + label_dict=self.bbox_labels, + color_dict=self.colors, + draw_labels=self.draw_labels, + fill=self.fill, + font=self.font, + font_size=self.font_size, + width=self.width, + ) + return targets_viz, predictions_viz.to(targets_viz.device) diff --git a/luxonis_train/attached_modules/visualizers/utils.py b/luxonis_train/attached_modules/visualizers/utils.py index c55b12ce..ec61341a 100644 --- a/luxonis_train/attached_modules/visualizers/utils.py +++ b/luxonis_train/attached_modules/visualizers/utils.py @@ -1,6 +1,7 @@ import colorsys import io -from typing import Literal +import warnings +from typing import List, Literal, Optional, Tuple, Union import cv2 import matplotlib.pyplot as plt @@ -10,15 +11,21 @@ import torchvision.transforms.functional as F import torchvision.transforms.functional as TF from matplotlib.figure import Figure -from PIL import Image +from PIL import Image, ImageDraw, ImageFont from torch import Tensor from torchvision.ops import box_convert from torchvision.utils import ( + _log_api_usage_once, + _parse_colors, draw_bounding_boxes, draw_keypoints, draw_segmentation_masks, ) +from luxonis_train.utils.boxutils import ( + xywhr2xyxyxyxy, + xyxyxyxy2xywhr, +) from luxonis_train.utils.config import Config Color = str | tuple[int, int, int] @@ -144,6 +151,149 @@ def draw_bounding_box_labels(img: Tensor, label: Tensor, **kwargs) -> Tensor: return draw_bounding_boxes(img, bboxs, **kwargs) +def draw_obounding_box(img: Tensor, obbox: Tensor | np.ndarray, **kwargs) -> Tensor: + """Draws oriented bounding box (obb) labels on an image. + + @type img: Tensor + @param img: Image to draw on. + @type obbox: Tensor + @param obbox: Oriented bounding box. The shape should be (n_instances, 8) or + (n_instances, 5), where the last dimension is (x1, y1, x2, y2, x3, y3, x4, y4) + or (xc, yc, w, h, r). + @type kwargs: dict + @param kwargs: Additional arguments to pass to L{draw_obounding_boxes}. + @rtype: Tensor + @return: Image with bounding box labels drawn on. 
+ """ + _, H, W = img.shape + # bboxs = box_convert(label, "xywh", "xyxy") + # The conversion below is needed for fitting a rectangle to the 4 label points, which can form + # a polygon sometimes + if obbox.shape[-1] > 5: + obbox = xyxyxyxy2xywhr(obbox) # xywhr + bboxs_2 = xywhr2xyxyxyxy(obbox) # shape: (bs, 4, 2) + if isinstance(bboxs_2, np.ndarray): + bboxs_2 = torch.tensor(bboxs_2) + if bboxs_2.numel() == 0: + raise ValueError + bboxs = bboxs_2.view(bboxs_2.size(0), -1) # x1y1x2y2x3y3x4y4 + bboxs[:, 0::2] *= W + bboxs[:, 1::2] *= H + return draw_obounding_boxes(img, bboxs, **kwargs) + + +def draw_obounding_boxes( + image: torch.Tensor, + boxes: torch.Tensor, + labels: Optional[List[str]] = None, + colors: Optional[ + Union[List[Union[str, Tuple[int, int, int]]], str, Tuple[int, int, int]] + ] = None, + fill: Optional[bool] = False, + width: int = 1, + font: Optional[str] = None, + font_size: Optional[int] = None, +) -> torch.Tensor: + """Draws oriented bounding boxes (obb) on given RGB image. The image values should + be uint8 in [0, 255] or float in [0, 1]. If fill is True, Resulting Tensor should be + saved as PNG image. + + Args: + image (Tensor): Tensor of shape (C, H, W) and dtype uint8 or float. + boxes (Tensor): Tensor of size (N, 8) containing bounding boxes in (x1, y1, x2, y2, x3, y3, x4, y4) + format. Note that the boxes are absolute coordinates with respect to the image. In other words: `0 <= x < W` and + `0 <= y < H`. + labels (List[str]): List containing the labels of bounding boxes. + colors (color or list of colors, optional): List containing the colors + of the boxes or single color for all boxes. The color can be represented as + PIL strings e.g. "red" or "#FF00FF", or as RGB tuples e.g. ``(240, 10, 157)``. + By default, random colors are generated for boxes. + fill (bool): If `True` fills the bounding box with specified color. + width (int): Width of bounding box. + font (str): A filename containing a TrueType font. If the file is not found in this filename, the loader may + also search in other directories, such as the `fonts/` directory on Windows or `/Library/Fonts/`, + `/System/Library/Fonts/` and `~/Library/Fonts/` on macOS. + font_size (int): The requested font size in points. + + Returns: + img (Tensor[C, H, W]): Image Tensor of dtype uint8 with bounding boxes plotted. + """ + import torchvision.transforms.v2.functional as F # noqa + + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(draw_obounding_boxes) + if not isinstance(image, torch.Tensor): + raise TypeError(f"Tensor expected, got {type(image)}") + elif not (image.dtype == torch.uint8 or image.is_floating_point()): + raise ValueError(f"The image dtype must be uint8 or float, got {image.dtype}") + elif image.dim() != 3: + raise ValueError("Pass individual images, not batches") + elif image.size(0) not in {1, 3}: + raise ValueError("Only grayscale and RGB images are supported") + # elif (boxes[:, 0] > boxes[:, 2]).any() or (boxes[:, 1] > boxes[:, 3]).any(): + # raise ValueError( + # "Boxes need to be in (xmin, ymin, xmax, ymax) format. Use torchvision.ops.box_convert to convert them" + # ) + + num_boxes = boxes.shape[0] + + if num_boxes == 0: + warnings.warn("boxes doesn't contain any box. No box was drawn") + return image + + if labels is None: + labels: Union[List[str], List[None]] = [None] * num_boxes # type: ignore[no-redef] + elif len(labels) != num_boxes: + raise ValueError( + f"Number of boxes ({num_boxes}) and labels ({len(labels)}) mismatch. 
Please specify labels for each box." + ) + + colors = _parse_colors(colors, num_objects=num_boxes) + + if font is None: + if font_size is not None: + warnings.warn( + "Argument 'font_size' will be ignored since 'font' is not set." + ) + txt_font = ImageFont.load_default() + else: + txt_font = ImageFont.truetype(font=font, size=font_size or 10) + + # Handle Grayscale images + if image.size(0) == 1: + image = torch.tile(image, (3, 1, 1)) + + original_dtype = image.dtype + if original_dtype.is_floating_point: + image = F.to_dtype(image, dtype=torch.uint8, scale=True) + + img_to_draw = F.to_pil_image(image) + img_boxes = boxes.to(torch.int64).tolist() + + if fill: + draw = ImageDraw.Draw(img_to_draw, "RGBA") + else: + draw = ImageDraw.Draw(img_to_draw) + + for bbox, color, label in zip(img_boxes, colors, labels): # type: ignore[arg-type] + if fill: + fill_color = color + (100,) + draw.polygon(bbox, width=width, outline=color, fill=fill_color) + else: + draw.polygon(bbox, width=width, outline=color) + + if label is not None: + margin = width + 1 + draw.text( + (bbox[0] + margin, bbox[1] + margin), label, fill=color, font=txt_font + ) + + out = F.pil_to_tensor(img_to_draw) + if original_dtype.is_floating_point: + out = F.to_dtype(out, dtype=original_dtype, scale=True) + return out + + def draw_keypoint_labels(img: Tensor, label: Tensor, **kwargs) -> Tensor: """Draws keypoint labels on an image. diff --git a/luxonis_train/nodes/heads/efficient_obbox_head.py b/luxonis_train/nodes/heads/efficient_obbox_head.py index fea0814e..d724e61a 100644 --- a/luxonis_train/nodes/heads/efficient_obbox_head.py +++ b/luxonis_train/nodes/heads/efficient_obbox_head.py @@ -130,7 +130,7 @@ def _process_to_bbox( self, output: tuple[list[Tensor], Tensor, Tensor, Tensor] ) -> list[Tensor]: """Performs post-processing of the output and returns bboxs after NMS.""" - features, cls_score_list, reg_dist_list, angles_list = output + features, cls_score_tensor, reg_dist_tensor, angles_tensor = output _, anchor_points, _, stride_tensor = anchors_for_fpn_features( features, self.stride, @@ -143,18 +143,18 @@ def _process_to_bbox( # branch (used in DFL) # if self.use_dfl: # consider adding this as a parameter proj = torch.arange( - self.reg_max, dtype=torch.float, device=reg_dist_list.device + self.reg_max, dtype=torch.float, device=reg_dist_tensor.device ) - b, a, c = reg_dist_list.shape # batch, anchors, channels - reg_dist_tensor = ( # we get a tensor of the expected values (mean) of the regression predictions - reg_dist_list.view(b, a, 4, c // 4) + b, a, c = reg_dist_tensor.shape # batch, anchors, channels + reg_dist_mean_tensor = ( # we get a tensor of the expected values (mean) of the regression predictions + reg_dist_tensor.view(b, a, 4, c // 4) .softmax(3) - .matmul(proj.type(reg_dist_list.dtype)) + .matmul(proj.type(reg_dist_tensor.dtype)) ) pred_bboxes = torch.cat( ( - dist2rbbox(reg_dist_tensor, angles_list, anchor_points), - angles_list, + dist2rbbox(reg_dist_mean_tensor, angles_tensor, anchor_points), + angles_tensor, ), dim=-1, ) # xywhr @@ -172,12 +172,12 @@ def _process_to_bbox( dtype=pred_bboxes.dtype, device=pred_bboxes.device, ), - cls_score_list, + cls_score_tensor, ], dim=-1, ) - # pred = torch.rand((1, 1344, 15)) + # pred = torch.rand((2, 1344, 15), device=pred_bboxes.device) # pred[..., 5] = 1 return non_max_suppression_obb( diff --git a/test_models.py b/test_models.py index 064f57f4..6f88619b 100644 --- a/test_models.py +++ b/test_models.py @@ -5,7 +5,7 @@ TEST_OUTPUT = Path("./probe") OPTS = { 
"trainer.epochs": 1, - "trainer.batch_size": 1, + "trainer.batch_size": 4, "trainer.validation_interval": 1, "trainer.callbacks": "[]", "tracker.save_directory": str(TEST_OUTPUT), @@ -16,7 +16,8 @@ def main(): config_file = "obb_detection_model" config_file = f"configs/{config_file}.yaml" - model = LuxonisModel(config_file, opts=OPTS) + # model = LuxonisModel(config_file, opts=OPTS) + model = LuxonisModel(config_file) model.train() model.test() From 0df07a5dc7386d558a47dd1d8b8e9c7cdd4de483 Mon Sep 17 00:00:00 2001 From: KlemenSkrlj <47853619+klemen1999@users.noreply.github.com> Date: Fri, 13 Sep 2024 12:15:16 +0200 Subject: [PATCH 65/75] Increased default validation interval (#71) --- luxonis_train/utils/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index e3e4c0fb..bdcd13dc 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -280,7 +280,7 @@ class TrainerConfig(BaseModelExtraForbid): epochs: PositiveInt = 100 num_workers: NonNegativeInt = 4 train_metrics_interval: Literal[-1] | PositiveInt = -1 - validation_interval: Literal[-1] | PositiveInt = 1 + validation_interval: Literal[-1] | PositiveInt = 5 num_log_images: NonNegativeInt = 4 skip_last_batch: bool = True pin_memory: bool = True From b3f9f5d787f564cc86dbddaf75997f805a9cc90a Mon Sep 17 00:00:00 2001 From: KlemenSkrlj <47853619+klemen1999@users.noreply.github.com> Date: Tue, 17 Sep 2024 09:14:40 +0200 Subject: [PATCH 66/75] [Fix] Corrected config valid sequence for predefined models (#72) --- luxonis_train/utils/config.py | 42 +++++++++++++++++------------------ 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index bdcd13dc..31e4fe5b 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -77,27 +77,6 @@ class ModelConfig(BaseModelExtraForbid): visualizers: list[AttachedModuleConfig] = [] outputs: list[str] = [] - @model_validator(mode="after") - def check_main_metric(self) -> Self: - for metric in self.metrics: - if metric.is_main_metric: - logger.info(f"Main metric: `{metric.name}`") - return self - - logger.warning("No main metric specified.") - if self.metrics: - metric = self.metrics[0] - metric.is_main_metric = True - name = metric.alias or metric.name - logger.info(f"Setting '{name}' as main metric.") - else: - logger.error( - "No metrics specified. " - "This is likely unintended unless " - "the configuration is not used for training." - ) - return self - @model_validator(mode="after") def check_predefined_model(self) -> Self: from luxonis_train.utils.registry import MODELS @@ -120,6 +99,27 @@ def check_predefined_model(self) -> Self: return self + @model_validator(mode="after") + def check_main_metric(self) -> Self: + for metric in self.metrics: + if metric.is_main_metric: + logger.info(f"Main metric: `{metric.name}`") + return self + + logger.warning("No main metric specified.") + if self.metrics: + metric = self.metrics[0] + metric.is_main_metric = True + name = metric.alias or metric.name + logger.info(f"Setting '{name}' as main metric.") + else: + logger.error( + "No metrics specified. " + "This is likely unintended unless " + "the configuration is not used for training." 
+ ) + return self + @model_validator(mode="after") def check_graph(self) -> Self: from luxonis_train.utils.general import is_acyclic From ae9c26d103086f362ccd959480afa0a4d284db66 Mon Sep 17 00:00:00 2001 From: Anton Makoveev Date: Tue, 17 Sep 2024 11:27:46 +0200 Subject: [PATCH 67/75] [WiP]: add obb_loader and minor fixes --- configs/obb_detection_model.yaml | 26 +- .../losses/obb_detection_loss.py | 43 +-- .../metrics/mean_average_precision_obb.py | 126 ++++--- .../visualizers/obbox_visualizer.py | 14 +- .../attached_modules/visualizers/utils.py | 1 - .../nodes/heads/efficient_obbox_head.py | 2 +- luxonis_train/utils/boxutils.py | 22 +- luxonis_train/utils/loaders/__init__.py | 2 + luxonis_train/utils/loaders/obb_tmp_loader.py | 328 ++++++++++++++++++ test_models.py | 1 + 10 files changed, 463 insertions(+), 102 deletions(-) create mode 100644 luxonis_train/utils/loaders/obb_tmp_loader.py diff --git a/configs/obb_detection_model.yaml b/configs/obb_detection_model.yaml index a1f7f42d..316bcfd1 100644 --- a/configs/obb_detection_model.yaml +++ b/configs/obb_detection_model.yaml @@ -8,33 +8,41 @@ model: use_neck: True loader: + # name: OBBLoaderTorch + train_view: train + val_view: val + # test_view: train + params: dataset_name: obb_test - dataset_dir: "../dota8" + # dataset_dir: "../dota8" + dataset_dir: "../DOTA" dataset_type: YOLOV6OBB trainer: # preprocessing: - # train_image_size: [&height 256, &width 320] + # train_image_size: [&height 512, &width 512] + # normalize: + # active: True # keep_aspect_ratio: False # normalize: # active: True - batch_size: 4 - epochs: &epochs 200 + batch_size: 8 + epochs: &epochs 10 num_workers: 4 validation_interval: 10 num_log_images: 8 callbacks: - - name: ExportOnTrainEnd + # - name: ExportOnTrainEnd - name: TestOnTrainEnd optimizer: - name: SGD + name: Adam params: - lr: 0.02 + lr: 0.001 - scheduler: - name: ConstantLR + # scheduler: + # name: ConstantLR diff --git a/luxonis_train/attached_modules/losses/obb_detection_loss.py b/luxonis_train/attached_modules/losses/obb_detection_loss.py index b3081e61..baa80d3d 100644 --- a/luxonis_train/attached_modules/losses/obb_detection_loss.py +++ b/luxonis_train/attached_modules/losses/obb_detection_loss.py @@ -40,13 +40,10 @@ def __init__( reg_max: int = 16, **kwargs, ): - """BBox loss adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial Applications - }. It combines IoU based bbox regression loss and varifocal loss - for classification. - Code is adapted from U{https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/models}. + """OBBox (oriented bounding box) loss partially adapted from U{YOLOv8: + https://github.com/ultralytics/ultralytics/blob/ba438aea5ae4d0e7c28d59ed8408955d16ca71ec/ultralytics/utils/loss.py#L610 + }. It combines IoU based bbox regression, varifocal, and dfl losses. - @type n_warmup_epochs: int - @param n_warmup_epochs: Number of epochs where ATSS assigner is used, after that we switch to TAL assigner. @type iou_type: L{IoUType} @param iou_type: IoU type used for bbox regression loss. @type reduction: Literal["sum", "mean"] @@ -55,6 +52,10 @@ def __init__( @param class_loss_weight: Weight of classification loss. @type iou_loss_weight: float @param iou_loss_weight: Weight of IoU loss. + @type dfl_loss_weight: float + @param dfl_loss_weight: Weight of DFL loss. + @type reg_max: int + @param reg_max: Number of bins for predicting the distributions of bounding box coordinates. @type kwargs: dict @param kwargs: Additional arguments to pass to L{BaseLoss}. 
""" @@ -83,12 +84,6 @@ def __init__( self.varifocal_loss = VarifocalLoss() # self.bce = nn.BCEWithLogitsLoss(reduction="none") - # self.n_warmup_epochs = n_warmup_epochs - # self.atts_assigner = ATSSAssigner(topk=9, n_classes=self.n_classes) - # self.tal_assigner = TaskAlignedAssigner( - # topk=13, n_classes=self.n_classes, alpha=1.0, beta=6.0 - # ) - self.class_loss_weight = class_loss_weight self.iou_loss_weight = iou_loss_weight self.dfl_loss_weight = dfl_loss_weight @@ -132,9 +127,7 @@ def prepare( self.grid_cell_offset, multiply_with_stride=True, ) - self.anchor_points_strided = ( - self.anchor_points / self.stride_tensor - ) # NOTE: check later for dimenstions + self.anchor_points_strided = self.anchor_points / self.stride_tensor target = self._preprocess_target( target, batch_size @@ -182,9 +175,7 @@ def prepare( mask_gt, ) - xy_unstrided = ( - assigned_bboxes[..., :2] / self.stride_tensor - ) # NOTE: check for dimensions during training + xy_unstrided = assigned_bboxes[..., :2] / self.stride_tensor assigned_bboxes_unstrided = torch.cat( [xy_unstrided, assigned_bboxes[..., 2:]], dim=-1 ) # xywhr unnormalized with xy strided @@ -211,6 +202,7 @@ def forward( # CLS loss loss_cls = self.varifocal_loss(pred_scores, assigned_scores, one_hot_label) + # loss_cls = self.bce(pred_scores, assigned_scores) if assigned_scores.sum() > 1: loss_cls /= assigned_scores.sum() @@ -266,7 +258,6 @@ def _preprocess_target(self, target: Tensor, batch_size: int): [scaled_target, out_target[:, :, 5].unsqueeze(-1)], dim=-1, ) - # out_target[..., 1:] = box_convert(scaled_target, "xywh", "xyxy") out_target[..., 1:] = scaled_target_angle return out_target @@ -302,7 +293,12 @@ def forward( class DFLoss(nn.Module): - """Criterion class for computing DFL losses during training.""" + """Criterion class for computing DFL losses during training. + + @type reg_max: int + @param reg_max: Number of bins for predicting the distributions of bounding box + coordinates. + """ def __init__(self, reg_max=16) -> None: """Initialize the DFL module.""" @@ -330,7 +326,12 @@ def __call__(self, pred_dist, target): class RotatedBboxLoss(nn.Module): - """Criterion class for computing training losses during training.""" + """Criterion class for computing training losses during training. + + @type reg_max: int + @param reg_max: Number of bins for predicting the distributions of bounding box + coordinates. + """ def __init__(self, reg_max): """Initialize the BboxLoss module with regularization maximum and DFL diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py b/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py index b7b38c8d..ea9dfac4 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py @@ -9,11 +9,11 @@ class MeanAveragePrecisionOBB(BaseMetric): - """Compute the Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR) for - oriented object detection predictions. + """Compute the Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR) for object + detection predictions using oriented bounding boxes. - Adapted from U{Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR) - }. + Partially adapted from U{YOLOv8 OBBMetrics + }. 
""" supported_labels = [LabelType.OBOUNDINGBOX] @@ -37,13 +37,17 @@ def __init__(self, **kwargs): def update( self, outputs: list[Tensor], # preds - labels: Tensor, # batch + labels: list[Tensor], # batch ): """Update metrics without erasing stats from the previous batch, i.e. the metrics are calculated cumulatively. - preds: [x1, y1, x2, y2, conf, cls_idx, r] unnormalized (not in [0, 1] range) [Tensor(n_bboxes, 7)] - batch: [cls_idx, x1, y1, x2, y2, r] unnormalized (not in [0, 1] range) [Tensor(n_bboxes, 6)] + @type outputs: list[Tensor] + @param outputs: Network predictions [x1, y1, x2, y2, conf, cls_idx, r] + unnormalized (not in [0, 1] range) [Tensor(n_bboxes, 7)] + @type labels: list[Tensor] + @param labels: [cls_idx, x1, y1, x2, y2, r] unnormalized (not in [0, 1] range) + [Tensor(n_bboxes, 6)] """ for si, output in enumerate(outputs): self.stats["conf"].append(output[:, 4]) @@ -71,17 +75,12 @@ def prepare( ) -> tuple[list[Tensor], list[Tensor]]: # outputs_nms: [x, y, w, h, r, conf, cls_idx] unnormalized (not in [0, 1] range) [Tensor(n_bboxes, 7)] # obb_labels: [img_id, cls_idx, x1, y1, x2, y2, x3, y3, x4, y4] normalized (in [0, 1] range) [Tensor(n_bboxes, 10)] - - # preds: [xc, yc, w, h, conf, cls_idx, r] unnormalized (not in [0, 1] range) [Tensor(n_bboxes, 7)] - # batch: [cls_idx, xc, yc, w, h, r] unnormalized (not in [0, 1] range) [Tensor(n_bboxes, 6)] - obb_labels = self.get_label(labels)[0] output_nms = self.get_input_tensors(outputs) pred_scores = self.get_input_tensors(outputs, "class_scores")[ 0 - ] # needed for batch size and device + ] # needed for batch size - # device = pred_scores.device batch_size = pred_scores.shape[0] img_size = self.node.original_in_shape[1:] @@ -90,7 +89,6 @@ def prepare( output_nms[i][..., [0, 1, 2, 3, 4, 5, 6]] = output_nms[i][ ..., [0, 1, 2, 3, 5, 6, 4] ] # move angle to the end - # output_list.append(output_nms[i]) curr_label = obb_labels[obb_labels[:, 0] == i] output_labels.append( @@ -99,7 +97,7 @@ def prepare( return output_nms, output_labels - def _preprocess_target(self, target: Tensor, batch_size: int, img_size): + def _preprocess_target(self, target: Tensor, batch_size: int, img_size) -> Tensor: """Preprocess target in shape [batch_size, N, 6] where N is maximum number of instances in one image.""" cls_idx = target[:, 1].unsqueeze(-1) @@ -121,7 +119,7 @@ def reset(self) -> None: def compute( self, - ) -> tuple[Tensor, dict[str, Tensor]]: # NOTE: change to the appropriate types + ) -> tuple[Tensor, dict[str, Tensor]]: """Process predicted results for object detection and update metrics.""" results = self._process( torch.cat(self.stats["tp"]).cpu().numpy(), @@ -142,20 +140,23 @@ def compute( return map, metrics - def _process_batch(self, detections, gt_bboxes, gt_cls): + def _process_batch( + self, detections: Tensor, gt_bboxes: Tensor, gt_cls: Tensor + ) -> Tensor: """Perform computation of the correct prediction matrix for a batch of # "fp": torch.from_numpy(results[1]), detections and ground truth bounding boxes. - Args: - detections (torch.Tensor): A tensor of shape (N, 7) representing the detected bounding boxes and associated - data. Each detection is represented as (x1, y1, x2, y2, conf, class, angle). - gt_bboxes (torch.Tensor): A tensor of shape (M, 5) representing the ground truth bounding boxes. Each box is - represented as (x1, y1, x2, y2, angle). - gt_cls (torch.Tensor): A tensor of shape (M,) representing class labels for the ground truth bounding boxes. 
- - Returns: - (torch.Tensor): The correct prediction matrix with shape (N, 10), which includes 10 IoU (Intersection over - Union) levels for each detection, indicating the accuracy of predictions compared to the ground truth. + @type detections: Tensor + @param detections: A tensor of shape (N, 7) representing the detected bounding boxes and associated + data. Each detection is represented as (x1, y1, x2, y2, conf, class, angle). + @type gt_bboxes: Tensor + @param gt_bboxes: A tensor of shape (M, 5) representing the ground truth bounding boxes. Each box is + represented as (x1, y1, x2, y2, angle). + @type gt_cls: Tensor + @param gt_cls: A tensor of shape (M,) representing class labels for the ground truth bounding boxes. + @rtype: Tensor + @return: The correct prediction matrix with shape (N, 10), which includes 10 IoU (Intersection over + Union) levels for each detection, indicating the accuracy of predictions compared to the ground truth. Example: ```python @@ -174,18 +175,27 @@ def _process_batch(self, detections, gt_bboxes, gt_cls): ) return self.match_predictions(detections[:, 5], gt_cls, iou) - def match_predictions(self, pred_classes, true_classes, iou, use_scipy=False): + def match_predictions( + self, + pred_classes: Tensor, + true_classes: Tensor, + iou: Tensor, + use_scipy: bool = False, + ) -> Tensor: """Matches predictions to ground truth objects (pred_classes, true_classes) using IoU. - Args: - pred_classes (torch.Tensor): Predicted class indices of shape(N,). - true_classes (torch.Tensor): Target class indices of shape(M,). - iou (torch.Tensor): An NxM tensor containing the pairwise IoU values for predictions and ground of truth - use_scipy (bool): Whether to use scipy for matching (more precise). - - Returns: - (torch.Tensor): Correct tensor of shape(N,10) for 10 IoU thresholds. + @type pred_classes: Tensor + @param pred_classes: Predicted class indices of shape(N,). + @type true_classes: Tensor + @param true_classes: Target class indices of shape(M,). + @type iou: Tensor + @param iou: An NxM tensor containing the pairwise IoU values for predictions and + ground of truth + @type use_scipy: bool + @param use_scipy: Whether to use scipy for matching (more precise). + @rtype: Tensor + @return: Correct tensor of shape(N,10) for 10 IoU thresholds. """ # Dx10 matrix, where D - detections, 10 - IoU thresholds correct = np.zeros((pred_classes.shape[0], self.iouv.shape[0])).astype(bool) @@ -226,16 +236,16 @@ def match_predictions(self, pred_classes, true_classes, iou, use_scipy=False): correct[matches[:, 1].astype(int), i] = True return torch.tensor(correct, dtype=torch.bool, device=pred_classes.device) - def _update_metrics(self, results): + def _update_metrics(self, results: tuple[np.ndarray, ...]): """Updates the evaluation metrics of the model with a new set of results. - Args: - results (tuple): A tuple containing the following evaluation metrics: - - p (list): Precision for each class. Shape: (nc,). - - r (list): Recall for each class. Shape: (nc,). - - f1 (list): F1 score for each class. Shape: (nc,). - - all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10). - - ap_class_index (list): Index of class for each AP score. Shape: (nc,). + @type results: tuple[np.ndarray, ...] + @param results: A tuple containing the following evaluation metrics: + - p (list): Precision for each class. Shape: (nc,). + - r (list): Recall for each class. Shape: (nc,). + - f1 (list): F1 score for each class. Shape: (nc,). 
+ - all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10). + - ap_class_index (list): Index of class for each AP score. Shape: (nc,). Side Effects: Updates the class attributes `self.p`, `self.r`, `self.f1`, `self.all_ap`, and `self.ap_class_index` based @@ -260,7 +270,13 @@ def _update_metrics(self, results): # _, # self.prec_values, # ) = results - def _process(self, tp, conf, pred_cls, target_cls) -> tuple[np.ndarray, ...]: + def _process( + self, + tp: np.ndarray, + conf: np.ndarray, + pred_cls: np.ndarray, + target_cls: np.ndarray, + ) -> tuple[np.ndarray, ...]: """Process predicted results for object detection and update metrics.""" results = MeanAveragePrecisionOBB.ap_per_class( tp, @@ -276,17 +292,17 @@ def _process(self, tp, conf, pred_cls, target_cls) -> tuple[np.ndarray, ...]: @staticmethod def ap_per_class( - tp, - conf, - pred_cls, - target_cls, + tp: np.ndarray, + conf: np.ndarray, + pred_cls: np.ndarray, + target_cls: np.ndarray, # plot=False, # on_plot=None, # save_dir=Path(), # names={}, - eps=1e-16, + eps: float = 1e-16, # prefix="", - ): + ) -> tuple[np.ndarray, ...]: """Computes the average precision per class for object detection evaluation. Args: @@ -395,7 +411,9 @@ def ap_per_class( ) @staticmethod - def compute_ap(recall, precision): + def compute_ap( + recall: list[float], precision: list[float] + ) -> tuple[float, np.ndarray, np.ndarray]: """Compute the average precision (AP) given the recall and precision curves. Args: @@ -428,7 +446,7 @@ def compute_ap(recall, precision): return ap, mpre, mrec @staticmethod - def smooth(y, f=0.05): + def smooth(y: np.ndarray, f: float = 0.05) -> np.ndarray: """Box filter of fraction f.""" nf = round(len(y) * f * 2) // 2 + 1 # number of filter elements (must be odd) p = np.ones(nf // 2) # ones padding @@ -436,7 +454,7 @@ def smooth(y, f=0.05): return np.convolve(yp, np.ones(nf) / nf, mode="valid") # y-smoothed @staticmethod - def map(all_ap): + def map(all_ap: np.ndarray) -> float: """ Returns the mean Average Precision (mAP) over IoU thresholds of 0.5 - 0.95 in steps of 0.05. diff --git a/luxonis_train/attached_modules/visualizers/obbox_visualizer.py b/luxonis_train/attached_modules/visualizers/obbox_visualizer.py index 3945e1af..1da141c5 100644 --- a/luxonis_train/attached_modules/visualizers/obbox_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/obbox_visualizer.py @@ -23,9 +23,9 @@ def __init__( font_size: int | None = None, **kwargs, ): - """Visualizer for bounding box predictions. + """Visualizer for oriented bounding box predictions. - Creates a visualization of the bounding box predictions and labels. + Creates a visualization of the oriented bounding box predictions and labels. @type labels: dict[int, str] | list[str] | None @param labels: Either a dictionary mapping class indices to names, or a list of @@ -159,16 +159,16 @@ def forward( predictions: list[Tensor], targets: Tensor, ) -> tuple[Tensor, Tensor]: - """Creates a visualization of the bounding box predictions and labels. + """Creates a visualization of the oriented bounding box predictions and labels. @type label_canvas: Tensor @param label_canvas: The canvas containing the labels. @type prediction_canvas: Tensor @param prediction_canvas: The canvas containing the predictions. - @type prediction: Tensor - @param prediction: The predicted bounding boxes. The shape should be [N, 6], - where N is the number of bounding boxes and the last dimension is [x1, y1, - x2, y2, class, conf]. 
+ @type predictions: Tensor + @param predictions: The predicted bounding boxes. The shape should be [N, 7], + where N is the number of bounding boxes and the last dimension is [xc, yc, + w, h, conf, class]. # NOTE: check it @type targets: Tensor @param targets: The target bounding boxes. """ diff --git a/luxonis_train/attached_modules/visualizers/utils.py b/luxonis_train/attached_modules/visualizers/utils.py index ec61341a..f4665ce4 100644 --- a/luxonis_train/attached_modules/visualizers/utils.py +++ b/luxonis_train/attached_modules/visualizers/utils.py @@ -166,7 +166,6 @@ def draw_obounding_box(img: Tensor, obbox: Tensor | np.ndarray, **kwargs) -> Ten @return: Image with bounding box labels drawn on. """ _, H, W = img.shape - # bboxs = box_convert(label, "xywh", "xyxy") # The conversion below is needed for fitting a rectangle to the 4 label points, which can form # a polygon sometimes if obbox.shape[-1] > 5: diff --git a/luxonis_train/nodes/heads/efficient_obbox_head.py b/luxonis_train/nodes/heads/efficient_obbox_head.py index d724e61a..436c8854 100644 --- a/luxonis_train/nodes/heads/efficient_obbox_head.py +++ b/luxonis_train/nodes/heads/efficient_obbox_head.py @@ -26,7 +26,7 @@ def __init__( reg_max: int = 16, **kwargs, ): - """Head for object detection. + """Head for object detection using oriented bounding boxes. TODO: add more documentation diff --git a/luxonis_train/utils/boxutils.py b/luxonis_train/utils/boxutils.py index 0a3f0b82..d64e44b8 100644 --- a/luxonis_train/utils/boxutils.py +++ b/luxonis_train/utils/boxutils.py @@ -187,7 +187,7 @@ def bbox2dist(bbox: Tensor, anchor_points: Tensor, reg_max: float) -> Tensor: return dist -def xyxyxyxy2xywhr(x): +def xyxyxyxy2xywhr(x: Tensor) -> Tensor | np.ndarray: """Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, xy3, xy4] to [xywh, rotation]. Rotation values are returned in radians from 0 to pi/2. @@ -213,7 +213,7 @@ def xyxyxyxy2xywhr(x): ) -def xywhr2xyxyxyxy(x): +def xywhr2xyxyxyxy(x: Tensor) -> Tensor | np.ndarray: """Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2, xy3, xy4]. Rotation values should be in radians from 0 to pi/2. @@ -243,7 +243,7 @@ def xywhr2xyxyxyxy(x): return stack([pt1, pt2, pt3, pt4], -2) -def xyxy2xywh(x): +def xyxy2xywh(x: Tensor) -> Tensor: """Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height) format where (x1, y1) is the top-left corner and (x2, y2) is the bottom- right corner. @@ -267,7 +267,7 @@ def xyxy2xywh(x): return y -def xywh2xyxy(x): +def xywh2xyxy(x: Tensor) -> Tensor: """Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the top-left corner and (x2, y2) is the bottom- right corner. Note: ops per 2 channels faster than per channel. @@ -403,7 +403,9 @@ def bbox_iou( return iou -def probiou(obb1, obb2, CIoU=False, eps=1e-7): +def probiou( + obb1: Tensor, obb2: Tensor, CIoU: bool = False, eps: float = 1e-7 +) -> Tensor: """Calculate probabilistic IoU between oriented bounding boxes. Implements the algorithm from https://arxiv.org/pdf/2106.06072v1.pdf. @@ -456,7 +458,7 @@ def probiou(obb1, obb2, CIoU=False, eps=1e-7): return iou -def batch_probiou(obb1, obb2, eps=1e-7): +def batch_probiou(obb1: Tensor, obb2: Tensor, eps: float = 1e-7) -> Tensor: """ Calculate the prob IoU between oriented bounding boxes, https://arxiv.org/pdf/2106.06072v1.pdf. 
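The probIoU utilities updated above are the geometric core of the OBB pipeline: rotated boxes in `(xc, yc, w, h, r)` format are compared through their Gaussian covariance matrices instead of polygon intersection. A small usage sketch, relying only on the signatures shown in these hunks (the box values are arbitrary):

```python
import torch

from luxonis_train.utils.boxutils import batch_probiou, probiou

# Boxes in (xc, yc, w, h, r) format, rotation in radians
box_a = torch.tensor([[50.0, 50.0, 20.0, 10.0, 0.0]])
box_b = torch.tensor([[50.0, 50.0, 20.0, 10.0, 0.3]])  # same box, rotated ~17 degrees

print(probiou(box_a, box_a))        # ~1.0: identical boxes
print(probiou(box_a, box_b))        # < 1.0: rotation lowers the overlap
print(batch_probiou(box_a, box_b))  # pairwise matrix, as consumed by the OBB mAP metric
```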
@@ -498,14 +500,14 @@ def batch_probiou(obb1, obb2, eps=1e-7): return 1 - hd -def _get_covariance_matrix(boxes): +def _get_covariance_matrix(boxes: Tensor) -> tuple[Tensor, ...]: """Generating covariance matrix from obbs. Args: boxes (torch.Tensor): A tensor of shape (N, 5) representing rotated bounding boxes, with xywhr format. Returns: - (torch.Tensor): Covariance matrices corresponding to original rotated bounding boxes. + tuple(torch.Tensor): Covariance matrices corresponding to original rotated bounding boxes. """ # Gaussian bounding boxes, ignore the center points (the first two columns) because they are not needed here. gbbs = torch.cat((boxes[:, 2:4].pow(2) / 12, boxes[:, 4:]), dim=-1) @@ -798,7 +800,9 @@ def batched_nms_obb( return keep_indices[scores[keep_indices].sort(descending=True)[1]] -def batched_nms_rotated(boxes, scores, threshold=0.45): +def batched_nms_rotated( + boxes: Tensor, scores: Tensor, threshold: float = 0.45 +) -> Tensor: """NMS for oriented bounding boxes using probiou and fast-nms. Args: diff --git a/luxonis_train/utils/loaders/__init__.py b/luxonis_train/utils/loaders/__init__.py index eaa08ff7..4b089462 100644 --- a/luxonis_train/utils/loaders/__init__.py +++ b/luxonis_train/utils/loaders/__init__.py @@ -1,8 +1,10 @@ from .base_loader import BaseLoaderTorch, LuxonisLoaderTorchOutput, collate_fn from .luxonis_loader_torch import LuxonisLoaderTorch +from .obb_tmp_loader import OBBLoaderTorch __all__ = [ "LuxonisLoaderTorch", + "OBBLoaderTorch", "collate_fn", "BaseLoaderTorch", "LuxonisLoaderTorchOutput", diff --git a/luxonis_train/utils/loaders/obb_tmp_loader.py b/luxonis_train/utils/loaders/obb_tmp_loader.py new file mode 100644 index 00000000..a8c0e97e --- /dev/null +++ b/luxonis_train/utils/loaders/obb_tmp_loader.py @@ -0,0 +1,328 @@ +import json +import logging +import random +import warnings +from operator import itemgetter +from pathlib import Path +from typing import Dict, List, Literal, Optional, Tuple, Union + +import cv2 +import numpy as np +from luxonis_ml.data import ( + Augmentations, + BucketStorage, + BucketType, + LuxonisDataset, + LuxonisLoader, +) +from luxonis_ml.data.loaders.base_loader import LuxonisLoaderOutput +from luxonis_ml.data.parsers import LuxonisParser +from luxonis_ml.data.utils.enums import LabelType +from luxonis_ml.enums import DatasetType +from torch import Size, Tensor +from typeguard import typechecked + +from .base_loader import BaseLoaderTorch, LuxonisLoaderTorchOutput + +logger = logging.getLogger(__name__) + + +class OBBLoaderTorch(BaseLoaderTorch): + @typechecked + def __init__( + self, + dataset_name: str | None = None, + dataset_dir: str | None = None, + dataset_type: DatasetType | None = None, + team_id: str | None = None, + bucket_type: Literal["internal", "external"] = "internal", + bucket_storage: Literal["local", "s3", "gcs", "azure"] = "local", + stream: bool = False, + delete_existing: bool = True, + view: str | list[str] = "train", + augmentations: Augmentations | None = None, + **kwargs, + ): + """Torch-compatible loader for Luxonis datasets for obb. + + Can either use an already existing dataset or parse a new one from a directory. + + @type dataset_name: str | None + @param dataset_name: Name of the dataset to load. If not provided, the + C{dataset_dir} argument must be provided instead. If both C{dataset_dir} and + C{dataset_name} are provided, the dataset will be parsed from the directory + and saved with the provided name. 
+ @type dataset_dir: str | None + @param dataset_dir: Path to the dataset directory. It can be either a local path + or a URL. The data can be in a zip file. If not provided, C{dataset_name} of + an existing dataset must be provided. + @type dataset_type: str | None + @param dataset_type: Type of the dataset. Only relevant when C{dataset_dir} is + provided. If not provided, the type will be inferred from the directory + structure. + @type team_id: str | None + @param team_id: Optional unique team identifier for the cloud. + @type bucket_type: Literal["internal", "external"] + @param bucket_type: Type of the bucket. Only relevant for remote datasets. + Defaults to 'internal'. + @type bucket_storage: Literal["local", "s3", "gcs", "azure"] + @param bucket_storage: Type of the bucket storage. Defaults to 'local'. + @type stream: bool + @param stream: Flag for data streaming. Defaults to C{False}. + @type delete_existing: bool + @param delete_existing: Only relevant when C{dataset_dir} is provided. By + default, the dataset is parsed again every time the loader is created + because the underlying data might have changed. If C{delete_existing} is set + to C{False} and a dataset of the same name already exists, the existing + dataset will be used instead of re-parsing the data. + @type view: str | list[str] + @param view: A single split or a list of splits that will be used to create a + view of the dataset. Each split is a string that represents a subset of the + dataset. The available splits depend on the dataset, but usually include + 'train', 'val', and 'test'. Defaults to 'train'. + @type augmentations: Augmentations | None + @param augmentations: Augmentations to apply to the data. Defaults to C{None}. + """ + super().__init__(view=view, augmentations=augmentations, **kwargs) + if dataset_dir is not None: + self.dataset = self._parse_dataset( + dataset_dir, dataset_name, dataset_type, delete_existing + ) + else: + if dataset_name is None: + raise ValueError( + "Either `dataset_dir` or `dataset_name` must be provided." + ) + self.dataset = LuxonisDataset( + dataset_name=dataset_name, + team_id=team_id, + bucket_type=BucketType(bucket_type), + bucket_storage=BucketStorage(bucket_storage), + ) + + self.instances = [] + splits_path = self.dataset.metadata_path / "splits.json" + if not splits_path.exists(): + raise RuntimeError( + "Cannot find splits! 
Ensure you call dataset.make_splits()" + ) + with open(splits_path, "r") as file: + splits = json.load(file) + + for view in self.view: + self.instances.extend(splits[view]) + + self.base_loader = OBBLoader( + dataset=self.dataset, + view=self.view, + stream=stream, + augmentations=self.augmentations, + ) + + def __len__(self) -> int: + return len(self.base_loader) + + @property + def input_shapes(self) -> dict[str, Size]: + img = self[0][0][self.image_source] + return {self.image_source: img.shape} + + def __getitem__(self, idx: int) -> LuxonisLoaderTorchOutput: + img, labels = self.base_loader[idx] + + img = np.transpose(img, (2, 0, 1)) # HWC to CHW + tensor_img = Tensor(img) + tensor_labels = {} + for task, (array, label_type) in labels.items(): + tensor_labels[task] = (Tensor(array), label_type) + + return {self.image_source: tensor_img}, tensor_labels + + def get_classes(self) -> dict[str, list[str]]: + _, classes = self.dataset.get_classes() + return {task: classes[task] for task in classes} + + def get_n_keypoints(self) -> dict[str, int]: + skeletons = self.dataset.get_skeletons() + return {task: len(skeletons[task][0]) for task in skeletons} + + def _parse_dataset( + self, + dataset_dir: str, + dataset_name: str | None, + dataset_type: DatasetType | None, + delete_existing: bool, + ) -> LuxonisDataset: + if dataset_name is None: + dataset_name = Path(dataset_dir).stem + if dataset_type is not None: + dataset_name += f"_{dataset_type.value}" + + if LuxonisDataset.exists(dataset_name): + if not delete_existing: + return LuxonisDataset(dataset_name=dataset_name) + else: + logger.warning( + f"Dataset {dataset_name} already exists. " + "The dataset will be generated again to ensure the latest data are used. " + "If you don't want to regenerate the dataset every time, set `delete_existing=False`'" + ) + + if dataset_type is None: + logger.warning( + "Dataset type is not set. " + "Attempting to infer it from the directory structure. " + "If this fails, please set the dataset type manually. " + f"Supported types are: {', '.join(DatasetType.__members__)}." + ) + + logger.info(f"Parsing dataset from {dataset_dir} with name '{dataset_name}'") + + return LuxonisParser( + dataset_dir, + dataset_name=dataset_name, + dataset_type=dataset_type, + save_dir="data", + delete_existing=True, + ).parse() + + +class OBBLoader(LuxonisLoader): + def __init__( + self, + dataset: LuxonisDataset, + view: Union[str, List[str]] = "train", + stream: bool = False, + augmentations: Optional[Augmentations] = None, + *, + force_resync: bool = False, + ) -> None: + """A loader class used for loading data from L{LuxonisDataset} for oriented + bounding boxes. + + @type dataset: LuxonisDataset + @param dataset: LuxonisDataset to use + @type view: Union[str, List[str]] + @param view: What splits to use. Can be either a single split or a list of + splits. Defaults to "train". + @type stream: bool + @param stream: Flag for data streaming. Defaults to C{False}. + @type augmentations: Optional[luxonis_ml.loader.Augmentations] + @param augmentations: Augmentation class that performs augmentations. Defaults + to C{None}. + @type force_resync: bool + @param force_resync: Flag to force resync from cloud. Defaults to C{False}. + """ + super().__init__( + dataset=dataset, + view=view, + stream=stream, + augmentations=augmentations, + force_resync=force_resync, + ) + + def __getitem__(self, idx: int) -> LuxonisLoaderOutput: + """Function to load a sample consisting of an image and its annotations. 
+ + @type idx: int + @param idx: The (often random) integer index to retrieve a sample from the + dataset. + @rtype: LuxonisLoaderOutput + @return: The loader ouput consisting of the image and a dictionary defining its + annotations. + """ + + if self.augmentations is None: + return self._load_image_with_annotations(idx) + + indices = [idx] + if self.augmentations.is_batched: + other_indices = [i for i in range(len(self)) if i != idx] + if self.augmentations.aug_batch_size > len(self): + warnings.warn( + f"Augmentations batch_size ({self.augmentations.aug_batch_size}) is larger than dataset size ({len(self)}), samples will include repetitions." + ) + random_fun = random.choices + else: + random_fun = random.sample + picked_indices = random_fun( + other_indices, k=self.augmentations.aug_batch_size - 1 + ) + indices.extend(picked_indices) + + out_dict: Dict[str, Tuple[np.ndarray, LabelType]] = {} + loaded_anns = [self._load_image_with_annotations(i) for i in indices] + random_state = random.getstate() + np_random_state = np.random.get_state() + while loaded_anns[0][1]: + aug_input_data = [] + label_to_task = {} + nk = 0 + ns = 0 + for img, annotations in loaded_anns: + label_dict: Dict[LabelType, np.ndarray] = {} + task_dict: Dict[LabelType, str] = {} + for task in sorted(list(annotations.keys())): + array, label_type = annotations[task] + if label_type not in label_dict: + # ensure that bounding box annotations are added to the + # `label_dict` before keypoints + if label_type == LabelType.KEYPOINTS: + if ( + LabelType.BOUNDINGBOX + in map(itemgetter(1), list(annotations.values())) + and LabelType.BOUNDINGBOX not in label_dict # type: ignore + ): + continue + + if ( + LabelType.BOUNDINGBOX in label_dict # type: ignore + and LabelType.BOUNDINGBOX + in map(itemgetter(1), list(annotations.values())) + ): + bbox_task = task_dict[LabelType.BOUNDINGBOX] + *_, bbox_suffix = bbox_task.split("-", 1) + *_, kp_suffix = task.split("-", 1) + if bbox_suffix != kp_suffix: + continue + + label_dict[label_type] = array + label_to_task[label_type] = task + task_dict[label_type] = task + annotations.pop(task) + if label_type == LabelType.KEYPOINTS: + nk = (array.shape[1] - 1) // 3 + if label_type == LabelType.SEGMENTATION: + ns = array.shape[0] + + aug_input_data.append((img, label_dict)) + + # NOTE: To ensure the same augmentation is applied to all samples + # in case of multiple tasks per LabelType + random.setstate(random_state) + np.random.set_state(np_random_state) + + # NOTE: consider implementing resizing using the aspect ratio of the original input images + # height, width = img.shape[0], img.shape[1] + # # Determine the larger dimension + # if height > width: + # aspect_ratio = round(height / width, 2) + # new_height = 640 + # new_width = round(int(640 / aspect_ratio), -1) + # else: + # aspect_ratio = round(width / height, 2) + # new_width = 640 + # new_height = round(int(640 / aspect_ratio), -1) + + # img_resized = cv2.resize(img, (new_height, new_width), interpolation=cv2.INTER_AREA) + + # NOTE: Temporary solution, to demonstrate training functionality oh the DOTA dataset. 
+ # If it's needed can be changed to the size from config file + img_resized = cv2.resize(img, (512, 512), interpolation=cv2.INTER_AREA) + img_norm = img_resized / 255 # [0, 1] + + img, aug_annotations = self.augmentations(aug_input_data, nk=nk, ns=ns) + for label_type, array in aug_annotations.items(): + out_dict[label_to_task[label_type]] = (array, label_type) + + return img_norm, out_dict # type: ignore diff --git a/test_models.py b/test_models.py index 6f88619b..31a0634e 100644 --- a/test_models.py +++ b/test_models.py @@ -15,6 +15,7 @@ def main(): config_file = "obb_detection_model" + # config_file = "detection_model" config_file = f"configs/{config_file}.yaml" # model = LuxonisModel(config_file, opts=OPTS) model = LuxonisModel(config_file) From 426584582acbb491ac19329111d64e8ba1c588c7 Mon Sep 17 00:00:00 2001 From: Anton Makoveev Date: Tue, 17 Sep 2024 15:35:01 +0200 Subject: [PATCH 68/75] [Fix]: remove debug files --- test_models.py | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 test_models.py diff --git a/test_models.py b/test_models.py deleted file mode 100644 index 31a0634e..00000000 --- a/test_models.py +++ /dev/null @@ -1,27 +0,0 @@ -from pathlib import Path - -from luxonis_train.core import LuxonisModel - -TEST_OUTPUT = Path("./probe") -OPTS = { - "trainer.epochs": 1, - "trainer.batch_size": 4, - "trainer.validation_interval": 1, - "trainer.callbacks": "[]", - "tracker.save_directory": str(TEST_OUTPUT), - "tuner.n_trials": 4, -} - - -def main(): - config_file = "obb_detection_model" - # config_file = "detection_model" - config_file = f"configs/{config_file}.yaml" - # model = LuxonisModel(config_file, opts=OPTS) - model = LuxonisModel(config_file) - model.train() - model.test() - - -if __name__ == "__main__": - main() From 7daabdc60552ec9068085e15243496568cbc5492 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Thu, 19 Sep 2024 01:05:59 -0400 Subject: [PATCH 69/75] Code Cleanup and Improved Tests (#69) Co-authored-by: GitHub Actions --- .github/CODEOWNERS | 1 + .github/labeler.yaml | 32 + .github/workflows/ci.yaml | 175 ++++ .github/workflows/docs.yaml | 26 - .github/workflows/pre-commit.yaml | 13 - .github/workflows/tests.yaml | 126 --- .gitignore | 2 +- .pre-commit-config.yaml | 6 +- CONTRIBUTING.md | 103 ++- configs/README.md | 6 +- configs/classification_model.yaml | 4 +- configs/coco_model.yaml | 10 +- configs/detection_model.yaml | 4 +- configs/efficient_coco_model.yaml | 10 +- configs/example_export.yaml | 4 +- configs/example_multi_input.yaml | 4 +- configs/example_tuning.yaml | 2 +- configs/keypoint_bbox_model.yaml | 4 +- configs/resnet_model.yaml | 4 +- configs/segmentation_model.yaml | 4 +- luxonis_train/__init__.py | 8 +- luxonis_train/__main__.py | 33 +- .../{utils => }/assigners/__init__.py | 0 .../{utils => }/assigners/atts_assigner.py | 84 +- .../{utils => }/assigners/tal_assigner.py | 59 +- luxonis_train/{utils => }/assigners/utils.py | 23 +- .../attached_modules/base_attached_module.py | 212 +++-- .../losses/adaptive_detection_loss.py | 220 +++-- .../attached_modules/losses/base_loss.py | 22 +- .../losses/bce_with_logits.py | 63 +- .../attached_modules/losses/cross_entropy.py | 15 +- .../losses/efficient_keypoint_bbox_loss.py | 293 +++--- .../losses/implicit_keypoint_bbox_loss.py | 136 +-- .../attached_modules/losses/keypoint_loss.py | 95 +- .../losses/sigmoid_focal_loss.py | 10 +- .../losses/smooth_bce_with_logits.py | 65 +- .../losses/softmax_focal_loss.py | 28 +- 
.../attached_modules/metrics/__init__.py | 2 +- .../attached_modules/metrics/base_metric.py | 19 +- .../attached_modules/metrics/common.py | 92 -- .../metrics/mean_average_precision.py | 42 +- .../mean_average_precision_keypoints.py | 112 ++- .../metrics/object_keypoint_similarity.py | 171 ++-- .../attached_modules/metrics/torchmetrics.py | 114 +++ .../visualizers/base_visualizer.py | 18 +- .../visualizers/bbox_visualizer.py | 60 +- .../visualizers/classification_visualizer.py | 28 +- .../visualizers/keypoint_visualizer.py | 24 +- .../visualizers/multi_visualizer.py | 19 +- .../visualizers/segmentation_visualizer.py | 17 +- .../attached_modules/visualizers/utils.py | 36 +- luxonis_train/callbacks/__init__.py | 8 + .../callbacks/archive_on_train_end.py | 4 +- .../callbacks/export_on_train_end.py | 2 +- luxonis_train/callbacks/gpu_stats_monitor.py | 109 +-- .../callbacks/luxonis_progress_bar.py | 35 +- luxonis_train/callbacks/metadata_logger.py | 37 +- luxonis_train/callbacks/module_freezer.py | 3 +- luxonis_train/callbacks/needs_checkpoint.py | 7 +- luxonis_train/callbacks/test_on_train_end.py | 4 +- luxonis_train/callbacks/upload_checkpoint.py | 7 +- luxonis_train/core/core.py | 234 +++-- luxonis_train/core/utils/archive_utils.py | 33 +- luxonis_train/core/utils/export_utils.py | 8 +- luxonis_train/core/utils/train_utils.py | 23 +- luxonis_train/core/utils/tune_utils.py | 12 +- luxonis_train/{utils => }/loaders/__init__.py | 0 .../{utils => }/loaders/base_loader.py | 97 +- .../loaders/luxonis_loader_torch.py | 4 +- luxonis_train/models/luxonis_lightning.py | 223 +++-- luxonis_train/models/luxonis_output.py | 3 +- .../base_predefined_model.py | 26 +- .../predefined_models/classification_model.py | 4 +- .../predefined_models/detection_model.py | 6 +- .../keypoint_detection_model.py | 6 +- .../predefined_models/segmentation_model.py | 2 +- luxonis_train/nodes/README.md | 10 +- luxonis_train/nodes/activations/__init__.py | 4 +- .../nodes/activations/activations.py | 11 - .../nodes/backbones/contextspatial.py | 97 +- luxonis_train/nodes/backbones/efficientnet.py | 48 +- .../nodes/backbones/efficientrep/__init__.py | 3 + .../{ => efficientrep}/efficientrep.py | 98 +- .../nodes/backbones/efficientrep/variants.py | 44 + luxonis_train/nodes/backbones/micronet.py | 842 ------------------ .../nodes/backbones/micronet/__init__.py | 3 + .../nodes/backbones/micronet/blocks.py | 515 +++++++++++ .../nodes/backbones/micronet/micronet.py | 62 ++ .../nodes/backbones/micronet/variants.py | 344 +++++++ luxonis_train/nodes/backbones/mobilenetv2.py | 57 +- .../nodes/backbones/mobileone/__init__.py | 3 + .../{mobileone.py => mobileone/blocks.py} | 214 +---- .../nodes/backbones/mobileone/mobileone.py | 197 ++++ .../nodes/backbones/mobileone/variants.py | 39 + luxonis_train/nodes/backbones/repvgg.py | 149 ---- .../nodes/backbones/repvgg/__init__.py | 3 + .../nodes/backbones/repvgg/repvgg.py | 135 +++ .../nodes/backbones/repvgg/variants.py | 31 + luxonis_train/nodes/backbones/resnet.py | 128 ++- luxonis_train/nodes/backbones/rexnetv1.py | 102 ++- luxonis_train/nodes/base_node.py | 330 ++++--- luxonis_train/nodes/blocks/blocks.py | 86 +- luxonis_train/nodes/heads/bisenet_head.py | 50 +- .../nodes/heads/classification_head.py | 15 +- .../nodes/heads/efficient_bbox_head.py | 72 +- .../heads/efficient_keypoint_bbox_head.py | 52 +- .../heads/implicit_keypoint_bbox_head.py | 103 ++- .../nodes/heads/segmentation_head.py | 32 +- luxonis_train/nodes/necks/reppan_neck.py | 148 +-- luxonis_train/optimizers/__init__.py | 1 + 
.../{utils => optimizers}/optimizers.py | 2 +- luxonis_train/schedulers/__init__.py | 1 + .../{utils => schedulers}/schedulers.py | 0 luxonis_train/utils/__init__.py | 57 +- .../utils/{boxutils.py => boundingbox.py} | 161 ++-- luxonis_train/utils/config.py | 72 +- luxonis_train/utils/dataset_metadata.py | 154 ++++ luxonis_train/utils/exceptions.py | 12 + luxonis_train/utils/general.py | 345 +++---- luxonis_train/utils/graph.py | 92 ++ luxonis_train/utils/keypoints.py | 85 ++ luxonis_train/utils/registry.py | 26 +- luxonis_train/utils/tracker.py | 9 +- luxonis_train/utils/types.py | 44 +- media/coverage_badge.svg | 6 +- pyproject.toml | 48 +- requirements-dev.txt | 2 + tests/__init__.py | 0 tests/configs/archive_config.yaml | 43 + tests/configs/parking_lot_config.yaml | 81 +- tests/configs/segmentation_parse_loader.yaml | 4 +- tests/conftest.py | 18 + tests/integration/__init__.py | 0 tests/integration/conftest.py | 104 ++- tests/integration/multi_input_modules.py | 22 +- tests/integration/parking_lot.json | 65 +- tests/integration/test_detection.py | 95 ++ tests/integration/test_sanity.py | 136 --- tests/integration/test_segmentation.py | 134 +++ tests/integration/test_simple.py | 215 +++++ tests/unittests/__init__.py | 2 - tests/unittests/test_assigners/__init__.py | 0 .../test_assigners/test_atts_assigner.py | 21 +- .../test_assigners/test_tal_assigner.py | 135 +++ .../test_assigners/test_utils.py | 2 +- tests/unittests/test_base_attached_module.py | 153 ++++ tests/unittests/test_base_node.py | 160 ++++ tests/unittests/test_blocks.py | 15 + tests/unittests/test_callbacks/__init__.py | 0 .../test_callbacks/test_needs_checkpoint.py | 6 + tests/unittests/test_loaders/__init__.py | 0 .../test_loaders/test_base_loader.py | 94 ++ .../test_losses/test_bce_with_logits_loss.py | 10 +- .../test_metrics/test_torchmetrics.py | 52 ++ .../test_assigners/test_tal_assigner.py | 165 ---- tests/unittests/test_utils/test_boxutils.py | 79 +- .../test_utils/test_dataset_metadata.py | 53 ++ tests/unittests/test_utils/test_general.py | 44 + tests/unittests/test_utils/test_graph.py | 79 ++ tests/unittests/test_utils/test_keypoints.py | 24 + .../test_loaders/test_base_loader.py | 69 -- 161 files changed, 6539 insertions(+), 4242 deletions(-) create mode 100644 .github/CODEOWNERS create mode 100644 .github/labeler.yaml create mode 100644 .github/workflows/ci.yaml delete mode 100644 .github/workflows/docs.yaml delete mode 100644 .github/workflows/pre-commit.yaml delete mode 100644 .github/workflows/tests.yaml rename luxonis_train/{utils => }/assigners/__init__.py (100%) rename luxonis_train/{utils => }/assigners/atts_assigner.py (84%) rename luxonis_train/{utils => }/assigners/tal_assigner.py (87%) rename luxonis_train/{utils => }/assigners/utils.py (88%) delete mode 100644 luxonis_train/attached_modules/metrics/common.py create mode 100644 luxonis_train/attached_modules/metrics/torchmetrics.py rename luxonis_train/{utils => }/loaders/__init__.py (100%) rename luxonis_train/{utils => }/loaders/base_loader.py (65%) rename luxonis_train/{utils => }/loaders/luxonis_loader_torch.py (98%) create mode 100644 luxonis_train/nodes/backbones/efficientrep/__init__.py rename luxonis_train/nodes/backbones/{ => efficientrep}/efficientrep.py (53%) create mode 100644 luxonis_train/nodes/backbones/efficientrep/variants.py delete mode 100644 luxonis_train/nodes/backbones/micronet.py create mode 100644 luxonis_train/nodes/backbones/micronet/__init__.py create mode 100644 luxonis_train/nodes/backbones/micronet/blocks.py create mode 
100644 luxonis_train/nodes/backbones/micronet/micronet.py create mode 100644 luxonis_train/nodes/backbones/micronet/variants.py create mode 100644 luxonis_train/nodes/backbones/mobileone/__init__.py rename luxonis_train/nodes/backbones/{mobileone.py => mobileone/blocks.py} (55%) create mode 100644 luxonis_train/nodes/backbones/mobileone/mobileone.py create mode 100644 luxonis_train/nodes/backbones/mobileone/variants.py delete mode 100644 luxonis_train/nodes/backbones/repvgg.py create mode 100644 luxonis_train/nodes/backbones/repvgg/__init__.py create mode 100644 luxonis_train/nodes/backbones/repvgg/repvgg.py create mode 100644 luxonis_train/nodes/backbones/repvgg/variants.py create mode 100644 luxonis_train/optimizers/__init__.py rename luxonis_train/{utils => optimizers}/optimizers.py (92%) create mode 100644 luxonis_train/schedulers/__init__.py rename luxonis_train/{utils => schedulers}/schedulers.py (100%) rename luxonis_train/utils/{boxutils.py => boundingbox.py} (87%) create mode 100644 luxonis_train/utils/dataset_metadata.py create mode 100644 luxonis_train/utils/exceptions.py create mode 100644 luxonis_train/utils/graph.py create mode 100644 luxonis_train/utils/keypoints.py create mode 100644 tests/__init__.py create mode 100644 tests/configs/archive_config.yaml create mode 100644 tests/conftest.py create mode 100644 tests/integration/__init__.py create mode 100644 tests/integration/test_detection.py delete mode 100644 tests/integration/test_sanity.py create mode 100644 tests/integration/test_segmentation.py create mode 100644 tests/integration/test_simple.py create mode 100644 tests/unittests/test_assigners/__init__.py rename tests/unittests/{test_utils => }/test_assigners/test_atts_assigner.py (88%) create mode 100644 tests/unittests/test_assigners/test_tal_assigner.py rename tests/unittests/{test_utils => }/test_assigners/test_utils.py (96%) create mode 100644 tests/unittests/test_base_attached_module.py create mode 100644 tests/unittests/test_base_node.py create mode 100644 tests/unittests/test_blocks.py create mode 100644 tests/unittests/test_callbacks/__init__.py create mode 100644 tests/unittests/test_callbacks/test_needs_checkpoint.py create mode 100644 tests/unittests/test_loaders/__init__.py create mode 100644 tests/unittests/test_loaders/test_base_loader.py create mode 100644 tests/unittests/test_metrics/test_torchmetrics.py delete mode 100644 tests/unittests/test_utils/test_assigners/test_tal_assigner.py create mode 100644 tests/unittests/test_utils/test_dataset_metadata.py create mode 100644 tests/unittests/test_utils/test_general.py create mode 100644 tests/unittests/test_utils/test_graph.py create mode 100644 tests/unittests/test_utils/test_keypoints.py delete mode 100644 tests/unittests/test_utils/test_loaders/test_base_loader.py diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 00000000..a6eef919 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @luxonis/ML-Reviewers diff --git a/.github/labeler.yaml b/.github/labeler.yaml new file mode 100644 index 00000000..33749bd5 --- /dev/null +++ b/.github/labeler.yaml @@ -0,0 +1,32 @@ +tests: + - changed-files: + - any-glob-to-any-file: 'tests/*' + - head-branch: + - 'test/*' + - 'tests/*' + +DevOps: + - changed-files: + - any-glob-to-any-file: '.github/*' + +CLI: + - changed-files: + - any-glob-to-any-file: '**/__main__.py' + +release: + - base-branch: 'main' + +enhancement: + - head-branch: + - 'feature/*' + - 'feat/*' + - 'enhancement/*' + +fix: + - head-branch: + - 'fix/*' + - 'bug/*' + 
- 'hotfix/*' + - 'issue/*' + - 'bugfix/*' + - 'patch/*' diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 00000000..6dbf1a87 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,175 @@ +name: CI + +on: + pull_request: + branches: [ dev, main ] + paths: + - 'luxonis_train/**' + - 'tests/**' + - .github/workflows/ci.yaml + - '!**/*.md' + - '!luxonis_train/__main__.py' + +permissions: + pull-requests: write + contents: write + checks: write + +jobs: + assigner: + runs-on: ubuntu-latest + steps: + - name: Auto-assign + uses: toshimaru/auto-author-assign@v2.1.1 + + labeler: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Labeler + uses: actions/labeler@v5 + with: + configuration-path: .github/labeler.yaml + + pre-commit: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Run pre-commit + uses: pre-commit/action@v3.0.1 + + docs: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Install dependencies + run: | + sudo apt update + sudo apt install -y pandoc + pip install pydoctor + curl -L "https://raw.githubusercontent.com/luxonis/python-api-analyzer-to-json/main/gen-docs.py" -o "gen-docs.py" + + - name: Build docs + run: python gen-docs.py luxonis_train + + type-check: + needs: + - pre-commit + - docs + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: pip + + - name: Install dependencies + run: pip install -e .[dev] + + - name: Type check + uses: jakebailey/pyright-action@v2 + with: + version: '1.1.380' + level: warning + warnings: true + python-version: '3.10' + project: pyproject.toml + + tests: + needs: + - type-check + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest] + + runs-on: ${{ matrix.os }} + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: pip + + - name: Install dependencies + run: pip install -e .[dev] + + - name: Authenticate to Google Cloud + id: google-auth + uses: google-github-actions/auth@v2 + with: + credentials_json: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }} + create_credentials_file: true + export_environment_variables: true + token_format: access_token + + - name: Run pytest + uses: pavelzw/pytest-action@v2 + env: + LUXONISML_BUCKET: luxonis-test-bucket + PYTORCH_MPS_HIGH_WATERMARK_RATIO: 0.0 + with: + emoji: false + custom-arguments: --junit-xml pytest.xml --cov luxonis_train --cov-report xml + + - name: Create Test Report + uses: EnricoMi/publish-unit-test-result-action@v2 + if: matrix.os == 'ubuntu-latest' + with: + files: pytest.xml + + - name: Generate coverage badge + uses: tj-actions/coverage-badge-py@v2 + if: matrix.os == 'ubuntu-latest' + with: + output: media/coverage_badge.svg + + - name: Generate coverage report + uses: orgoro/coverage@v3.2 + if: matrix.os == 'ubuntu-latest' + with: + coverageFile: coverage.xml + token: ${{ secrets.GITHUB_TOKEN }} + thresholdAll: 0.9 + thresholdNew: 0.8 + + - name: Commit coverage badge + if: matrix.os == 'ubuntu-latest' + run: | + git config --global user.name 'GitHub Actions' + git config --global 
user.email 'actions@github.com' + git diff --quiet media/coverage_badge.svg || { + git add media/coverage_badge.svg + git commit -m "[Automated] Updated coverage badge" + } + + - name: Push changes + uses: ad-m/github-push-action@master + if: matrix.os == 'ubuntu-latest' + with: + branch: ${{ github.head_ref }} + diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml deleted file mode 100644 index f3c69761..00000000 --- a/.github/workflows/docs.yaml +++ /dev/null @@ -1,26 +0,0 @@ -name: Docs - -on: - pull_request: - branches: [ dev, main ] - paths: - - 'luxonis_train/**' - - .github/workflows/docs.yaml - -jobs: - docs: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.head_ref }} - - - name: Install dependencies - run: | - pip install pydoctor - curl -L "https://raw.githubusercontent.com/luxonis/python-api-analyzer-to-json/main/gen-docs.py" -o "gen-docs.py" - - - name: Build docs - run: | - python gen-docs.py luxonis_train diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml deleted file mode 100644 index ce6b816b..00000000 --- a/.github/workflows/pre-commit.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: pre-commit - -on: - pull_request: - branches: [dev, main] - -jobs: - pre-commit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v3 - - uses: pre-commit/action@v3.0.0 diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml deleted file mode 100644 index a0999d9b..00000000 --- a/.github/workflows/tests.yaml +++ /dev/null @@ -1,126 +0,0 @@ -name: Tests - -on: - pull_request: - branches: [ dev, main ] - paths: - - 'luxonis_train/**/**.py' - - 'tests/**/**.py' - - .github/workflows/tests.yaml - -jobs: - run_tests: - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, windows-latest] - version: ['3.10'] - - runs-on: ${{ matrix.os }} - - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.head_ref }} - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.version }} - cache: pip - - - name: Install dependencies [Ubuntu] - if: matrix.os == 'ubuntu-latest' - run: | - sudo apt update - sudo apt install -y pandoc - pip install -e .[dev] - - - name: Install dependencies [Windows] - if: matrix.os == 'windows-latest' - run: pip install -e .[dev] - - - name: Install dependencies [macOS] - if: matrix.os == 'macOS-latest' - run: pip install -e .[dev] - - - name: Authenticate to Google Cloud - id: google-auth - uses: google-github-actions/auth@v2 - with: - credentials_json: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }} - create_credentials_file: true - export_environment_variables: true - token_format: access_token - - - name: Run tests with coverage [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' - run: pytest tests --cov=luxonis_train --cov-report xml --junit-xml pytest.xml - - - name: Run tests [Windows, macOS] - env: - PYTORCH_MPS_HIGH_WATERMARK_RATIO: 0.0 - if: matrix.os != 'ubuntu-latest' || matrix.version != '3.10' - run: pytest tests --junit-xml pytest.xml - - - name: Generate coverage badge [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' - run: coverage-badge -o media/coverage_badge.svg -f - - - name: Generate coverage report [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' - uses: orgoro/coverage@v3.1 - with: - coverageFile: coverage.xml - token: ${{ secrets.GITHUB_TOKEN }} - - - name: Commit 
coverage badge [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' - run: | - git config --global user.name 'GitHub Actions' - git config --global user.email 'actions@github.com' - git diff --quiet media/coverage_badge.svg || { - git add media/coverage_badge.svg - git commit -m "[Automated] Updated coverage badge" - } - - - name: Push changes [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' - uses: ad-m/github-push-action@master - with: - branch: ${{ github.head_ref }} - - - name: Upload Test Results - if: always() - uses: actions/upload-artifact@v4 - with: - name: Test Results [${{ matrix.os }}] (Python ${{ matrix.version }}) - path: pytest.xml - retention-days: 10 - if-no-files-found: error - - publish-test-results: - name: "Publish Tests Results" - needs: run_tests - runs-on: ubuntu-latest - permissions: - checks: write - pull-requests: write - if: always() - - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.head_ref }} - - - name: Download Artifacts - uses: actions/download-artifact@v4 - with: - path: artifacts - - - name: Publish Test Results - uses: EnricoMi/publish-unit-test-result-action@v2 - with: - files: "artifacts/**/*.xml" diff --git a/.gitignore b/.gitignore index 7f182cf4..03ba884c 100644 --- a/.gitignore +++ b/.gitignore @@ -152,5 +152,5 @@ mlartifacts mlruns wandb tests/_data -tests/integration/_test-output +tests/integration/save-directory data diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3f95fc26..3d68c872 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,12 +1,11 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.8 + rev: v0.6.4 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] types_or: [python, pyi, jupyter] - id: ruff-format - args: [--line-length, '88'] types_or: [python, pyi, jupyter] - repo: https://github.com/PyCQA/docformatter @@ -14,7 +13,7 @@ repos: hooks: - id: docformatter additional_dependencies: [tomli] - args: [--in-place, --black, --style=epytext] + args: [--in-place, --style=epytext] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 @@ -28,4 +27,3 @@ repos: - id: mdformat additional_dependencies: - mdformat-gfm - - mdformat-toc diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d113518b..20fd3607 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -3,25 +3,45 @@ **This guide is intended for our internal development team.** It outlines our workflow and standards for contributing to this project. -## Table of Contents +## Table Of Contents +- [Pre-requisites](#pre-requisites) - [Pre-commit Hooks](#pre-commit-hooks) - [Documentation](#documentation) +- [Type Checking](#type-checking) - [Editor Support](#editor-support) - [Tests](#tests) - [GitHub Actions](#github-actions) - [Making and Reviewing Changes](#making-and-reviewing-changes) -- [Notes](#notes) + +## Pre-requisites + +Clone the repository and navigate to the root directory: + +```bash +git clone git@github.com:luxonis/luxonis-train.git +cd luxonis-train +``` + +Install the development dependencies by running `pip install -r requirements-dev.txt` or install the package with the `dev` extra flag: + +```bash +pip install -e .[dev] +``` + +> \[!NOTE\] +> This will install the package in editable mode (`-e`), +> so you can make changes to the code and run them immediately. ## Pre-commit Hooks We use pre-commit hooks to ensure code quality and consistency: -1. 
Install pre-commit (see [pre-commit.com](https://pre-commit.com/#install)). +1. Install `pre-commit` (see [pre-commit.com](https://pre-commit.com/#install)). 1. Clone the repository and run `pre-commit install` in the root directory. -1. The pre-commit hook will now run automatically on `git commit`. +1. The `pre-commit` hook will now run automatically on `git commit`. - If the hook fails, it will print an error message and abort the commit. - - It will also modify the files in-place to fix any issues it can. + - Some hooks will also modify the files in-place to fix found issues. ## Documentation @@ -29,52 +49,75 @@ We use the [Epytext](https://epydoc.sourceforge.net/epytext.html) markup languag To verify that your documentation is formatted correctly, follow these steps: 1. Download [`get-docs.py`](https://github.com/luxonis/python-api-analyzer-to-json/blob/main/gen-docs.py) script -1. Run `python3 get-docs.py luxonis_ml` in the root directory. +1. Run `python3 get-docs.py luxonis_train` in the root directory. - If the script runs successfully and produces `docs.json` file, your documentation is formatted correctly. - - **NOTE:** If the script fails, it might not give the specific error message. In that case, you can run - the script for each file individually until you find the one that is causing the error. -### Editor Support +> \[!NOTE\] +> If the script fails, it might not give a specific error message. +> In that case, you can run the script for each file individually +> until you find the one that is causing the error. + +**Editor Support:** - **PyCharm** - built in support for generating `epytext` docstrings -- **Visual Studie Code** - [AI Docify](https://marketplace.visualstudio.com/items?itemName=AIC.docify) extension offers support for `epytext` +- **Visual Studio Code** - [AI Docify](https://marketplace.visualstudio.com/items?itemName=AIC.docify) extension offers support for `epytext` - **NeoVim** - [vim-python-docstring](https://github.com/pixelneo/vim-python-docstring) supports `epytext` style +## Type Checking + +The codebase is type-checked using [pyright](https://github.com/microsoft/pyright) `v1.1.380`. To run type checking, use the following command in the root project directory: + +```bash +pyright --warnings --level warning --pythonversion 3.10 luxonis_train +``` + +**Editor Support:** + +- **PyCharm** - [Pyright](https://plugins.jetbrains.com/plugin/24145-pyright) extension +- **Visual Studio Code** - [Pyright](https://marketplace.visualstudio.com/items?itemName=ms-pyright.pyright) extension +- **NeoVim** - [LSP-Config](https://github.com/neovim/nvim-lspconfig) plugin with the [pyright configuration](https://github.com/neovim/nvim-lspconfig/blob/master/doc/server_configurations.md#pyright) + ## Tests We use [pytest](https://docs.pytest.org/en/stable/) for testing. -The tests are located in the `tests` directory. You can run the tests locally with: +The tests are located in the `tests` directory. To run the tests with coverage, use the following command: ```bash -pytest tests --cov=luxonis_train +pytest --cov=luxonis_train --cov-report=html ``` -This command will run all tests and print a coverage report. The coverage report -is only informational for now, but we may enforce a minimum coverage in the future. +This command will run all tests and generate HTML coverage report. + +> \[!TIP\] +> The coverage report will be saved to `htmlcov` directory. +> If you want to inspect the coverage in more detail, open `htmlcov/index.html` in a browser. 
+ +> \[!TIP\] +> You can choose to run only the unit-tests or only the integration tests by adding `-m unit` or `-m integration` to the `pytest` command. -**If a new feature is added, a new test should be added to cover it.** +> \[!IMPORTANT\] +> If a new feature is added, a new test should be added to cover it. +> The minimum overall test coverage for a PR to be merged is 90%. +> The minimum coverage for new files is 80%. ## GitHub Actions Our GitHub Actions workflow is run when a new PR is opened. -It first checks that the pre-commit hook passes and that the documentation builds successfully. -The tests are run only if the pre-commit hook and documentation build pass. -Successful tests are required for merging a PR. -1. Checks and tests are run automatically when you open a pull request. -1. For the tests to run, the [pre-commit](#pre-commit-hooks) hook must pass and - the [documentation](#documentation) must be built successfully. -1. Review the GitHub Actions output if your PR fails. -1. Fix any issues to ensure that all checks and tests pass. +1. First, the [pre-commit](#pre-commit-hooks) hooks must pass and the [documentation](#documentation) must be built successfully. +1. Next, the [type checking](#type-checking) is run. +1. If all previous checks pass, the [tests](#tests) are run. + +> \[!TIP\] +> Review the GitHub Actions output if your PR fails. + +> \[!IMPORTANT\] +> Successful completion of all the workflow checks is required for merging a PR. -## Making and Reviewing Changes +## Making and Submitting Changes 1. Make changes in a new branch. 1. Test your changes locally. -1. Commit (pre-commit hook will run). -1. Push to your branch and create a pull request. Always request a review from: - - [Martin Kozlovský](https://github.com/kozlov721) - - [Matija Teršek](https://github.com/tersekmatija) - - [Conor Simmons](https://github.com/conorsim) -1. Any other relevant team members can be added as reviewers as well. +1. Commit your changes (pre-commit hooks will run). +1. Push your branch and create a pull request. 1. The team will review and merge your PR. diff --git a/configs/README.md b/configs/README.md index a85d5221..96444f66 100644 --- a/configs/README.md +++ b/configs/README.md @@ -147,16 +147,16 @@ Here you can change everything related to actual training of the model. | accumulate_grad_batches | int | 1 | number of batches for gradient accumulation | | use_weighted_sampler | bool | False | bool if use WeightedRandomSampler for training, only works with classification tasks | | epochs | int | 100 | number of training epochs | -| num_workers | int | 2 | number of workers for data loading | +| n_workers | int | 2 | number of workers for data loading | | train_metrics_interval | int | -1 | frequency of computing metrics on train data, -1 if don't perform | | validation_interval | int | 1 | frequency of computing metrics on validation data | -| num_log_images | int | 4 | maximum number of images to visualize and log | +| n_log_images | int | 4 | maximum number of images to visualize and log | | skip_last_batch | bool | True | whether to skip last batch while training | | accelerator | Literal\["auto", "cpu", "gpu"\] | "auto" | What accelerator to use for training. 
| | devices | int \| list\[int\] \| str | "auto" | Either specify how many devices to use (int), list specific devices, or use "auto" for automatic configuration based on the selected accelerator | | matmul_precision | Literal\["medium", "high", "highest"\] \| None | None | Sets the internal precision of float32 matrix multiplications. | | strategy | Literal\["auto", "ddp"\] | "auto" | What strategy to use for training. | -| num_sanity_val_steps | int | 2 | Number of sanity validation steps performed before training. | +| n_sanity_val_steps | int | 2 | Number of sanity validation steps performed before training. | | profiler | Literal\["simple", "advanced"\] \| None | None | PL profiler for GPU/CPU/RAM utilization analysis | | verbose | bool | True | Print all intermediate results to console. | diff --git a/configs/classification_model.yaml b/configs/classification_model.yaml index be5a5006..4db7a9b1 100644 --- a/configs/classification_model.yaml +++ b/configs/classification_model.yaml @@ -25,9 +25,9 @@ trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 callbacks: - name: ExportOnTrainEnd diff --git a/configs/coco_model.yaml b/configs/coco_model.yaml index 9af25feb..23516bea 100644 --- a/configs/coco_model.yaml +++ b/configs/coco_model.yaml @@ -7,7 +7,7 @@ model: - name: EfficientRep params: channels_list: [64, 128, 256, 512, 1024] - num_repeats: [1, 6, 12, 18, 6] + n_repeats: [1, 6, 12, 18, 6] depth_mul: 0.33 width_mul: 0.33 @@ -16,7 +16,7 @@ model: - EfficientRep params: channels_list: [256, 128, 128, 256, 256, 512] - num_repeats: [12, 12, 12, 12] + n_repeats: [12, 12, 12, 12] depth_mul: 0.33 width_mul: 0.33 @@ -108,16 +108,16 @@ trainer: devices: auto strategy: auto - num_sanity_val_steps: 1 + n_sanity_val_steps: 1 profiler: null verbose: True batch_size: 4 accumulate_grad_batches: 1 epochs: &epochs 200 - num_workers: 8 + n_workers: 8 train_metrics_interval: -1 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 skip_last_batch: True log_sub_losses: True save_top_k: 3 diff --git a/configs/detection_model.yaml b/configs/detection_model.yaml index 45c3431e..7bc87eef 100644 --- a/configs/detection_model.yaml +++ b/configs/detection_model.yaml @@ -20,9 +20,9 @@ trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 callbacks: - name: ExportOnTrainEnd diff --git a/configs/efficient_coco_model.yaml b/configs/efficient_coco_model.yaml index 64aa48e0..f2c9db5d 100644 --- a/configs/efficient_coco_model.yaml +++ b/configs/efficient_coco_model.yaml @@ -5,7 +5,7 @@ model: - name: EfficientRep params: channels_list: [64, 128, 256, 512, 1024] - num_repeats: [1, 6, 12, 18, 6] + n_repeats: [1, 6, 12, 18, 6] depth_mul: 0.33 width_mul: 0.33 @@ -14,7 +14,7 @@ model: - EfficientRep params: channels_list: [256, 128, 128, 256, 256, 512] - num_repeats: [12, 12, 12, 12] + n_repeats: [12, 12, 12, 12] depth_mul: 0.33 width_mul: 0.33 @@ -91,14 +91,14 @@ loader: trainer: - num_sanity_val_steps: 1 + n_sanity_val_steps: 1 batch_size: 4 accumulate_grad_batches: 1 epochs: 200 - num_workers: 4 + n_workers: 4 train_metrics_interval: -1 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 save_top_k: 3 preprocessing: diff --git a/configs/example_export.yaml b/configs/example_export.yaml index f86f1dfa..51f768dc 100644 --- a/configs/example_export.yaml +++ b/configs/example_export.yaml @@ -22,9 +22,9 @@ trainer: batch_size: 4 epochs: 
&epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 optimizer: name: SGD diff --git a/configs/example_multi_input.yaml b/configs/example_multi_input.yaml index d185f37e..9632ed43 100644 --- a/configs/example_multi_input.yaml +++ b/configs/example_multi_input.yaml @@ -97,9 +97,9 @@ tracker: trainer: batch_size: 1 epochs: 10 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 4 + n_log_images: 4 callbacks: - name: ExportOnTrainEnd diff --git a/configs/example_tuning.yaml b/configs/example_tuning.yaml index b350ea2f..d8c9027d 100644 --- a/configs/example_tuning.yaml +++ b/configs/example_tuning.yaml @@ -30,7 +30,7 @@ trainer: batch_size: 4 epochs: &epochs 100 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 scheduler: name: CosineAnnealingLR diff --git a/configs/keypoint_bbox_model.yaml b/configs/keypoint_bbox_model.yaml index 5b1ebb2d..51554f73 100644 --- a/configs/keypoint_bbox_model.yaml +++ b/configs/keypoint_bbox_model.yaml @@ -18,9 +18,9 @@ trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 callbacks: - name: ExportOnTrainEnd diff --git a/configs/resnet_model.yaml b/configs/resnet_model.yaml index e8353870..bb9f8f62 100644 --- a/configs/resnet_model.yaml +++ b/configs/resnet_model.yaml @@ -36,9 +36,9 @@ loader: trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 preprocessing: train_image_size: [&height 224, &width 224] diff --git a/configs/segmentation_model.yaml b/configs/segmentation_model.yaml index a822d7c1..b403a75e 100644 --- a/configs/segmentation_model.yaml +++ b/configs/segmentation_model.yaml @@ -21,9 +21,9 @@ trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 callbacks: - name: ExportOnTrainEnd diff --git a/luxonis_train/__init__.py b/luxonis_train/__init__.py index 60d8d501..ebc4a719 100644 --- a/luxonis_train/__init__.py +++ b/luxonis_train/__init__.py @@ -1,7 +1,11 @@ +__version__ = "0.0.1" + + from .attached_modules import * from .core import * +from .loaders import * from .models import * from .nodes import * +from .optimizers import * +from .schedulers import * from .utils import * - -__version__ = "0.0.1" diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index 454e9525..c3164227 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -41,7 +41,9 @@ class _ViewType(str, Enum): ), ] -ViewType = Annotated[_ViewType, typer.Option(help="Which dataset view to use.")] +ViewType = Annotated[ + _ViewType, typer.Option(help="Which dataset view to use.") +] SaveDirType = Annotated[ Optional[Path], @@ -53,7 +55,8 @@ class _ViewType(str, Enum): def train( config: ConfigType = None, resume: Annotated[ - Optional[str], typer.Option(help="Resume training from this checkpoint.") + Optional[str], + typer.Option(help="Resume training from this checkpoint."), ] = None, opts: OptsType = None, ): @@ -65,7 +68,9 @@ def train( @app.command() def test( - config: ConfigType = None, view: ViewType = _ViewType.VAL, opts: OptsType = None + config: ConfigType = None, + view: ViewType = _ViewType.VAL, + opts: OptsType = None, ): """Evaluate model.""" from luxonis_train.core import LuxonisModel @@ -115,13 +120,26 @@ def inspect( case_sensitive=False, ), ] = "train", # type: ignore + size_multiplier: Annotated[ + float, + 
typer.Option( + ..., + "--size-multiplier", + "-s", + help=( + "Multiplier for the image size. " + "By default the images are shown in their original size." + ), + show_default=False, + ), + ] = 1.0, opts: OptsType = None, ): """Inspect dataset.""" from lightning.pytorch import seed_everything from luxonis_ml.data.__main__ import inspect as lxml_inspect - from luxonis_train.utils.config import Config + from luxonis_train.utils import Config cfg = Config.get_config(config, opts) if cfg.trainer.seed is not None: @@ -144,6 +162,7 @@ def inspect( name=cfg.loader.params["dataset_name"], view=[view], aug_config=f.name, + size_multiplier=size_multiplier, ) @@ -166,7 +185,7 @@ def archive( def version_callback(value: bool): if value: - typer.echo(f"LuxonisTrain Version: {version(__package__)}") + typer.echo(f"LuxonisTrain Version: {version('luxonis_train')}") raise typer.Exit() @@ -175,7 +194,9 @@ def common( _: Annotated[ bool, typer.Option( - "--version", callback=version_callback, help="Show version and exit." + "--version", + callback=version_callback, + help="Show version and exit.", ), ] = False, source: Annotated[ diff --git a/luxonis_train/utils/assigners/__init__.py b/luxonis_train/assigners/__init__.py similarity index 100% rename from luxonis_train/utils/assigners/__init__.py rename to luxonis_train/assigners/__init__.py diff --git a/luxonis_train/utils/assigners/atts_assigner.py b/luxonis_train/assigners/atts_assigner.py similarity index 84% rename from luxonis_train/utils/assigners/atts_assigner.py rename to luxonis_train/assigners/atts_assigner.py index 9a0466da..269496fa 100644 --- a/luxonis_train/utils/assigners/atts_assigner.py +++ b/luxonis_train/assigners/atts_assigner.py @@ -49,9 +49,10 @@ def forward( @type pred_bboxes: Tensor @param pred_bboxes: Predicted bboxes of shape [bs, n_anchors, 4] @rtype: tuple[Tensor, Tensor, Tensor, Tensor, Tensor] - @return: Assigned labels of shape [bs, n_anchors], assigned bboxes of shape [bs, - n_anchors, 4], assigned scores of shape [bs, n_anchors, n_classes] and - output positive mask of shape [bs, n_anchors]. + @return: Assigned labels of shape [bs, n_anchors], assigned + bboxes of shape [bs, n_anchors, 4], assigned scores of shape + [bs, n_anchors, n_classes] and output positive mask of shape + [bs, n_anchors]. 
""" self.n_anchors = anchor_bboxes.size(0) @@ -61,9 +62,13 @@ def forward( if self.n_max_boxes == 0: device = gt_bboxes.device return ( - torch.full([self.bs, self.n_anchors], self.n_classes).to(device), + torch.full([self.bs, self.n_anchors], self.n_classes).to( + device + ), torch.zeros([self.bs, self.n_anchors, 4]).to(device), - torch.zeros([self.bs, self.n_anchors, self.n_classes]).to(device), + torch.zeros([self.bs, self.n_anchors, self.n_classes]).to( + device + ), torch.zeros([self.bs, self.n_anchors]).to(device), torch.zeros([self.bs, self.n_anchors]).to(device), ) @@ -78,7 +83,10 @@ def forward( gt_centers = self._get_bbox_center(gt_bboxes_flat) anchor_centers = self._get_bbox_center(anchor_bboxes) distances = ( - (gt_centers[:, None, :] - anchor_centers[None, :, :]).pow(2).sum(-1).sqrt() + (gt_centers[:, None, :] - anchor_centers[None, :, :]) + .pow(2) + .sum(-1) + .sqrt() ) distances = distances.reshape([self.bs, -1, self.n_anchors]) @@ -103,15 +111,18 @@ def forward( ) # Generate final assignments based on masks - assigned_labels, assigned_bboxes, assigned_scores = self._get_final_assignments( + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + ) = self._get_final_assignments( gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum ) # Soft label with IoU - if pred_bboxes is not None: - ious = batch_iou(gt_bboxes, pred_bboxes) * mask_pos - ious = ious.max(dim=-2)[0].unsqueeze(-1) - assigned_scores *= ious + ious = batch_iou(gt_bboxes, pred_bboxes) * mask_pos + ious = ious.max(dim=-2)[0].unsqueeze(-1) + assigned_scores *= ious out_mask_positive = mask_pos_sum.bool() @@ -141,12 +152,13 @@ def _select_topk_candidates( @type mask_gt: Tensor @param mask_gt: Mask for valid GT per image. @rtype: tuple[Tensor, Tensor] - @return: Mask of selected anchors and indices of selected anchors. + @return: Mask of selected anchors and indices of selected + anchors. """ mask_gt = mask_gt.repeat(1, 1, self.topk).bool() level_distances = torch.split(distances, n_level_bboxes, dim=-1) - is_in_topk_list = [] - topk_idxs = [] + is_in_topk_list: list[Tensor] = [] + topk_idxs: list[Tensor] = [] start_idx = 0 for per_level_distances, per_level_boxes in zip( level_distances, n_level_bboxes @@ -158,18 +170,20 @@ def _select_topk_candidates( ) topk_idxs.append(per_level_topk_idxs + start_idx) per_level_topk_idxs = torch.where( - mask_gt, per_level_topk_idxs, torch.zeros_like(per_level_topk_idxs) + mask_gt, + per_level_topk_idxs, + torch.zeros_like(per_level_topk_idxs), + ) + is_in_topk = F.one_hot(per_level_topk_idxs, per_level_boxes).sum( + dim=-2 ) - is_in_topk = F.one_hot(per_level_topk_idxs, per_level_boxes).sum(dim=-2) is_in_topk = torch.where( is_in_topk > 1, torch.zeros_like(is_in_topk), is_in_topk ) is_in_topk_list.append(is_in_topk.to(distances.dtype)) start_idx = end_idx - is_in_topk_list = torch.cat(is_in_topk_list, dim=-1) - topk_idxs = torch.cat(topk_idxs, dim=-1) - return is_in_topk_list, topk_idxs + return torch.cat(is_in_topk_list, dim=-1), torch.cat(topk_idxs, dim=-1) def _get_positive_samples( self, @@ -177,14 +191,18 @@ def _get_positive_samples( topk_idxs: Tensor, overlaps: Tensor, ) -> Tensor: - """Computes threshold and returns mask for samples over threshold. + """Computes threshold and returns mask for samples over + threshold. 
@type is_in_topk: Tensor - @param is_in_topk: Mask of selected anchors [bx, n_max_boxes, n_anchors] + @param is_in_topk: Mask of selected anchors [bx, n_max_boxes, + n_anchors] @type topk_idxs: Tensor - @param topk_idxs: Indices of selected anchors [bx, n_max_boxes, topK * n_levels] + @param topk_idxs: Indices of selected anchors [bx, n_max_boxes, + topK * n_levels] @type overlaps: Tensor - @param overlaps: IoUs between GTs and anchors [bx, n_max_boxes, n_anchors] + @param overlaps: IoUs between GTs and anchors [bx, n_max_boxes, + n_anchors] @rtype: Tensor @return: Mask of positive samples [bx, n_max_boxes, n_anchors] """ @@ -199,14 +217,17 @@ def _get_positive_samples( assist_idxs = assist_idxs[:, None] flatten_idxs = topk_idxs + assist_idxs candidate_overlaps = _candidate_overlaps.reshape(-1)[flatten_idxs] - candidate_overlaps = candidate_overlaps.reshape([self.bs, self.n_max_boxes, -1]) + candidate_overlaps = candidate_overlaps.reshape( + [self.bs, self.n_max_boxes, -1] + ) overlaps_mean_per_gt = candidate_overlaps.mean(dim=-1, keepdim=True) overlaps_std_per_gt = candidate_overlaps.std(dim=-1, keepdim=True) overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt is_pos = torch.where( - _candidate_overlaps > overlaps_thr_per_gt.repeat([1, 1, self.n_anchors]), + _candidate_overlaps + > overlaps_thr_per_gt.repeat([1, 1, self.n_anchors]), is_in_topk, torch.zeros_like(is_in_topk), ) @@ -230,15 +251,18 @@ def _get_final_assignments( @type mask_pos_sum: Tensor @param mask_pos_sum: Mask of matched GTs [bs, n_anchors] @rtype: tuple[Tensor, Tensor, Tensor] - @return: Assigned labels of shape [bs, n_anchors], assigned bboxes of shape [bs, - n_anchors, 4], assigned scores of shape [bs, n_anchors, n_classes]. + @return: Assigned labels of shape [bs, n_anchors], assigned + bboxes of shape [bs, n_anchors, 4], assigned scores of shape + [bs, n_anchors, n_classes]. 
""" # assigned target labels batch_idx = torch.arange( self.bs, dtype=gt_labels.dtype, device=gt_labels.device ) batch_idx = batch_idx[..., None] - assigned_gt_idx = (assigned_gt_idx + batch_idx * self.n_max_boxes).long() + assigned_gt_idx = ( + assigned_gt_idx + batch_idx * self.n_max_boxes + ).long() assigned_labels = gt_labels.flatten()[assigned_gt_idx.flatten()] assigned_labels = assigned_labels.reshape([self.bs, self.n_anchors]) assigned_labels = torch.where( @@ -252,7 +276,9 @@ def _get_final_assignments( assigned_bboxes = assigned_bboxes.reshape([self.bs, self.n_anchors, 4]) # assigned target scores - assigned_scores = F.one_hot(assigned_labels.long(), self.n_classes + 1).float() + assigned_scores = F.one_hot( + assigned_labels.long(), self.n_classes + 1 + ).float() assigned_scores = assigned_scores[:, :, : self.n_classes] return assigned_labels, assigned_bboxes, assigned_scores diff --git a/luxonis_train/utils/assigners/tal_assigner.py b/luxonis_train/assigners/tal_assigner.py similarity index 87% rename from luxonis_train/utils/assigners/tal_assigner.py rename to luxonis_train/assigners/tal_assigner.py index 08b5b461..ea228eba 100644 --- a/luxonis_train/utils/assigners/tal_assigner.py +++ b/luxonis_train/assigners/tal_assigner.py @@ -66,9 +66,10 @@ def forward( @type mask_gt: Tensor @param mask_gt: Mask for valid GTs [bs, n_max_boxes, 1] @rtype: tuple[Tensor, Tensor, Tensor, Tensor, Tensor] - @return: Assigned labels of shape [bs, n_anchors], assigned bboxes of shape [bs, - n_anchors, 4], assigned scores of shape [bs, n_anchors, n_classes] and - output mask of shape [bs, n_anchors] + @return: Assigned labels of shape [bs, n_anchors], assigned + bboxes of shape [bs, n_anchors, 4], assigned scores of shape + [bs, n_anchors, n_classes] and output mask of shape [bs, + n_anchors] """ self.bs = pred_scores.size(0) self.n_max_boxes = gt_bboxes.size(1) @@ -76,7 +77,9 @@ def forward( if self.n_max_boxes == 0: device = gt_bboxes.device return ( - torch.full_like(pred_scores[..., 0], self.n_classes).to(device), + torch.full_like(pred_scores[..., 0], self.n_classes).to( + device + ), torch.zeros_like(pred_bboxes).to(device), torch.zeros_like(pred_scores).to(device), torch.zeros_like(pred_scores[..., 0]).to(device), @@ -105,7 +108,11 @@ def forward( ) # Generate final targets based on masks - assigned_labels, assigned_bboxes, assigned_scores = self._get_final_assignments( + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + ) = self._get_final_assignments( gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum ) @@ -137,7 +144,8 @@ def _get_alignment_metric( gt_labels: Tensor, gt_bboxes: Tensor, ): - """Calculates anchor alignment metric and IoU between GTs and predicted bboxes. + """Calculates anchor alignment metric and IoU between GTs and + predicted bboxes. @type pred_scores: Tensor @param pred_scores: Predicted scores [bs, n_anchors, 1] @@ -151,7 +159,9 @@ def _get_alignment_metric( pred_scores = pred_scores.permute(0, 2, 1) gt_labels = gt_labels.to(torch.long) ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long) - ind[0] = torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes) + ind[0] = ( + torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes) + ) ind[1] = gt_labels.squeeze(-1) bbox_scores = pred_scores[ind[0], ind[1]] @@ -169,24 +179,30 @@ def _select_topk_candidates( """Selects k anchors based on provided metrics tensor. 
@type metrics: Tensor - @param metrics: Metrics tensor of shape [bs, n_max_boxes, n_anchors] + @param metrics: Metrics tensor of shape [bs, n_max_boxes, + n_anchors] @type largest: bool - @param largest: Flag if should keep largest topK. Defaults to True. + @param largest: Flag if should keep largest topK. Defaults to + True. @type topk_mask: Tensor - @param topk_mask: Mask for valid GTs of shape [bs, n_max_boxes, topk] + @param topk_mask: Mask for valid GTs of shape [bs, n_max_boxes, + topk] @rtype: Tensor - @return: Mask of selected anchors of shape [bs, n_max_boxes, n_anchors] + @return: Mask of selected anchors of shape [bs, n_max_boxes, + n_anchors] """ - num_anchors = metrics.shape[-1] + n_anchors = metrics.shape[-1] topk_metrics, topk_idxs = torch.topk( metrics, self.topk, dim=-1, largest=largest ) if topk_mask is None: - topk_mask = (topk_metrics.max(dim=-1, keepdim=True)[0] > self.eps).tile( - [1, 1, self.topk] - ) - topk_idxs = torch.where(topk_mask, topk_idxs, torch.zeros_like(topk_idxs)) - is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(dim=-2) + topk_mask = ( + topk_metrics.max(dim=-1, keepdim=True)[0] > self.eps + ).tile([1, 1, self.topk]) + topk_idxs = torch.where( + topk_mask, topk_idxs, torch.zeros_like(topk_idxs) + ) + is_in_topk = F.one_hot(topk_idxs, n_anchors).sum(dim=-2) is_in_topk = torch.where( is_in_topk > 1, torch.zeros_like(is_in_topk), is_in_topk ) @@ -210,8 +226,9 @@ def _get_final_assignments( @type mask_pos_sum: Tensor @param mask_pos_sum: Mask of matched GTs [bs, n_anchors] @rtype: tuple[Tensor, Tensor, Tensor] - @return: Assigned labels of shape [bs, n_anchors], assigned bboxes of shape [bs, - n_anchors, 4], assigned scores of shape [bs, n_anchors, n_classes]. + @return: Assigned labels of shape [bs, n_anchors], assigned + bboxes of shape [bs, n_anchors, 4], assigned scores of shape + [bs, n_anchors, n_classes]. """ # assigned target labels batch_ind = torch.arange( @@ -228,7 +245,9 @@ def _get_final_assignments( assigned_scores = F.one_hot(assigned_labels, self.n_classes) mask_pos_scores = mask_pos_sum[:, :, None].repeat(1, 1, self.n_classes) assigned_scores = torch.where( - mask_pos_scores > 0, assigned_scores, torch.full_like(assigned_scores, 0) + mask_pos_scores > 0, + assigned_scores, + torch.full_like(assigned_scores, 0), ) assigned_labels = torch.where( diff --git a/luxonis_train/utils/assigners/utils.py b/luxonis_train/assigners/utils.py similarity index 88% rename from luxonis_train/utils/assigners/utils.py rename to luxonis_train/assigners/utils.py index fadf5f8e..fe9fba4b 100644 --- a/luxonis_train/utils/assigners/utils.py +++ b/luxonis_train/assigners/utils.py @@ -2,7 +2,7 @@ import torch.nn.functional as F from torch import Tensor -from luxonis_train.utils.boxutils import bbox_iou +from luxonis_train.utils import bbox_iou def candidates_in_gt( @@ -20,7 +20,9 @@ def candidates_in_gt( @return: Mask for anchors inside any GT bbox """ n_anchors = anchor_centers.size(0) - anchor_centers = anchor_centers.unsqueeze(0).repeat(gt_bboxes.size(0), 1, 1) + anchor_centers = anchor_centers.unsqueeze(0).repeat( + gt_bboxes.size(0), 1, 1 + ) gt_bboxes_lt = gt_bboxes[:, :2].unsqueeze(1).repeat(1, n_anchors, 1) gt_bboxes_rb = gt_bboxes[:, 2:].unsqueeze(1).repeat(1, n_anchors, 1) bbox_delta_lt = anchor_centers - gt_bboxes_lt @@ -33,12 +35,15 @@ def candidates_in_gt( def fix_collisions( mask_pos: Tensor, overlaps: Tensor, n_max_boxes: int ) -> tuple[Tensor, Tensor, Tensor]: - """If an anchor is assigned to multiple GTs, the one with highest IoU is selected. 
+ """If an anchor is assigned to multiple GTs, the one with highest + IoU is selected. @type mask_pos: Tensor - @param mask_pos: Mask of assigned anchors [bs, n_max_boxes, n_anchors] + @param mask_pos: Mask of assigned anchors [bs, n_max_boxes, + n_anchors] @type overlaps: Tensor - @param overlaps: IoUs between GTs and anchors [bx, n_max_boxes, n_anchors] + @param overlaps: IoUs between GTs and anchors [bx, n_max_boxes, + n_anchors] @type n_max_boxes: int @param n_max_boxes: Number of maximum boxes per image @rtype: tuple[Tensor, Tensor, Tensor] @@ -46,7 +51,9 @@ def fix_collisions( """ mask_pos_sum = mask_pos.sum(dim=-2) if mask_pos_sum.max() > 1: - mask_multi_gts = (mask_pos_sum.unsqueeze(1) > 1).repeat([1, n_max_boxes, 1]) + mask_multi_gts = (mask_pos_sum.unsqueeze(1) > 1).repeat( + [1, n_max_boxes, 1] + ) max_overlaps_idx = overlaps.argmax(dim=1) is_max_overlaps = F.one_hot(max_overlaps_idx, n_max_boxes) is_max_overlaps = is_max_overlaps.permute(0, 2, 1).to(overlaps.dtype) @@ -57,8 +64,8 @@ def fix_collisions( def batch_iou(batch1: Tensor, batch2: Tensor) -> Tensor: - """Calculates IoU for each pair of bboxes in the batch. Bboxes must be in xyxy - format. + """Calculates IoU for each pair of bboxes in the batch. Bboxes must + be in xyxy format. @type batch1: Tensor @param batch1: Tensor of shape C{[bs, N, 4]} diff --git a/luxonis_train/attached_modules/base_attached_module.py b/luxonis_train/attached_modules/base_attached_module.py index 17a4c277..904120a2 100644 --- a/luxonis_train/attached_modules/base_attached_module.py +++ b/luxonis_train/attached_modules/base_attached_module.py @@ -1,13 +1,15 @@ import logging from abc import ABC +from contextlib import suppress from typing import Generic +from luxonis_ml.data import LabelType from luxonis_ml.utils.registry import AutoRegisterMeta -from torch import Tensor, nn +from torch import Size, Tensor, nn from typing_extensions import TypeVarTuple, Unpack from luxonis_train.nodes import BaseNode -from luxonis_train.utils.types import IncompatibleException, Labels, LabelType, Packet +from luxonis_train.utils import IncompatibleException, Labels, Packet logger = logging.getLogger(__name__) @@ -15,7 +17,11 @@ class BaseAttachedModule( - nn.Module, Generic[Unpack[Ts]], ABC, metaclass=AutoRegisterMeta, register=False + nn.Module, + Generic[Unpack[Ts]], + ABC, + metaclass=AutoRegisterMeta, + register=False, ): """Base class for all modules that are attached to a L{LuxonisNode}. @@ -58,21 +64,38 @@ def __init__(self, *, node: BaseNode | None = None): self._node = node self._epoch = 0 - self._required_labels: tuple[LabelType, ...] | None = None - if self._node and self.supported_labels and self.node.tasks: + self.required_labels: list[LabelType] = [] + if self._node and self.supported_labels: + module_supported = [ + label.value + if isinstance(label, LabelType) + else f"({' + '.join(label)})" + for label in self.supported_labels + ] + module_supported = f"[{', '.join(module_supported)}]" + if not self.node.tasks: + raise IncompatibleException( + f"Module '{self.name}' requires one of the following " + f"labels or combinations of labels: {module_supported}, " + f"but is connected to node '{self.node.name}' which does not specify any tasks." 
+ ) node_tasks = set(self.node.tasks) for required_labels in self.supported_labels: if isinstance(required_labels, LabelType): - required_labels = (required_labels,) + required_labels = [required_labels] + else: + required_labels = list(required_labels) if set(required_labels) <= node_tasks: - self._required_labels = required_labels + self.required_labels = required_labels break else: - raise ValueError( - f"Module {self.name} supports labels {self.supported_labels}, " - f"but is connected to node {self.node.name} which does not support any of them. " - f"{self.node.name} supports {list(self.node_tasks.keys())}." + node_supported = [task.value for task in self.node.tasks] + raise IncompatibleException( + f"Module '{self.name}' requires one of the following labels or combinations of labels: {module_supported}, " + f"but is connected to node '{self.node.name}' which does not support any of them. " + f"{self.node.name} supports {node_supported}." ) + self._check_node_type_override() @property def name(self) -> str: @@ -83,7 +106,8 @@ def node(self) -> BaseNode: """Reference to the node that this module is attached to. @type: L{BaseNode} - @raises RuntimeError: If the node was not provided during initialization. + @raises RuntimeError: If the node was not provided during + initialization. """ if self._node is None: raise RuntimeError( @@ -93,20 +117,63 @@ def node(self) -> BaseNode: return self._node @property - def required_labels(self) -> tuple[LabelType, ...]: - if self._required_labels is None: - raise ValueError(f"{self.name} does not require any labels.") - return self._required_labels + def n_keypoints(self) -> int: + """Getter for the number of keypoints. + + @type: int + @raises ValueError: If the node does not support keypoints. + @raises RuntimeError: If the node doesn't define any task. + """ + return self.node.n_keypoints + + @property + def n_classes(self) -> int: + """Getter for the number of classes. + + @type: int + @raises RuntimeError: If the node doesn't define any task. + @raises ValueError: If the number of classes is different for + different tasks. In that case, use the L{get_n_classes} + method. + """ + return self.node.n_classes + + @property + def original_in_shape(self) -> Size: + """Getter for the original input shape as [N, H, W]. + + @type: Size + """ + return self.node.original_in_shape + + @property + def class_names(self) -> list[str]: + """Getter for the class names. + + @type: list[str] + @raises RuntimeError: If the node doesn't define any task. + @raises ValueError: If the class names are different for + different tasks. In that case, use the L{get_class_names} + method. + """ + return self.node.class_names @property def node_tasks(self) -> dict[LabelType, str]: + """Getter for the tasks of the attached node. + + @type: dict[LabelType, str] + @raises RuntimeError: If the node does not have the `tasks` attribute set. + """ if self.node._tasks is None: - raise ValueError("Node must have the `tasks` attribute specified.") + raise RuntimeError( + "Node must have the `tasks` attribute specified." + ) return self.node._tasks def get_label( self, labels: Labels, label_type: LabelType | None = None - ) -> tuple[Tensor, LabelType]: + ) -> Tensor: """Extracts a specific label from the labels dictionary. 
If the label type is not provided, the first label that matches the @@ -114,11 +181,11 @@ def get_label( Example:: >>> # supported_labels = [LabelType.SEGMENTATION] - >>> labels = {"segmentation": ..., "boundingbox": ...} + >>> labels = {"segmentation": seg_tensor, "boundingbox": bbox_tensor} >>> get_label(labels) - (..., LabelType.SEGMENTATION) # returns the first matching label + seg_tensor # returns the first matching label >>> get_label(labels, LabelType.BOUNDINGBOX) - (..., LabelType.BOUNDINGBOX) # returns the bounding box label + bbox_tensor # returns the bounding box label >>> get_label(labels, LabelType.CLASSIFICATION) IncompatibleException: Label 'classification' is missing from the dataset. @@ -126,13 +193,18 @@ def get_label( @param labels: Labels from the dataset. @type label_type: LabelType | None @param label_type: Type of the label to extract. - @raises IncompatibleException: If the label is not found in the labels dictionary. - @raises NotImplementedError: If the module requires multiple labels. For such cases, - the `prepare` method should be overridden. - @rtype: tuple[Tensor, LabelType] - @return: Extracted label and its type. + @rtype: Tensor + @return: Extracted label + + @raises ValueError: If the module requires multiple labels and the C{label_type} is not provided. + @raises IncompatibleException: If the label is not found in the labels dictionary. """ + return self._get_label(labels, label_type)[0] + + def _get_label( + self, labels: Labels, label_type: LabelType | None = None + ) -> tuple[Tensor, LabelType]: if label_type is None: if len(self.required_labels) == 1: label_type = self.required_labels[0] @@ -145,16 +217,9 @@ def get_label( ) return labels[task_name] - if len(self.required_labels) > 1: - raise NotImplementedError( - f"{self.name} requires multiple labels. You must provide the " - "`label_type` argument to extract the desired label." - ) - for label, label_type in labels.values(): - if label_type == self.required_labels[0]: - return label, label_type - raise IncompatibleException.from_missing_task( - self.required_labels[0].value, list(labels.keys()), self.name + raise ValueError( + f"{self.name} requires multiple labels. You must provide the " + "`label_type` argument to extract the desired label." ) def get_input_tensors( @@ -181,33 +246,37 @@ def get_input_tensors( @rtype: list[Tensor] @return: Extracted input tensors - @raises ValueError: If the task type is not supported by the node or if the task - is not present in the inputs. + @raises IncompatibleException: If the task type is not supported by the node. + @raises IncompatibleException: If the task is not present in the inputs. - @raises NotImplementedError: If the module requires multiple labels. + @raises ValueError: If the module requires multiple labels. For such cases, the `prepare` method should be overridden. """ if task_type is not None: if isinstance(task_type, LabelType): if task_type not in self.node_tasks: - raise ValueError( + raise IncompatibleException( f"Task {task_type.value} is not supported by the node " f"{self.node.name}." ) return inputs[self.node_tasks[task_type]] else: if task_type not in inputs: - raise ValueError(f"Task {task_type} is not present in the inputs.") + raise IncompatibleException( + f"Task {task_type} is not present in the inputs." + ) return inputs[task_type] if len(self.required_labels) > 1: - raise NotImplementedError( + raise ValueError( f"{self.name} requires multiple labels, " "you must provide the `task_type` argument to extract the desired input." 
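Editor's note: for orientation, this is roughly how a user-defined attached module is expected to consume `get_input_tensors` and `get_label`. The class below is purely hypothetical and the import paths are assumptions (they presume the losses package re-exports `BaseLoss` the same way it re-exports the concrete losses elsewhere in this patch).

```python
from luxonis_ml.data import LabelType
from torch import Tensor

from luxonis_train.attached_modules.losses import BaseLoss  # assumed export path
from luxonis_train.utils import Labels, Packet


class ToyL1Loss(BaseLoss[Tensor, Tensor]):
    # Hypothetical example module, not part of this patch.
    supported_labels = [LabelType.CLASSIFICATION]

    def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Tensor, Tensor]:
        # With a single required label, both helpers can be called without
        # arguments: they resolve the node output and ground truth registered
        # for that label.
        predictions = self.get_input_tensors(inputs)[0]
        target = self.get_label(labels)
        return predictions, target

    def forward(self, predictions: Tensor, target: Tensor) -> Tensor:
        return (predictions - target).abs().mean()
```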
) return inputs[self.node_tasks[self.required_labels[0]]] - def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]: + def prepare( + self, inputs: Packet[Tensor], labels: Labels + ) -> tuple[Unpack[Ts]]: """Prepares node outputs for the forward pass of the module. This default implementation selects the output and label based on @@ -223,48 +292,63 @@ def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]: @rtype: tuple[Unpack[Ts]] @return: Prepared inputs. Should allow the following usage with the - L{forward} method: + L{forward} method:: >>> loss.forward(*loss.prepare(outputs, labels)) - @raises NotImplementedError: If the module requires multiple labels. - @raises IncompatibleException: If the inputs are not compatible with the module. + @raises RuntimeError: If the module requires multiple labels and + is connected to a multi-task node. In this case, the default + implementation cannot be used and the C{prepare} method should be overridden. + + @raises RuntimeError: If the C{tasks} attribute is not set on the node. + @raises RuntimeError: If the C{supported_labels} attribute is not set on the module. """ if self.node._tasks is None: - raise ValueError( + raise RuntimeError( f"{self.node.name} must have the `tasks` attribute specified " f"for {self.name} to make use of the default `prepare` method." ) if self.supported_labels is None: - raise ValueError( + raise RuntimeError( f"{self.name} must have the `supported_labels` attribute " "specified in order to use the default `prepare` method." ) if len(self.supported_labels) > 1: - if len(self.node._tasks) > 1: - raise NotImplementedError( + if len(self.node_tasks) > 1: + raise RuntimeError( f"{self.name} supports more than one label type" f"and is connected to {self.node.name} node " "which is a multi-task node. The default `prepare` " "implementation cannot be used in this case." ) self.supported_labels = list( - set(self.supported_labels) & set(self.node._tasks) + set(self.supported_labels) & set(self.node_tasks) ) x = self.get_input_tensors(inputs) - label, label_type = self.get_label(labels) + label, label_type = self._get_label(labels) if label_type in [LabelType.CLASSIFICATION, LabelType.SEGMENTATION]: - if isinstance(x, list): - if len(x) == 1: - x = x[0] - else: - logger.warning( - f"Module {self.name} expects a single tensor as input, " - f"but got {len(x)} tensors. Using the last tensor. " - f"If this is not the desired behavior, please override the " - "`prepare` method of the attached module or the `wrap` " - f"method of {self.node.name}." - ) - x = x[-1] + if len(x) == 1: + x = x[0] + else: + logger.warning( + f"Module {self.name} expects a single tensor as input, " + f"but got {len(x)} tensors. Using the last tensor. " + f"If this is not the desired behavior, please override the " + "`prepare` method of the attached module or the `wrap` " + f"method of {self.node.name}." + ) + x = x[-1] return x, label # type: ignore + + def _check_node_type_override(self) -> None: + if "node" not in self.__annotations__: + return + + node_type = self.__annotations__["node"] + with suppress(RuntimeError): + if not isinstance(self.node, node_type): + raise IncompatibleException( + f"Module '{self.name}' is attached to the '{self.node.name}' node, " + f"but '{self.name}' is only compatible with nodes of type '{node_type.__name__}'." 
+ ) diff --git a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py index 6a28bff9..d25825cb 100644 --- a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py +++ b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py @@ -1,31 +1,39 @@ -from typing import Literal, cast +import logging +from typing import Any, Literal, cast import torch import torch.nn.functional as F +from luxonis_ml.data import LabelType from torch import Tensor, nn from torchvision.ops import box_convert +from luxonis_train.assigners import ATSSAssigner, TaskAlignedAssigner from luxonis_train.nodes import EfficientBBoxHead -from luxonis_train.utils.assigners import ATSSAssigner, TaskAlignedAssigner -from luxonis_train.utils.boxutils import ( - IoUType, +from luxonis_train.utils import ( + Labels, + Packet, anchors_for_fpn_features, compute_iou_loss, dist2bbox, ) -from luxonis_train.utils.types import IncompatibleException, Labels, LabelType, Packet +from luxonis_train.utils.boundingbox import IoUType from .base_loss import BaseLoss +logger = logging.getLogger(__name__) -class AdaptiveDetectionLoss(BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]): + +class AdaptiveDetectionLoss( + BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor] +): node: EfficientBBoxHead supported_labels = [LabelType.BOUNDINGBOX] - class NodePacket(Packet[Tensor]): - features: list[Tensor] - class_scores: Tensor - distributions: Tensor + anchors: Tensor + anchor_points: Tensor + n_anchors_list: list[int] + stride_tensor: Tensor + gt_bboxes_scale: Tensor def __init__( self, @@ -34,7 +42,7 @@ def __init__( reduction: Literal["sum", "mean"] = "mean", class_loss_weight: float = 1.0, iou_loss_weight: float = 2.5, - **kwargs, + **kwargs: Any, ): """BBox loss adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial Applications }. It combines IoU based bbox regression loss and varifocal loss @@ -51,23 +59,15 @@ def __init__( @param class_loss_weight: Weight of classification loss. @type iou_loss_weight: float @param iou_loss_weight: Weight of IoU loss. - @type kwargs: dict - @param kwargs: Additional arguments to pass to L{BaseLoss}. """ super().__init__(**kwargs) - if not isinstance(self.node, EfficientBBoxHead): - raise IncompatibleException( - f"Loss `{self.name}` is only " - "compatible with nodes of type `EfficientBBoxHead`." 
- ) self.iou_type: IoUType = iou_type self.reduction = reduction - self.n_classes = self.node.n_classes self.stride = self.node.stride self.grid_cell_size = self.node.grid_cell_size self.grid_cell_offset = self.node.grid_cell_offset - self.original_img_size = self.node.original_in_shape[1:] + self.original_img_size = self.original_in_shape[1:] self.n_warmup_epochs = n_warmup_epochs self.atts_assigner = ATSSAssigner(topk=9, n_classes=self.n_classes) @@ -79,84 +79,41 @@ def __init__( self.class_loss_weight = class_loss_weight self.iou_loss_weight = iou_loss_weight - self.anchors = None - self.anchor_points = None - self.n_anchors_list = None - self.stride_tensor = None - self.gt_bboxes_scale = None + self._logged_assigner_change = False def prepare( - self, outputs: Packet[Tensor], labels: Labels + self, inputs: Packet[Tensor], labels: Labels ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: - feats = self.get_input_tensors(outputs, "features") - pred_scores = self.get_input_tensors(outputs, "class_scores")[0] - pred_distri = self.get_input_tensors(outputs, "distributions")[0] + feats = self.get_input_tensors(inputs, "features") + pred_scores = self.get_input_tensors(inputs, "class_scores")[0] + pred_distri = self.get_input_tensors(inputs, "distributions")[0] + + target = self.get_label(labels) + batch_size = pred_scores.shape[0] - device = pred_scores.device - target = self.get_label(labels)[0] - if self.gt_bboxes_scale is None: - self.gt_bboxes_scale = torch.tensor( - [ - self.original_img_size[1], - self.original_img_size[0], - self.original_img_size[1], - self.original_img_size[0], - ], - device=device, - ) - ( - self.anchors, - self.anchor_points, - self.n_anchors_list, - self.stride_tensor, - ) = anchors_for_fpn_features( - feats, - self.stride, - self.grid_cell_size, - self.grid_cell_offset, - multiply_with_stride=True, - ) - self.anchor_points_strided = self.anchor_points / self.stride_tensor + self._init_parameters(feats) - target = self._preprocess_target(target, batch_size) + target = self._preprocess_bbox_target(target, batch_size) pred_bboxes = dist2bbox(pred_distri, self.anchor_points_strided) gt_labels = target[:, :, :1] gt_xyxy = target[:, :, 1:] mask_gt = (gt_xyxy.sum(-1, keepdim=True) > 0).float() - if self._epoch < self.n_warmup_epochs: - ( - assigned_labels, - assigned_bboxes, - assigned_scores, - mask_positive, - _, - ) = self.atts_assigner( - self.anchors, - self.n_anchors_list, - gt_labels, - gt_xyxy, - mask_gt, - pred_bboxes.detach() * self.stride_tensor, - ) - else: - # TODO: log change of assigner (once common Logger) - ( - assigned_labels, - assigned_bboxes, - assigned_scores, - mask_positive, - _, - ) = self.tal_assigner( - pred_scores.detach(), - pred_bboxes.detach() * self.stride_tensor, - self.anchor_points, - gt_labels, - gt_xyxy, - mask_gt, - ) + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + mask_positive, + _, + ) = self._run_assigner( + gt_labels, + gt_xyxy, + mask_gt, + pred_bboxes, + pred_scores, + ) return ( pred_bboxes, @@ -176,8 +133,12 @@ def forward( assigned_scores: Tensor, mask_positive: Tensor, ): - one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[..., :-1] - loss_cls = self.varifocal_loss(pred_scores, assigned_scores, one_hot_label) + one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[ + ..., :-1 + ] + loss_cls = self.varifocal_loss( + pred_scores, assigned_scores, one_hot_label + ) if assigned_scores.sum() > 1: loss_cls /= assigned_scores.sum() @@ -192,17 +153,77 @@ def forward( 
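Editor's note: `dist2bbox` itself lives in `luxonis_train.utils` and is not shown in this patch. The snippet below sketches the ltrb-to-xyxy decoding it is assumed to perform at each stride-normalised anchor point; the decoded boxes are then multiplied by `stride_tensor` before being handed to the assigners, as in `prepare` above.

```python
import torch

# Assumed decoding: box corners are the anchor centre offset by the
# predicted (left, top, right, bottom) distances.
anchor_points = torch.tensor([[8.0, 8.0]])          # one anchor centre, stride-normalised
pred_distri = torch.tensor([[2.0, 1.0, 3.0, 4.0]])  # predicted (l, t, r, b) distances

x1y1 = anchor_points - pred_distri[:, :2]
x2y2 = anchor_points + pred_distri[:, 2:]
bbox_xyxy = torch.cat([x1y1, x2y2], dim=-1)
print(bbox_xyxy)  # tensor([[ 6.,  7., 11., 12.]])
```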
bbox_format="xyxy", )[0] - loss = self.class_loss_weight * loss_cls + self.iou_loss_weight * loss_iou + loss = ( + self.class_loss_weight * loss_cls + self.iou_loss_weight * loss_iou + ) sub_losses = {"class": loss_cls.detach(), "iou": loss_iou.detach()} return loss, sub_losses - def _preprocess_target(self, target: Tensor, batch_size: int): - """Preprocess target in shape [batch_size, N, 5] where N is maximum number of - instances in one image.""" + def _init_parameters(self, features: list[Tensor]): + if not hasattr(self, "gt_bboxes_scale"): + self.gt_bboxes_scale = torch.tensor( + [ + self.original_img_size[1], + self.original_img_size[0], + self.original_img_size[1], + self.original_img_size[0], + ], + device=features[0].device, + ) + ( + self.anchors, + self.anchor_points, + self.n_anchors_list, + self.stride_tensor, + ) = anchors_for_fpn_features( + features, + self.stride, + self.grid_cell_size, + self.grid_cell_offset, + multiply_with_stride=True, + ) + self.anchor_points_strided = ( + self.anchor_points / self.stride_tensor + ) + + def _run_assigner( + self, + gt_labels: Tensor, + gt_xyxy: Tensor, + mask_gt: Tensor, + pred_bboxes: Tensor, + pred_scores: Tensor, + ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: + if self._epoch < self.n_warmup_epochs: + return self.atts_assigner( + self.anchors, + self.n_anchors_list, + gt_labels, + gt_xyxy, + mask_gt, + pred_bboxes.detach() * self.stride_tensor, + ) + else: + self._log_assigner_change() + return self.tal_assigner( + pred_scores.detach(), + pred_bboxes.detach() * self.stride_tensor, + self.anchor_points, + gt_labels, + gt_xyxy, + mask_gt, + ) + + def _preprocess_bbox_target( + self, target: Tensor, batch_size: int + ) -> Tensor: + """Preprocess target in shape [batch_size, N, 5] where N is the + maximum number of instances in one image.""" sample_ids, counts = cast( - tuple[Tensor, Tensor], torch.unique(target[:, 0].int(), return_counts=True) + tuple[Tensor, Tensor], + torch.unique(target[:, 0].int(), return_counts=True), ) c_max = int(counts.max()) if counts.numel() > 0 else 0 out_target = torch.zeros(batch_size, c_max, 5, device=target.device) @@ -214,6 +235,16 @@ def _preprocess_target(self, target: Tensor, batch_size: int): out_target[..., 1:] = box_convert(scaled_target, "xywh", "xyxy") return out_target + def _log_assigner_change(self): + if self._logged_assigner_change: + return + + logger.info( + f"Switching to Task Aligned Assigner after {self.n_warmup_epochs} warmup epochs.", + stacklevel=2, + ) + self._logged_assigner_change = True + class VarifocalLoss(nn.Module): def __init__(self, alpha: float = 0.75, gamma: float = 2.0): @@ -236,7 +267,8 @@ def forward( self, pred_score: Tensor, target_score: Tensor, label: Tensor ) -> Tensor: weight = ( - self.alpha * pred_score.pow(self.gamma) * (1 - label) + target_score * label + self.alpha * pred_score.pow(self.gamma) * (1 - label) + + target_score * label ) ce_loss = F.binary_cross_entropy( pred_score.float(), target_score.float(), reduction="none" diff --git a/luxonis_train/attached_modules/losses/base_loss.py b/luxonis_train/attached_modules/losses/base_loss.py index 89ce8d8c..7a69d0d8 100644 --- a/luxonis_train/attached_modules/losses/base_loss.py +++ b/luxonis_train/attached_modules/losses/base_loss.py @@ -17,19 +17,23 @@ class BaseLoss( ): """A base class for all loss functions. - This class defines the basic interface for all loss functions. It utilizes automatic - registration of defined subclasses to a L{LOSSES} registry. 
+ This class defines the basic interface for all loss functions. It + utilizes automatic registration of defined subclasses to a L{LOSSES} + registry. """ @abstractmethod - def forward(self, *args: Unpack[Ts]) -> Tensor | tuple[Tensor, dict[str, Tensor]]: + def forward( + self, *args: Unpack[Ts] + ) -> Tensor | tuple[Tensor, dict[str, Tensor]]: """Forward pass of the loss function. @type args: Unpack[Ts] @param args: Prepared inputs from the L{prepare} method. @rtype: Tensor | tuple[Tensor, dict[str, Tensor]] - @return: The main loss and optional a dictionary of sublosses (for logging). - Only the main loss is used for backpropagation. + @return: The main loss and optional a dictionary of sublosses + (for logging). Only the main loss is used for + backpropagation. """ ... @@ -45,8 +49,10 @@ def run( @type labels: L{Labels} @param labels: Labels from the dataset. @rtype: Tensor | tuple[Tensor, dict[str, Tensor]] - @return: The main loss and optional a dictionary of sublosses (for logging). - Only the main loss is used for backpropagation. - @raises IncompatibleException: If the inputs are not compatible with the module. + @return: The main loss and optional a dictionary of sublosses + (for logging). Only the main loss is used for + backpropagation. + @raises IncompatibleException: If the inputs are not compatible + with the module. """ return self(*self.prepare(inputs, labels)) diff --git a/luxonis_train/attached_modules/losses/bce_with_logits.py b/luxonis_train/attached_modules/losses/bce_with_logits.py index 442a89c3..b759d06b 100644 --- a/luxonis_train/attached_modules/losses/bce_with_logits.py +++ b/luxonis_train/attached_modules/losses/bce_with_logits.py @@ -1,4 +1,4 @@ -from typing import Literal +from typing import Any, Literal import torch from luxonis_ml.data import LabelType @@ -15,35 +15,39 @@ def __init__( weight: list[float] | None = None, reduction: Literal["none", "mean", "sum"] = "mean", pos_weight: Tensor | None = None, - **kwargs, + **kwargs: Any, ): - """This loss combines a L{nn.Sigmoid} layer and the L{nn.BCELoss} in one single - class. This version is more numerically stable than using a plain C{Sigmoid} - followed by a {BCELoss} as, by combining the operations into one layer, we take - advantage of the log-sum-exp trick for numerical stability. + """This loss combines a L{nn.Sigmoid} layer and the + L{nn.BCELoss} in one single class. This version is more + numerically stable than using a plain C{Sigmoid} followed by a + {BCELoss} as, by combining the operations into one layer, we + take advantage of the log-sum-exp trick for numerical stability. @type weight: list[float] | None - @param weight: a manual rescaling weight given to the loss of each batch - element. If given, has to be a list of length C{nbatch}. Defaults to - C{None}. + @param weight: a manual rescaling weight given to the loss of + each batch element. If given, has to be a list of length + C{nbatch}. Defaults to C{None}. @type reduction: Literal["none", "mean", "sum"] - @param reduction: Specifies the reduction to apply to the output: C{"none"} | - C{"mean"} | C{"sum"}. C{"none"}: no reduction will be applied, C{"mean"}: - the sum of the output will be divided by the number of elements in the - output, C{"sum"}: the output will be summed. Note: C{size_average} and - C{reduce} are in the process of being deprecated, and in the meantime, - specifying either of those two args will override C{reduction}. Defaults to - C{"mean"}. 
+ @param reduction: Specifies the reduction to apply to the + output: C{"none"} | C{"mean"} | C{"sum"}. C{"none"}: no + reduction will be applied, C{"mean"}: the sum of the output + will be divided by the number of elements in the output, + C{"sum"}: the output will be summed. Note: C{size_average} + and C{reduce} are in the process of being deprecated, and in + the meantime, specifying either of those two args will + override C{reduction}. Defaults to C{"mean"}. @type pos_weight: Tensor | None - @param pos_weight: a weight of positive examples to be broadcasted with target. - Must be a tensor with equal size along the class dimension to the number of - classes. Pay close attention to PyTorch's broadcasting semantics in order to - achieve the desired operations. For a target of size [B, C, H, W] (where B - is batch size) pos_weight of size [B, C, H, W] will apply different - pos_weights to each element of the batch or [C, H, W] the same pos_weights - across the batch. To apply the same positive weight along all spacial - dimensions for a 2D multi-class target [C, H, W] use: [C, 1, 1]. Defaults to - C{None}. + @param pos_weight: a weight of positive examples to be + broadcasted with target. Must be a tensor with equal size + along the class dimension to the number of classes. Pay + close attention to PyTorch's broadcasting semantics in order + to achieve the desired operations. For a target of size [B, + C, H, W] (where B is batch size) pos_weight of size [B, C, + H, W] will apply different pos_weights to each element of + the batch or [C, H, W] the same pos_weights across the + batch. To apply the same positive weight along all spacial + dimensions for a 2D multi-class target [C, H, W] use: [C, 1, + 1]. Defaults to C{None}. """ super().__init__(**kwargs) self.criterion = nn.BCEWithLogitsLoss( @@ -53,6 +57,15 @@ def __init__( ) def forward(self, predictions: Tensor, target: Tensor) -> Tensor: + """Computes the BCE loss from logits. + + @type predictions: Tensor + @param predictions: Network predictions of shape (N, C, ...) + @type target: Tensor + @param target: A tensor of the same shape as predictions. + @rtype: Tensor + @return: A scalar tensor. 
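Editor's note: a short, self-contained illustration of the `pos_weight` broadcasting described in the docstring; plain `torch.nn.BCEWithLogitsLoss` is used here so the example runs without the rest of the package.

```python
import torch
from torch import nn

# One positive-class weight per channel, broadcast over a [B, C, H, W] target.
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([1.0, 3.0]).view(2, 1, 1))

logits = torch.randn(4, 2, 8, 8)                      # predictions for 2 classes
target = torch.randint(0, 2, (4, 2, 8, 8)).float()
loss = criterion(logits, target)                      # positives of class 1 count 3x as much
print(loss.shape)                                     # torch.Size([]) -> scalar with the default "mean" reduction
```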
+ """ if predictions.shape != target.shape: raise RuntimeError( f"Target tensor dimension ({target.shape}) and preds tensor " diff --git a/luxonis_train/attached_modules/losses/cross_entropy.py b/luxonis_train/attached_modules/losses/cross_entropy.py index 05a0f524..4be0cfdc 100644 --- a/luxonis_train/attached_modules/losses/cross_entropy.py +++ b/luxonis_train/attached_modules/losses/cross_entropy.py @@ -1,5 +1,5 @@ from logging import getLogger -from typing import Literal +from typing import Any, Literal import torch import torch.nn as nn @@ -9,12 +9,11 @@ from .base_loss import BaseLoss logger = getLogger(__name__) -was_logged = False class CrossEntropyLoss(BaseLoss[Tensor, Tensor]): - """This criterion computes the cross entropy loss between input logits and - target.""" + """This criterion computes the cross entropy loss between input + logits and target.""" supported_labels = [LabelType.SEGMENTATION, LabelType.CLASSIFICATION] @@ -24,7 +23,7 @@ def __init__( ignore_index: int = -100, reduction: Literal["none", "mean", "sum"] = "mean", label_smoothing: float = 0.0, - **kwargs, + **kwargs: Any, ): super().__init__(**kwargs) @@ -34,19 +33,19 @@ def __init__( reduction=reduction, label_smoothing=label_smoothing, ) + self._was_logged = False def forward(self, preds: Tensor, target: Tensor) -> Tensor: - global was_logged if preds.ndim == target.ndim: ch_dim = 1 if preds.ndim > 1 else 0 if preds.shape[ch_dim] == 1: - if not was_logged: + if not self._was_logged: logger.warning( "`CrossEntropyLoss` expects at least 2 classes. " "Attempting to fix by adding a dummy channel. " "If you want to be sure, use `BCEWithLogitsLoss` instead." ) - was_logged = True + self._was_logged = True preds = torch.cat([torch.zeros_like(preds), preds], dim=ch_dim) if target.shape[ch_dim] == 1: target = torch.cat([1 - target, target], dim=ch_dim) diff --git a/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py index 2e6621de..d996dcfd 100644 --- a/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py +++ b/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py @@ -1,52 +1,44 @@ -from typing import Literal, cast +from typing import Any, Literal import torch import torch.nn.functional as F -from torch import Tensor, nn -from torchvision.ops import box_convert +from luxonis_ml.data import LabelType +from torch import Tensor -from luxonis_train.attached_modules.metrics.object_keypoint_similarity import ( - get_area_factor, - get_sigmas, -) +from luxonis_train.attached_modules.losses import AdaptiveDetectionLoss from luxonis_train.nodes import EfficientKeypointBBoxHead -from luxonis_train.utils.assigners import ATSSAssigner, TaskAlignedAssigner -from luxonis_train.utils.boxutils import ( - IoUType, - anchors_for_fpn_features, +from luxonis_train.utils import ( + Labels, + Packet, compute_iou_loss, dist2bbox, + get_sigmas, + get_with_default, ) -from luxonis_train.utils.types import IncompatibleException, Labels, LabelType, Packet +from luxonis_train.utils.boundingbox import IoUType -from .base_loss import BaseLoss from .bce_with_logits import BCEWithLogitsLoss -class EfficientKeypointBBoxLoss( - BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor] -): +class EfficientKeypointBBoxLoss(AdaptiveDetectionLoss): node: EfficientKeypointBBoxHead supported_labels = [(LabelType.BOUNDINGBOX, LabelType.KEYPOINTS)] - class NodePacket(Packet[Tensor]): - features: list[Tensor] - class_scores: Tensor - 
distributions: Tensor + gt_kpts_scale: Tensor def __init__( self, n_warmup_epochs: int = 4, iou_type: IoUType = "giou", reduction: Literal["sum", "mean"] = "mean", - class_bbox_loss_weight: float = 1.0, + class_loss_weight: float = 1.0, iou_loss_weight: float = 2.5, viz_pw: float = 1.0, regr_kpts_loss_weight: float = 1.5, vis_kpts_loss_weight: float = 1.0, sigmas: list[float] | None = None, area_factor: float | None = None, - **kwargs, + **kwargs: Any, ): """BBox loss adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial Applications }. It combines IoU based bbox regression loss and varifocal loss @@ -55,12 +47,12 @@ def __init__( @type n_warmup_epochs: int @param n_warmup_epochs: Number of epochs where ATSS assigner is used, after that we switch to TAL assigner. - @type iou_type: L{IoUType} + @type iou_type: Literal["none", "giou", "diou", "ciou", "siou"] @param iou_type: IoU type used for bbox regression loss. @type reduction: Literal["sum", "mean"] @param reduction: Reduction type for loss. - @type class_bbox_loss_weight: float - @param class_bbox_loss_weight: Weight of classification loss for bounding boxes. + @type class_loss_weight: float + @param class_loss_weight: Weight of classification loss for bounding boxes. @type regr_kpts_loss_weight: float @param regr_kpts_loss_weight: Weight of regression loss for keypoints. @type vis_kpts_loss_weight: float @@ -71,153 +63,100 @@ def __init__( @param sigmas: Sigmas used in KeypointLoss for OKS metric. If None then use COCO ones if possible or default ones. Defaults to C{None}. @type area_factor: float | None @param area_factor: Factor by which we multiply bbox area which is used in KeypointLoss. If None then use default one. Defaults to C{None}. - @type kwargs: dict - @param kwargs: Additional arguments to pass to L{BaseLoss}. """ - super().__init__(**kwargs) + super().__init__( + n_warmup_epochs=n_warmup_epochs, + iou_type=iou_type, + reduction=reduction, + class_loss_weight=class_loss_weight, + iou_loss_weight=iou_loss_weight, + **kwargs, + ) - if not isinstance(self.node, EfficientKeypointBBoxHead): - raise IncompatibleException( - f"Loss `{self.name}` is only " - "compatible with nodes of type `EfficientKeypointBBoxHead`." 
- ) - self.iou_type: IoUType = iou_type - self.reduction = reduction - self.n_classes = self.node.n_classes - self.stride = self.node.stride - self.grid_cell_size = self.node.grid_cell_size - self.grid_cell_offset = self.node.grid_cell_offset - self.original_img_size = self.node.original_in_shape[1:] - self.n_heads = self.node.n_heads - self.n_kps = self.node.n_keypoints - - self.b_cross_entropy = BCEWithLogitsLoss(pos_weight=torch.tensor([viz_pw])) + self.b_cross_entropy = BCEWithLogitsLoss( + pos_weight=torch.tensor([viz_pw]) + ) self.sigmas = get_sigmas( - sigmas=sigmas, n_keypoints=self.n_kps, class_name=self.name + sigmas=sigmas, + n_keypoints=self.n_keypoints, + caller_name=self.name, ) - self.area_factor = get_area_factor(area_factor, class_name=self.name) - - self.n_warmup_epochs = n_warmup_epochs - self.atts_assigner = ATSSAssigner(topk=9, n_classes=self.n_classes) - self.tal_assigner = TaskAlignedAssigner( - topk=13, n_classes=self.n_classes, alpha=1.0, beta=6.0 + self.area_factor = get_with_default( + area_factor, "bbox area scaling", self.name, default=0.53 ) - - self.varifocal_loss = VarifocalLoss() - self.class_bbox_loss_weight = class_bbox_loss_weight - self.iou_loss_weight = iou_loss_weight self.regr_kpts_loss_weight = regr_kpts_loss_weight self.vis_kpts_loss_weight = vis_kpts_loss_weight def prepare( - self, outputs: Packet[Tensor], labels: Labels - ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: - feats = self.get_input_tensors(outputs, "features") - pred_scores = self.get_input_tensors(outputs, "class_scores")[0] - pred_distri = self.get_input_tensors(outputs, "distributions")[0] - pred_kpts = self.get_input_tensors(outputs, "keypoints_raw")[0] + self, inputs: Packet[Tensor], labels: Labels + ) -> tuple[ + Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor + ]: + feats = self.get_input_tensors(inputs, "features") + pred_scores = self.get_input_tensors(inputs, "class_scores")[0] + pred_distri = self.get_input_tensors(inputs, "distributions")[0] + pred_kpts = self.get_input_tensors(inputs, "keypoints_raw")[0] + + target_kpts = self.get_label(labels, LabelType.KEYPOINTS) + target_bbox = self.get_label(labels, LabelType.BOUNDINGBOX) batch_size = pred_scores.shape[0] - device = pred_scores.device - - target_kpts = self.get_label(labels, LabelType.KEYPOINTS)[0] - target_bbox = self.get_label(labels, LabelType.BOUNDINGBOX)[0] n_kpts = (target_kpts.shape[1] - 2) // 3 - gt_bboxes_scale = torch.tensor( - [ - self.original_img_size[1], - self.original_img_size[0], - self.original_img_size[1], - self.original_img_size[0], - ], - device=device, - ) - gt_kpts_scale = torch.tensor( - [ - self.original_img_size[1], - self.original_img_size[0], - ], - device=device, - ) - ( - anchors, - anchor_points, - n_anchors_list, - stride_tensor, - ) = anchors_for_fpn_features( - feats, - self.stride, - self.grid_cell_size, - self.grid_cell_offset, - multiply_with_stride=True, - ) + self._init_parameters(feats) - anchor_points_strided = anchor_points / stride_tensor - pred_bboxes = dist2bbox(pred_distri, anchor_points_strided) + pred_bboxes = dist2bbox(pred_distri, self.anchor_points_strided) pred_kpts = self.dist2kpts_noscale( - anchor_points_strided, pred_kpts.view(batch_size, -1, n_kpts, 3) + self.anchor_points_strided, + pred_kpts.view( + batch_size, + -1, + n_kpts, + 3, + ), ) - target_bbox = self._preprocess_bbox_target( - target_bbox, batch_size, gt_bboxes_scale - ) + target_bbox = self._preprocess_bbox_target(target_bbox, batch_size) 
gt_bbox_labels = target_bbox[:, :, :1] gt_xyxy = target_bbox[:, :, 1:] mask_gt = (gt_xyxy.sum(-1, keepdim=True) > 0).float() - - if self._epoch < self.n_warmup_epochs: - ( - assigned_labels, - assigned_bboxes, - assigned_scores, - mask_positive, - assigned_gt_idx, - ) = self.atts_assigner( - anchors, - n_anchors_list, - gt_bbox_labels, - gt_xyxy, - mask_gt, - pred_bboxes.detach() * stride_tensor, - ) - else: - ( - assigned_labels, - assigned_bboxes, - assigned_scores, - mask_positive, - assigned_gt_idx, - ) = self.tal_assigner( - pred_scores.detach(), - pred_bboxes.detach() * stride_tensor, - anchor_points, - gt_bbox_labels, - gt_xyxy, - mask_gt, - ) + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + mask_positive, + assigned_gt_idx, + ) = self._run_assigner( + gt_bbox_labels, + gt_xyxy, + mask_gt, + pred_bboxes, + pred_scores, + ) batched_kpts = self._preprocess_kpts_target( - target_kpts, batch_size, gt_kpts_scale + target_kpts, batch_size, self.gt_kpts_scale ) assigned_gt_idx_expanded = assigned_gt_idx.unsqueeze(-1).unsqueeze(-1) selected_keypoints = batched_kpts.gather( - 1, assigned_gt_idx_expanded.expand(-1, -1, self.n_kps, 3) + 1, assigned_gt_idx_expanded.expand(-1, -1, self.n_keypoints, 3) ) xy_components = selected_keypoints[:, :, :, :2] - normalized_xy = xy_components / stride_tensor.view(1, -1, 1, 1) + normalized_xy = xy_components / self.stride_tensor.view(1, -1, 1, 1) selected_keypoints = torch.cat( (normalized_xy, selected_keypoints[:, :, :, 2:]), dim=-1 ) gt_kpt = selected_keypoints[mask_positive] pred_kpts = pred_kpts[mask_positive] - assigned_bboxes = assigned_bboxes / stride_tensor + assigned_bboxes = assigned_bboxes / self.stride_tensor area = ( - assigned_bboxes[mask_positive][:, 0] - assigned_bboxes[mask_positive][:, 2] + assigned_bboxes[mask_positive][:, 0] + - assigned_bboxes[mask_positive][:, 2] ) * ( - assigned_bboxes[mask_positive][:, 1] - assigned_bboxes[mask_positive][:, 3] + assigned_bboxes[mask_positive][:, 1] + - assigned_bboxes[mask_positive][:, 3] ) return ( @@ -256,8 +195,12 @@ def forward( ).mean() visibility_loss = self.b_cross_entropy.forward(pred_kpts[..., 2], mask) - one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[..., :-1] - loss_cls = self.varifocal_loss(pred_scores, assigned_scores, one_hot_label) + one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[ + ..., :-1 + ] + loss_cls = self.varifocal_loss( + pred_scores, assigned_scores, one_hot_label + ) if assigned_scores.sum() > 1: loss_cls /= assigned_scores.sum() @@ -273,7 +216,7 @@ def forward( )[0] loss = ( - self.class_bbox_loss_weight * loss_cls + self.class_loss_weight * loss_cls + self.iou_loss_weight * loss_iou + regression_loss * self.regr_kpts_loss_weight + visibility_loss * self.vis_kpts_loss_weight @@ -288,49 +231,32 @@ def forward( return loss, sub_losses - def _preprocess_bbox_target( - self, bbox_target: Tensor, batch_size: int, scale_tensor: Tensor - ) -> Tensor: - """Preprocess target bboxes in shape [batch_size, N, 5] where N is maximum - number of instances in one image.""" - sample_ids, counts = cast( - tuple[Tensor, Tensor], - torch.unique(bbox_target[:, 0].int(), return_counts=True), - ) - c_max = int(counts.max()) if counts.numel() > 0 else 0 - out_target = torch.zeros(batch_size, c_max, 5, device=bbox_target.device) - out_target[:, :, 0] = -1 - for id, count in zip(sample_ids, counts): - out_target[id, :count] = bbox_target[bbox_target[:, 0] == id][:, 1:] - - scaled_target = out_target[:, :, 1:5] * scale_tensor - 
out_target[..., 1:] = box_convert(scaled_target, "xywh", "xyxy") - return out_target - def _preprocess_kpts_target( self, kpts_target: Tensor, batch_size: int, scale_tensor: Tensor ) -> Tensor: - """Preprocesses the target keypoints in shape [batch_size, N, n_keypoints, 3] - where N is the maximum number of keypoints in one image.""" + """Preprocesses the target keypoints in shape [batch_size, N, + n_keypoints, 3] where N is the maximum number of keypoints in + one image.""" _, counts = torch.unique(kpts_target[:, 0].int(), return_counts=True) max_kpts = int(counts.max()) if counts.numel() > 0 else 0 batched_keypoints = torch.zeros( - (batch_size, max_kpts, self.n_kps, 3), device=kpts_target.device + (batch_size, max_kpts, self.n_keypoints, 3), + device=kpts_target.device, ) for i in range(batch_size): keypoints_i = kpts_target[kpts_target[:, 0] == i] scaled_keypoints_i = keypoints_i[:, 2:].clone() - batched_keypoints[i, : keypoints_i.shape[0]] = scaled_keypoints_i.view( - -1, self.n_kps, 3 + batched_keypoints[i, : keypoints_i.shape[0]] = ( + scaled_keypoints_i.view(-1, self.n_keypoints, 3) ) batched_keypoints[i, :, :, :2] *= scale_tensor[:2] return batched_keypoints def dist2kpts_noscale(self, anchor_points: Tensor, kpts: Tensor) -> Tensor: - """Adjusts and scales predicted keypoints relative to anchor points without - considering image stride.""" + """Adjusts and scales predicted keypoints relative to anchor + points without considering image stride.""" adj_kpts = kpts.clone() scale = 2.0 x_adj = anchor_points[:, [0]] - 0.5 @@ -341,32 +267,13 @@ def dist2kpts_noscale(self, anchor_points: Tensor, kpts: Tensor) -> Tensor: adj_kpts[..., 1] += y_adj return adj_kpts - -class VarifocalLoss(nn.Module): - def __init__(self, alpha: float = 0.75, gamma: float = 2.0): - """Varifocal Loss is a loss function for training a dense object detector to predict - the IoU-aware classification score, inspired by focal loss. - Code is adapted from: U{https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/models/losses.py} - - @type alpha: float - @param alpha: alpha parameter in focal loss, default is 0.75. - @type gamma: float - @param gamma: gamma parameter in focal loss, default is 2.0. 
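Editor's note: `_preprocess_kpts_target` pads a flat per-instance label tensor into a dense per-image tensor. A toy version of that reshaping (without the final multiplication by the image scale) looks like this; shapes and values are illustrative only.

```python
import torch

# Flat labels: [image_index, class, x1, y1, v1, x2, y2, v2] per instance.
n_keypoints, batch_size = 2, 2
labels = torch.tensor([
    [0.0, 0.0, 0.1, 0.2, 2.0, 0.3, 0.4, 2.0],   # image 0, instance 0
    [0.0, 0.0, 0.5, 0.6, 1.0, 0.7, 0.8, 0.0],   # image 0, instance 1
    [1.0, 0.0, 0.9, 0.9, 2.0, 0.2, 0.1, 1.0],   # image 1, instance 0
])

_, counts = torch.unique(labels[:, 0].int(), return_counts=True)
max_instances = int(counts.max())
batched = torch.zeros(batch_size, max_instances, n_keypoints, 3)
for i in range(batch_size):
    rows = labels[labels[:, 0] == i]
    batched[i, : rows.shape[0]] = rows[:, 2:].view(-1, n_keypoints, 3)
print(batched.shape)  # torch.Size([2, 2, 2, 3]); image 1 has one zero-padded slot
```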
- """ - - super().__init__() - - self.alpha = alpha - self.gamma = gamma - - def forward( - self, pred_score: Tensor, target_score: Tensor, label: Tensor - ) -> Tensor: - weight = ( - self.alpha * pred_score.pow(self.gamma) * (1 - label) + target_score * label - ) - ce_loss = F.binary_cross_entropy( - pred_score.float(), target_score.float(), reduction="none" + def _init_parameters(self, features: list[Tensor]): + device = features[0].device + super()._init_parameters(features) + self.gt_kpts_scale = torch.tensor( + [ + self.original_img_size[1], + self.original_img_size[0], + ], + device=device, ) - loss = (ce_loss * weight).sum() - return loss diff --git a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py index d174c555..8c9230ae 100644 --- a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py +++ b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py @@ -1,17 +1,20 @@ -from typing import cast +import logging +from typing import Any, cast import torch +from luxonis_ml.data import LabelType from torch import Tensor from torchvision.ops import box_convert from luxonis_train.attached_modules.losses.keypoint_loss import KeypointLoss from luxonis_train.nodes import ImplicitKeypointBBoxHead -from luxonis_train.utils.boxutils import ( +from luxonis_train.utils import ( + Labels, + Packet, compute_iou_loss, match_to_anchor, process_bbox_predictions, ) -from luxonis_train.utils.types import IncompatibleException, Labels, LabelType, Packet from .base_loss import BaseLoss from .bce_with_logits import BCEWithLogitsLoss @@ -25,7 +28,10 @@ list[Tensor], ] +logger = logging.getLogger(__name__) + +# TODO: BROKEN! class ImplicitKeypointBBoxLoss(BaseLoss[list[Tensor], KeypointTargetType]): node: ImplicitKeypointBBoxHead supported_labels = [(LabelType.BOUNDINGBOX, LabelType.KEYPOINTS)] @@ -47,10 +53,10 @@ def __init__( anchor_threshold: float = 4.0, bias: float = 0.5, balance: list[float] | None = None, - **kwargs, + **kwargs: Any, ): - """Joint loss for keypoint and box predictions for cases where the keypoints and - boxes are inherently linked. + """Joint loss for keypoint and box predictions for cases where + the keypoints and boxes are inherently linked. Based on U{YOLO-Pose: Enhancing YOLO for Multi Person Pose Estimation Using Object Keypoint Similarity Loss}. @@ -89,34 +95,29 @@ def __init__( super().__init__(**kwargs) - if not isinstance(self.node, ImplicitKeypointBBoxHead): - raise IncompatibleException( - f"Loss `{self.name}` is only " - "compatible with nodes of type `ImplicitKeypointBBoxHead`." - ) - self.n_classes = self.node.n_classes - self.n_keypoints = self.node.n_keypoints self.n_anchors = self.node.n_anchors - self.num_heads = self.node.num_heads + self.n_heads = self.node.n_heads self.box_offset = self.node.box_offset self.anchors = self.node.anchors self.balance = balance or [4.0, 1.0, 0.4] - if len(self.balance) < self.num_heads: - raise ValueError( - f"Balance list must have at least {self.num_heads} elements." + if len(self.balance) < self.n_heads: + logger.warning( + f"Balance list must have at least {self.n_heads} elements." + "Filling the rest with 1.0." 
) + self.balance += [1.0] * (self.n_heads - len(self.balance)) self.min_objectness_iou = min_objectness_iou self.bbox_weight = bbox_loss_weight self.class_weight = class_loss_weight self.objectness_weight = objectness_loss_weight - self.kpt_visibility_weight = keypoint_visibility_loss_weight - self.keypoint_regression_loss_weight = keypoint_regression_loss_weight self.anchor_threshold = anchor_threshold self.bias = bias - self.b_cross_entropy = BCEWithLogitsLoss(pos_weight=torch.tensor([obj_pw])) + self.b_cross_entropy = BCEWithLogitsLoss( + pos_weight=torch.tensor([obj_pw]) + ) self.class_loss = SmoothBCEWithLogitsLoss( label_smoothing=label_smoothing, bce_pow=cls_pw, @@ -126,6 +127,8 @@ def __init__( bce_power=viz_pw, sigmas=sigmas, area_factor=area_factor, + regression_loss_weight=keypoint_regression_loss_weight, + visibility_loss_weight=keypoint_visibility_loss_weight, ) self.positive_smooth_const = 1 - 0.5 * label_smoothing @@ -134,38 +137,44 @@ def __init__( def prepare( self, outputs: Packet[Tensor], labels: Labels ) -> tuple[list[Tensor], KeypointTargetType]: - """Prepares the labels to be in the correct format for loss calculation. + """Prepares the labels to be in the correct format for loss + calculation. @type outputs: Packet[Tensor] @param outputs: Output from the forward pass. @type labels: L{Labels} @param labels: Dictionary containing the labels. - @rtype: tuple[list[Tensor], tuple[list[Tensor], list[Tensor], list[Tensor], - list[tuple[Tensor, Tensor, Tensor, Tensor]], list[Tensor]]] - @return: Tuple containing the original output and the postprocessed labels. The - processed labels are a tuple containing the class targets, box targets, - keypoint targets, indices and anchors. Indicies are a tuple containing - vectors of indices for batch, anchor, feature y and feature x dimensions, - respectively. They are all of shape (n_targets,). The indices are used to - index the output tensors of shape (batch_size, n_anchors, feature_height, - feature_width, n_classes + box_offset + n_keypoints * 3) to get a tensor of - shape (n_targets, n_classes + box_offset + n_keypoints * 3). + @rtype: tuple[list[Tensor], tuple[list[Tensor], list[Tensor], + list[Tensor], list[tuple[Tensor, Tensor, Tensor, Tensor]], + list[Tensor]]] + @return: Tuple containing the original output and the + postprocessed labels. The processed labels are a tuple + containing the class targets, box targets, keypoint targets, + indices and anchors. Indicies are a tuple containing vectors + of indices for batch, anchor, feature y and feature x + dimensions, respectively. They are all of shape + (n_targets,). The indices are used to index the output + tensors of shape (batch_size, n_anchors, feature_height, + feature_width, n_classes + box_offset + n_keypoints * 3) to + get a tensor of shape (n_targets, n_classes + box_offset + + n_keypoints * 3). 
""" predictions = self.get_input_tensors(outputs, "features") - kpts = self.get_label(labels, LabelType.KEYPOINTS)[0] - boxes = self.get_label(labels, LabelType.BOUNDINGBOX)[0] + kpt_label = self.get_label(labels, LabelType.KEYPOINTS) + bbox_label = self.get_label(labels, LabelType.BOUNDINGBOX) - nkpts = (kpts.shape[1] - 2) // 3 - targets = torch.zeros((len(boxes), nkpts * 3 + self.box_offset + 1)) - targets[:, :2] = boxes[:, :2] + targets = torch.zeros( + (kpt_label.shape[0], self.n_keypoints * 3 + self.box_offset + 1) + ) + targets[:, :2] = kpt_label[:, :2] targets[:, 2 : self.box_offset + 1] = box_convert( - boxes[:, 2:], "xywh", "cxcywh" + bbox_label[:, 2:], "xywh", "cxcywh" ) - targets[:, self.box_offset + 1 :: 3] = kpts[:, 2::3] # insert kp x coordinates - targets[:, self.box_offset + 2 :: 3] = kpts[:, 3::3] # insert kp y coordinates - targets[:, self.box_offset + 3 :: 3] = kpts[:, 4::3] # insert kp visibility + # insert keypoints + for i in range(1, 4): + targets[:, self.box_offset + i :: 3] = kpt_label[:, i + 1 :: 3] n_targets = targets.shape[0] @@ -176,21 +185,26 @@ def prepare( anchors: list[Tensor] = [] anchor_indices = ( - torch.arange(self.n_anchors, device=targets.device, dtype=torch.float32) + torch.arange( + self.n_anchors, device=targets.device, dtype=torch.float32 + ) .reshape(self.n_anchors, 1) .repeat(1, n_targets) .unsqueeze(-1) ) - targets = torch.cat((targets.repeat(self.n_anchors, 1, 1), anchor_indices), 2) + targets = torch.cat( + (targets.repeat(self.n_anchors, 1, 1), anchor_indices), 2 + ) xy_deltas = ( torch.tensor( - [[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1]], device=targets.device + [[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1]], + device=targets.device, ).float() * self.bias ) - for i in range(self.num_heads): + for i in range(self.n_heads): anchor = self.anchors[i] feature_height, feature_width = predictions[i].shape[2:4] scaled_targets, xy_shifts = match_to_anchor( @@ -251,9 +265,15 @@ def forward( "kpt_regression": torch.tensor(0.0, device=device), } - for pred, class_target, box_target, kpt_target, index, anchor, balance in zip( - predictions, *targets, self.balance - ): + for ( + pred, + class_target, + box_target, + kpt_target, + index, + anchor, + balance, + ) in zip(predictions, *targets, self.balance): obj_targets = torch.zeros_like(pred[..., 0], device=device) n_targets = len(class_target) @@ -280,13 +300,8 @@ def forward( kpt_target.to(device), area.to(device), ) - - sub_losses["kpt_regression"] += ( - kpt_sublosses["regression"] * self.keypoint_regression_loss_weight - ) - sub_losses["kpt_visibility"] += ( - kpt_sublosses["visibility"] * self.kpt_visibility_weight - ) + for name, kpt_subloss in kpt_sublosses.items(): + sub_losses[name] += kpt_subloss obj_targets[index] = (self.min_objectness_iou) + ( 1 - self.min_objectness_iou @@ -295,11 +310,10 @@ def forward( if self.n_classes > 1: sub_losses["class"] += ( self.class_loss.forward( - [ - pred_subset[ - :, - self.box_offset : self.box_offset + self.n_classes, - ] + pred_subset[ + :, + self.box_offset : self.box_offset + + self.n_classes, ], class_target, ) @@ -315,7 +329,9 @@ def forward( loss = cast(Tensor, sum(sub_losses.values())).reshape([]) return loss, {name: loss.detach() for name, loss in sub_losses.items()} - def _create_keypoint_target(self, scaled_targets: Tensor, box_xy_deltas: Tensor): + def _create_keypoint_target( + self, scaled_targets: Tensor, box_xy_deltas: Tensor + ): keypoint_target = scaled_targets[:, self.box_offset + 1 : -1] for j in range(self.n_keypoints): idx = 3 * j diff 
--git a/luxonis_train/attached_modules/losses/keypoint_loss.py b/luxonis_train/attached_modules/losses/keypoint_loss.py index d5ca278f..c17ac7a1 100644 --- a/luxonis_train/attached_modules/losses/keypoint_loss.py +++ b/luxonis_train/attached_modules/losses/keypoint_loss.py @@ -1,17 +1,20 @@ +from typing import Any + import torch +from luxonis_ml.data import LabelType from torch import Tensor -from luxonis_train.attached_modules.metrics.object_keypoint_similarity import ( - get_area_factor, +from luxonis_train.utils import ( get_sigmas, + get_with_default, + process_keypoints_predictions, ) -from luxonis_train.utils.boxutils import process_keypoints_predictions -from luxonis_train.utils.types import Labels, LabelType, Packet from .base_loss import BaseLoss from .bce_with_logits import BCEWithLogitsLoss +# TODO: Make it work on its own class KeypointLoss(BaseLoss[Tensor, Tensor]): supported_labels = [LabelType.KEYPOINTS] @@ -21,73 +24,89 @@ def __init__( bce_power: float = 1.0, sigmas: list[float] | None = None, area_factor: float | None = None, - **kwargs, + regression_loss_weight: float = 1.0, + visibility_loss_weight: float = 1.0, + **kwargs: Any, ): - """Keypoint based loss that is computed from OKS-based regression and visibility - loss. + """Keypoint based loss that is computed from OKS-based + regression and visibility loss. @type n_keypoints: int @param n_keypoints: Number of keypoints. @type bce_power: float - @param bce_power: Power used for BCE visibility loss. Defaults to C{1.0}. - @param sigmas: Sigmas used for OKS. If None then use COCO ones if possible or - default ones. Defaults to C{None}. + @param bce_power: Power used for BCE visibility loss. Defaults + to C{1.0}. + @param sigmas: Sigmas used for OKS. If None then use COCO ones + if possible or default ones. Defaults to C{None}. @type area_factor: float | None - @param area_factor: Factor by which we multiply bbox area. If None then use - default one. Defaults to C{None}. + @param area_factor: Factor by which we multiply bbox area. If + None then use default one. Defaults to C{None}. + @type regression_loss_weight: float + @param regression_loss_weight: Weight of regression loss. + Defaults to C{1.0}. + @type visibility_loss_weight: float + @param visibility_loss_weight: Weight of visibility loss. + Defaults to C{1.0}. """ super().__init__(**kwargs) self.b_cross_entropy = BCEWithLogitsLoss( pos_weight=torch.tensor([bce_power]), **kwargs ) - self.sigmas = get_sigmas( - sigmas=sigmas, n_keypoints=n_keypoints, class_name=self.name + self.sigmas = get_sigmas(sigmas, n_keypoints, caller_name=self.name) + self.area_factor = get_with_default( + area_factor, "bbox area scaling", self.name, default=0.53 ) - self.area_factor = get_area_factor(area_factor, class_name=self.name) - - def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Tensor, Tensor]: - return torch.cat(inputs["keypoints"], dim=0), self.get_label(labels)[0] + self.regression_loss_weight = regression_loss_weight + self.visibility_loss_weight = visibility_loss_weight def forward( self, prediction: Tensor, target: Tensor, area: Tensor ) -> tuple[Tensor, dict[str, Tensor]]: - """Computes the keypoint loss and visibility loss for a given prediction and - target. + """Computes the keypoint loss and visibility loss for a given + prediction and target. @type prediction: Tensor - @param prediction: Predicted tensor of shape C{[n_detections, n_keypoints * 3]}. + @param prediction: Predicted tensor of shape C{[n_detections, + n_keypoints * 3]}. 
@type target: Tensor - @param target: Target tensor of shape C{[n_detections, n_keypoints * 3]}. + @param target: Target tensor of shape C{[n_detections, + n_keypoints * 3]}. @type area: Tensor @param area: Area tensor of shape C{[n_detections]}. @rtype: tuple[Tensor, dict[str, Tensor]] - @return: A tuple containing the total loss tensor of shape C{[1,]} and a - dictionary with the regression loss and visibility loss tensors. + @return: A tuple containing the total loss tensor of shape + C{[1,]} and a dictionary with the regression loss and + visibility loss tensors. """ - device = prediction.device - sigmas = self.sigmas.to(device) + sigmas = self.sigmas.to(prediction.device) pred_x, pred_y, pred_v = process_keypoints_predictions(prediction) - gt_x = target[:, 0::3] - gt_y = target[:, 1::3] - gt_v = (target[:, 2::3] > 0).float() + target_x = target[:, 0::3] + target_y = target[:, 1::3] + target_visibility = (target[:, 2::3] > 0).float() - visibility_loss = self.b_cross_entropy.forward(pred_v, gt_v) + visibility_loss = ( + self.b_cross_entropy.forward(pred_v, target_visibility) + * self.visibility_loss_weight + ) scales = area * self.area_factor - d = (gt_x - pred_x) ** 2 + (gt_y - pred_y) ** 2 - e = d / (2 * sigmas**2) / (scales.view(-1, 1) + 1e-9) / 2 + distance = (target_x - pred_x) ** 2 + (target_y - pred_y) ** 2 + normalized_distance = ( + distance / (2 * sigmas**2) / (scales.view(-1, 1) + 1e-9) / 2 + ) - regression_loss_unreduced = 1 - torch.exp(-e) - regression_loss_reduced = (regression_loss_unreduced * gt_v).sum(dim=1) / ( - gt_v.sum(dim=1) + 1e-9 + regression_loss = 1 - torch.exp(-normalized_distance) + regression_loss = (regression_loss * target_visibility).sum(dim=1) / ( + target_visibility.sum(dim=1) + 1e-9 ) - regression_loss = regression_loss_reduced.mean() + regression_loss = regression_loss.mean() + regression_loss *= self.regression_loss_weight total_loss = regression_loss + visibility_loss return total_loss, { - "regression": regression_loss, - "visibility": visibility_loss, + "kpt_regression": regression_loss, + "kpt_visibility": visibility_loss, } diff --git a/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py b/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py index f3affc74..884d4863 100644 --- a/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py +++ b/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py @@ -1,4 +1,4 @@ -from typing import Literal +from typing import Any, Literal from luxonis_ml.data import LabelType from torch import Tensor @@ -15,7 +15,7 @@ def __init__( alpha: float = 0.25, gamma: float = 2.0, reduction: Literal["none", "mean", "sum"] = "mean", - **kwargs, + **kwargs: Any, ): """Focal loss from U{Focal Loss for Dense Object Detection }. 
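For reference, a minimal sketch of what the wrapped torchvision.ops.sigmoid_focal_loss call computes; the tensors below are made-up examples and this snippet is not part of the patched module:

    import torch
    import torch.nn.functional as F
    from torchvision.ops import sigmoid_focal_loss

    preds = torch.randn(4, 3)                      # raw logits
    target = torch.randint(0, 2, (4, 3)).float()   # binary targets

    loss = sigmoid_focal_loss(preds, target, alpha=0.25, gamma=2.0, reduction="mean")

    # Manual equivalent, spelling out the focal modulation:
    p = preds.sigmoid()
    ce = F.binary_cross_entropy_with_logits(preds, target, reduction="none")
    p_t = p * target + (1 - p) * (1 - target)
    alpha_t = 0.25 * target + 0.75 * (1 - target)
    manual = (alpha_t * (1 - p_t) ** 2.0 * ce).mean()
    assert torch.allclose(loss, manual, atol=1e-6)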
@@ -37,7 +37,11 @@ def __init__( def forward(self, preds: Tensor, target: Tensor) -> Tensor: loss = sigmoid_focal_loss( - preds, target, alpha=self.alpha, gamma=self.gamma, reduction=self.reduction + preds, + target, + alpha=self.alpha, + gamma=self.gamma, + reduction=self.reduction, ) return loss diff --git a/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py b/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py index ac976428..edc2bf98 100644 --- a/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py +++ b/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py @@ -1,4 +1,4 @@ -from typing import Literal +from typing import Any, Literal import torch from luxonis_ml.data import LabelType @@ -17,31 +17,32 @@ def __init__( bce_pow: float = 1.0, weight: list[float] | None = None, reduction: Literal["mean", "sum", "none"] = "mean", - **kwargs, + **kwargs: Any, ): """BCE with logits loss and label smoothing. @type label_smoothing: float - @param label_smoothing: Label smoothing factor. Defaults to C{0.0}. + @param label_smoothing: Label smoothing factor. Defaults to + C{0.0}. @type bce_pow: float @param bce_pow: Weight for positive samples. Defaults to C{1.0}. @type weight: list[float] | None - @param weight: a manual rescaling weight given to the loss of each batch - element. If given, it has to be a list of length C{nbatch}. + @param weight: a manual rescaling weight given to the loss of + each batch element. If given, it has to be a list of length + C{nbatch}. @type reduction: Literal["mean", "sum", "none"] - @param reduction: Specifies the reduction to apply to the output: C{'none'} | - C{'mean'} | C{'sum'}. C{'none'}: no reduction will be applied, C{'mean'}: - the sum of the output will be divided by the number of elements in the - output, C{'sum'}: the output will be summed. Note: C{size_average} and - C{reduce} are in the process of being deprecated, and in the meantime, - specifying either of those two args will override C{reduction}. Defaults to - C{'mean'}. - @type kwargs: dict - @param kwargs: Additional arguments to pass to L{BaseLoss}. + @param reduction: Specifies the reduction to apply to the + output: C{'none'} | C{'mean'} | C{'sum'}. C{'none'}: no + reduction will be applied, C{'mean'}: the sum of the output + will be divided by the number of elements in the output, + C{'sum'}: the output will be summed. Note: C{size_average} + and C{reduce} are in the process of being deprecated, and in + the meantime, specifying either of those two args will + override C{reduction}. Defaults to C{'mean'}. """ super().__init__(**kwargs) - self.negative_smooth_const = 1.0 - 0.5 * label_smoothing - self.positive_smooth_const = 0.5 * label_smoothing + self.positive_smooth_const = 1.0 - label_smoothing + self.negative_smooth_const = label_smoothing self.criterion = BCEWithLogitsLoss( pos_weight=torch.tensor( [bce_pow], @@ -50,24 +51,26 @@ def __init__( reduction=reduction, ) - def forward(self, predictions: list[Tensor], target: Tensor) -> Tensor: + def forward(self, predictions: Tensor, target: Tensor) -> Tensor: """Computes the BCE loss with label smoothing. - @type predictions: list[Tensor] - @param predictions: List of tensors of shape (N, n_classes), containing the - predicted class scores. + @type predictions: Tensor + @param predictions: Network predictions of shape (N, C, ...) @type target: Tensor - @param target: A tensor of shape (N,), containing the ground-truth class labels + @param target: A tensor of the same shape as predictions. 
@rtype: Tensor @return: A scalar tensor. """ - prediction = predictions[0] - smoothed_target = torch.full_like( - prediction, - self.negative_smooth_const, - device=prediction.device, - ) - smoothed_target[ - torch.arange(target.shape[0]), target - ] = self.positive_smooth_const - return self.criterion.forward(prediction, smoothed_target) + if predictions.shape != target.shape: + raise RuntimeError( + f"Target tensor dimension ({target.shape}) and predictions tensor " + f"dimension ({predictions.shape}) should be the same." + ) + + if self.negative_smooth_const != 0.0: + target = ( + target * self.positive_smooth_const + + (1 - target) * self.negative_smooth_const + ) + + return self.criterion(predictions, target) diff --git a/luxonis_train/attached_modules/losses/softmax_focal_loss.py b/luxonis_train/attached_modules/losses/softmax_focal_loss.py index 14f32e54..43c844f3 100644 --- a/luxonis_train/attached_modules/losses/softmax_focal_loss.py +++ b/luxonis_train/attached_modules/losses/softmax_focal_loss.py @@ -1,6 +1,5 @@ -# TODO: document - -from typing import Literal +import logging +from typing import Any, Literal import torch from luxonis_ml.data import LabelType @@ -10,21 +9,26 @@ from .cross_entropy import CrossEntropyLoss +logger = logging.getLogger(__name__) + +# TODO: Add support for multi-class tasks class SoftmaxFocalLoss(BaseLoss[Tensor, Tensor]): supported_labels = [LabelType.SEGMENTATION, LabelType.CLASSIFICATION] def __init__( self, - alpha: float | list[float] = 0.25, + alpha: float = 0.25, gamma: float = 2.0, reduction: Literal["none", "mean", "sum"] = "mean", - **kwargs, + **kwargs: Any, ): - """Focal loss implementation for multi-class/multi-label tasks using Softmax. + """Focal loss implementation for binary classification and + segmentation tasks using Softmax. - @type alpha: float | list[float] - @param alpha: Weighting factor for the rare class. Defaults to C{0.25}. + @type alpha: float + @param alpha: Weighting factor for the rare class. Defaults to + C{0.25}. @type gamma: float @param gamma: Focusing parameter. Defaults to C{2.0}. 
@type reduction: Literal["none", "mean", "sum"] @@ -40,13 +44,7 @@ def __init__( def forward(self, predictions: Tensor, target: Tensor) -> Tensor: ce_loss = self.ce_criterion.forward(predictions, target) pt = torch.exp(-ce_loss) - loss = ce_loss * ((1 - pt) ** self.gamma) - - if isinstance(self.alpha, float) and self.alpha >= 0: - loss = self.alpha * loss - elif isinstance(self.alpha, list): - alpha_t = torch.tensor(self.alpha)[target] - loss = alpha_t * loss + loss = ce_loss * ((1 - pt) ** self.gamma) * self.alpha if self.reduction == "mean": loss = loss.mean() diff --git a/luxonis_train/attached_modules/metrics/__init__.py b/luxonis_train/attached_modules/metrics/__init__.py index 9e73e4ac..b1dc40ea 100644 --- a/luxonis_train/attached_modules/metrics/__init__.py +++ b/luxonis_train/attached_modules/metrics/__init__.py @@ -1,8 +1,8 @@ from .base_metric import BaseMetric -from .common import Accuracy, F1Score, JaccardIndex, Precision, Recall from .mean_average_precision import MeanAveragePrecision from .mean_average_precision_keypoints import MeanAveragePrecisionKeypoints from .object_keypoint_similarity import ObjectKeypointSimilarity +from .torchmetrics import Accuracy, F1Score, JaccardIndex, Precision, Recall __all__ = [ "Accuracy", diff --git a/luxonis_train/attached_modules/metrics/base_metric.py b/luxonis_train/attached_modules/metrics/base_metric.py index b2e456c9..a4109d2d 100644 --- a/luxonis_train/attached_modules/metrics/base_metric.py +++ b/luxonis_train/attached_modules/metrics/base_metric.py @@ -5,8 +5,8 @@ from typing_extensions import TypeVarTuple, Unpack from luxonis_train.attached_modules import BaseAttachedModule +from luxonis_train.utils import Labels, Packet from luxonis_train.utils.registry import METRICS -from luxonis_train.utils.types import Labels, Packet Ts = TypeVarTuple("Ts") @@ -19,8 +19,9 @@ class BaseMetric( ): """A base class for all metrics. - This class defines the basic interface for all metrics. It utilizes automatic - registration of defined subclasses to a L{METRICS} registry. + This class defines the basic interface for all metrics. It utilizes + automatic registration of defined subclasses to a L{METRICS} + registry. """ @abstractmethod @@ -33,7 +34,9 @@ def update(self, *args: Unpack[Ts]) -> None: ... @abstractmethod - def compute(self) -> Tensor | tuple[Tensor, dict[str, Tensor]] | dict[str, Tensor]: + def compute( + self, + ) -> Tensor | tuple[Tensor, dict[str, Tensor]] | dict[str, Tensor]: """Computes the metric. @rtype: Tensor | tuple[Tensor, dict[str, Tensor]] | dict[str, Tensor] @@ -48,12 +51,14 @@ def compute(self) -> Tensor | tuple[Tensor, dict[str, Tensor]] | dict[str, Tenso def run_update(self, outputs: Packet[Tensor], labels: Labels) -> None: """Calls the metric's update method. - Validates and prepares the inputs, then calls the metric's update method. + Validates and prepares the inputs, then calls the metric's + update method. @type outputs: Packet[Tensor] @param outputs: The outputs of the model. @type labels: Labels - @param labels: The labels of the model. @raises L{IncompatibleException}: If the - inputs are not compatible with the module. + @param labels: The labels of the model. @raises + L{IncompatibleException}: If the inputs are not compatible + with the module. 
""" self.update(*self.prepare(outputs, labels)) diff --git a/luxonis_train/attached_modules/metrics/common.py b/luxonis_train/attached_modules/metrics/common.py deleted file mode 100644 index 97e8a7ec..00000000 --- a/luxonis_train/attached_modules/metrics/common.py +++ /dev/null @@ -1,92 +0,0 @@ -import logging - -import torchmetrics -from luxonis_ml.data import LabelType -from torch import Tensor - -from .base_metric import BaseMetric - -logger = logging.getLogger(__name__) - - -class TorchMetricWrapper(BaseMetric): - def __init__(self, **kwargs): - super().__init__(node=kwargs.pop("node", None)) - task = kwargs.get("task") - - if self.node.n_classes > 1: - if task == "binary": - raise ValueError( - f"Task type set to '{task}', but the dataset has more than 1 class. " - f"Set the `task` parameter for {self.name} to either 'multiclass' or 'multilabel'." - ) - task = "multiclass" - else: - if task == "multiclass": - raise ValueError( - f"Task type set to '{task}', but the dataset has only 1 class. " - f"Set the `task` parameter for {self.name} to 'binary'." - ) - task = "binary" - if "task" not in kwargs: - logger.warning( - f"Task type not specified for {self.name}, assuming '{task}'. " - "If this is not correct, please set the `task` parameter explicitly." - ) - kwargs["task"] = task - self._task = task - - if self._task == "multiclass": - if "num_classes" not in kwargs: - if self.node is None: - raise ValueError( - "Either `node` or `num_classes` must be provided to " - "multiclass torchmetrics." - ) - kwargs["num_classes"] = self.node.n_classes - elif self._task == "multilabel": - if "num_labels" not in kwargs: - if self.node is None: - raise ValueError( - "Either `node` or `num_labels` must be provided to " - "multilabel torchmetrics." - ) - kwargs["num_labels"] = self.node.n_classes - - self.metric = self.Metric(**kwargs) - - def update(self, preds, target, *args, **kwargs) -> None: - if self._task in ["multiclass"]: - target = target.argmax(dim=1) - self.metric.update(preds, target, *args, **kwargs) - - def compute(self) -> Tensor: - return self.metric.compute() - - def reset(self) -> None: - self.metric.reset() - - -class Accuracy(TorchMetricWrapper): - supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] - Metric = torchmetrics.Accuracy - - -class F1Score(TorchMetricWrapper): - supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] - Metric = torchmetrics.F1Score - - -class JaccardIndex(TorchMetricWrapper): - supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] - Metric = torchmetrics.JaccardIndex - - -class Precision(TorchMetricWrapper): - supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] - Metric = torchmetrics.Precision - - -class Recall(TorchMetricWrapper): - supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] - Metric = torchmetrics.Recall diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision.py b/luxonis_train/attached_modules/metrics/mean_average_precision.py index ffdf5e22..6d51f55b 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision.py @@ -1,23 +1,29 @@ +from typing import Any + import torchmetrics.detection as detection +from luxonis_ml.data import LabelType from torch import Tensor from torchvision.ops import box_convert -from luxonis_train.utils.types import Labels, LabelType, Packet +from luxonis_train.utils import Labels, Packet from .base_metric import BaseMetric 
-class MeanAveragePrecision(BaseMetric): - """Compute the Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR) for object - detection predictions. +class MeanAveragePrecision( + BaseMetric[list[dict[str, Tensor]], list[dict[str, Tensor]]] +): + """Compute the Mean-Average-Precision (mAP) and Mean-Average-Recall + (mAR) for object detection predictions. - Adapted from U{Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR) + Adapted from U{Mean-Average-Precision (mAP) and Mean-Average-Recall + (mAR) }. """ supported_labels = [LabelType.BOUNDINGBOX] - def __init__(self, **kwargs): + def __init__(self, **kwargs: Any): super().__init__(**kwargs) self.metric = detection.MeanAveragePrecision() @@ -29,12 +35,12 @@ def update( self.metric.update(outputs, labels) def prepare( - self, outputs: Packet[Tensor], labels: Labels + self, inputs: Packet[Tensor], labels: Labels ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]: - box_label = self.get_label(labels)[0] - output_nms = self.get_input_tensors(outputs) + box_label = self.get_label(labels) + output_nms = self.get_input_tensors(inputs) - image_size = self.node.original_in_shape[1:] + image_size = self.original_in_shape[1:] output_list: list[dict[str, Tensor]] = [] label_list: list[dict[str, Tensor]] = [] @@ -51,7 +57,9 @@ def prepare( curr_bboxs = box_convert(curr_label[:, 2:], "xywh", "xyxy") curr_bboxs[:, 0::2] *= image_size[1] curr_bboxs[:, 1::2] *= image_size[0] - label_list.append({"boxes": curr_bboxs, "labels": curr_label[:, 1].int()}) + label_list.append( + {"boxes": curr_bboxs, "labels": curr_label[:, 1].int()} + ) return output_list, label_list @@ -59,11 +67,21 @@ def reset(self) -> None: self.metric.reset() def compute(self) -> tuple[Tensor, dict[str, Tensor]]: - metric_dict = self.metric.compute() + metric_dict: dict[str, Tensor] = self.metric.compute() del metric_dict["classes"] del metric_dict["map_per_class"] del metric_dict["mar_100_per_class"] + for key in list(metric_dict.keys()): + if "map" in key: + map = metric_dict[key] + mar_key = key.replace("map", "mar") + if mar_key in metric_dict: + mar = metric_dict[mar_key] + metric_dict[key.replace("map", "f1")] = ( + 2 * (map * mar) / (map + mar) + ) + map = metric_dict.pop("map") return map, metric_dict diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py index 0d558b43..3b34c242 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py @@ -3,21 +3,20 @@ from typing import Any, Literal import torch +from luxonis_ml.data import LabelType from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval from torch import Tensor from torchvision.ops import box_convert -from luxonis_train.attached_modules.metrics.object_keypoint_similarity import ( - get_area_factor, - get_sigmas, -) -from luxonis_train.utils.types import Labels, LabelType, Packet +from luxonis_train.utils import Labels, Packet, get_sigmas, get_with_default from .base_metric import BaseMetric -class MeanAveragePrecisionKeypoints(BaseMetric): +class MeanAveragePrecisionKeypoints( + BaseMetric[list[dict[str, Tensor]], list[dict[str, Tensor]]] +): """Mean Average Precision metric for keypoints. Uses C{OKS} as IoU measure. 
@@ -48,15 +47,14 @@ def __init__( box_format: Literal["xyxy", "xywh", "cxcywh"] = "xyxy", **kwargs, ): - """Implementation of the mean average precision metric for keypoint detections. + """Implementation of the mean average precision metric for + keypoint detections. Adapted from: U{https://github.com/Lightning-AI/torchmetrics/blob/v1.0.1/src/ torchmetrics/detection/mean_ap.py}. - @license: Apache-2.0 License + @license: Apache License, Version 2.0 - @type num_keypoints: int - @param num_keypoints: Number of keypoints. @type sigmas: list[float] | None @param sigmas: Sigma for each keypoint to weigh its importance, if C{None}, then use COCO if possible otherwise defaults. Defaults to C{None}. @@ -66,15 +64,15 @@ def __init__( @param max_dets: Maximum number of detections to be considered per image. Defaults to C{20}. @type box_format: Literal["xyxy", "xywh", "cxcywh"] @param box_format: Input bbox format. - @type kwargs: Any - @param kwargs: Additional arguments to pass to L{BaseMetric}. """ super().__init__(**kwargs) - self.n_keypoints = self.node.n_keypoints - - self.sigmas = get_sigmas(sigmas, self.n_keypoints, self.name) - self.area_factor = get_area_factor(area_factor, self.name) + self.sigmas = get_sigmas( + sigmas, self.n_keypoints, caller_name=self.name + ) + self.area_factor = get_with_default( + area_factor, "bbox area scaling", self.name, default=0.53 + ) self.max_dets = max_dets allowed_box_formats = ("xyxy", "xywh", "cxcywh") @@ -93,12 +91,16 @@ def __init__( self.add_state("groundtruth_labels", default=[], dist_reduce_fx=None) self.add_state("groundtruth_area", default=[], dist_reduce_fx=None) self.add_state("groundtruth_crowds", default=[], dist_reduce_fx=None) - self.add_state("groundtruth_keypoints", default=[], dist_reduce_fx=None) + self.add_state( + "groundtruth_keypoints", default=[], dist_reduce_fx=None + ) - def prepare(self, outputs: Packet[Tensor], labels: Labels): + def prepare( + self, inputs: Packet[Tensor], labels: Labels + ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]: assert self.node.tasks is not None - kpts = self.get_label(labels, LabelType.KEYPOINTS)[0] - boxes = self.get_label(labels, LabelType.BOUNDINGBOX)[0] + kpts = self.get_label(labels, LabelType.KEYPOINTS) + boxes = self.get_label(labels, LabelType.BOUNDINGBOX) nkpts = (kpts.shape[1] - 2) // 3 label = torch.zeros((len(boxes), nkpts * 3 + 6)) @@ -108,19 +110,21 @@ def prepare(self, outputs: Packet[Tensor], labels: Labels): label[:, 7::3] = kpts[:, 3::3] # y label[:, 8::3] = kpts[:, 4::3] # visiblity - output_list_kpt_map = [] - label_list_kpt_map = [] - image_size = self.node.original_in_shape[1:] + output_list_kpt_map: list[dict[str, Tensor]] = [] + label_list_kpt_map: list[dict[str, Tensor]] = [] + image_size = self.original_in_shape[1:] - output_kpts = self.get_input_tensors(outputs, LabelType.KEYPOINTS) - output_bboxes = self.get_input_tensors(outputs, LabelType.BOUNDINGBOX) + output_kpts = self.get_input_tensors(inputs, LabelType.KEYPOINTS) + output_bboxes = self.get_input_tensors(inputs, LabelType.BOUNDINGBOX) for i in range(len(output_kpts)): output_list_kpt_map.append( { "boxes": output_bboxes[i][:, :4], "scores": output_bboxes[i][:, 4], "labels": output_bboxes[i][:, 5].int(), - "keypoints": output_kpts[i].reshape(-1, self.n_keypoints * 3), + "keypoints": output_kpts[i].reshape( + -1, self.n_keypoints * 3 + ), } ) @@ -223,7 +227,9 @@ def compute(self) -> tuple[Tensor, dict[str, Tensor]]: coco_target.createIndex() coco_preds.createIndex() - self.coco_eval = 
COCOeval(coco_target, coco_preds, iouType="keypoints") + self.coco_eval = COCOeval( + coco_target, coco_preds, iouType="keypoints" + ) self.coco_eval.params.kpt_oks_sigmas = self.sigmas.cpu().numpy() self.coco_eval.params.maxDets = [self.max_dets] @@ -254,20 +260,24 @@ def _get_coco_format( crowds: list[Tensor] | None = None, area: list[Tensor] | None = None, ) -> dict[str, list[dict[str, Any]]]: - """Transforms and returns all cached targets or predictions in COCO format. + """Transforms and returns all cached targets or predictions in + COCO format. - Format is defined at U{https://cocodataset.org/#format-data}. + Format is defined at U{ + https://cocodataset.org/#format-data}. """ - images = [] - annotations = [] - annotation_id = 1 # has to start with 1, otherwise COCOEval results are wrong + images: list[dict[str, int]] = [] + annotations: list[dict[str, Any]] = [] + annotation_id = ( + 1 # has to start with 1, otherwise COCOEval results are wrong + ) for image_id, (image_boxes, image_kpts, image_labels) in enumerate( zip(boxes, keypoints, labels) ): - image_boxes_list = image_boxes.cpu().tolist() - image_kpts_list = image_kpts.cpu().tolist() - image_labels_list = image_labels.cpu().tolist() + image_boxes_list: list[list[float]] = image_boxes.cpu().tolist() + image_kpts_list: list[list[float]] = image_kpts.cpu().tolist() + image_labels_list: list[int] = image_labels.cpu().tolist() images.append({"id": image_id}) @@ -297,8 +307,12 @@ def _get_coco_format( else: area_stat = image_box[2] * image_box[3] * self.area_factor - num_keypoints = len( - [i for i in range(2, len(image_kpt), 3) if image_kpt[i] != 0] + n_keypoints = len( + [ + i + for i in range(2, len(image_kpt), 3) + if image_kpt[i] != 0 + ] ) # number of annotated keypoints annotation = { "id": annotation_id, @@ -307,14 +321,18 @@ def _get_coco_format( "area": area_stat, "category_id": image_label, "iscrowd": ( - crowds[image_id][k].cpu().tolist() if crowds is not None else 0 + crowds[image_id][k].cpu().tolist() + if crowds is not None + else 0 ), "keypoints": image_kpt, - "num_keypoints": num_keypoints, + "num_keypoints": n_keypoints, } if scores is not None: score = scores[image_id][k].cpu().tolist() + # `tolist` returns a number for scalar tensors, + # the name is misleading if not isinstance(score, float): raise ValueError( f"Invalid input score of sample {image_id}, element {k}" @@ -325,9 +343,15 @@ def _get_coco_format( annotation_id += 1 classes = [{"id": i, "name": str(i)} for i in self._get_classes()] - return {"images": images, "annotations": annotations, "categories": classes} + return { + "images": images, + "annotations": annotations, + "categories": classes, + } - def _get_safe_item_values(self, item: dict[str, Tensor]) -> tuple[Tensor, Tensor]: + def _get_safe_item_values( + self, item: dict[str, Tensor] + ) -> tuple[Tensor, Tensor]: """Convert and return the boxes.""" boxes = self._fix_empty_tensors(item["boxes"]) if boxes.numel() > 0: @@ -336,7 +360,8 @@ def _get_safe_item_values(self, item: dict[str, Tensor]) -> tuple[Tensor, Tensor return boxes, keypoints def _get_classes(self) -> list[int]: - """Return a list of unique classes found in ground truth and detection data.""" + """Return a list of unique classes found in ground truth and + detection data.""" if len(self.pred_labels) > 0 or len(self.groundtruth_labels) > 0: return ( torch.cat(self.pred_labels + self.groundtruth_labels) @@ -348,7 +373,8 @@ def _get_classes(self) -> list[int]: @staticmethod def _fix_empty_tensors(input_tensor: Tensor) -> Tensor: - 
"""Empty tensors can cause problems in DDP mode, this methods corrects them.""" + """Empty tensors can cause problems in DDP mode, this methods + corrects them.""" if input_tensor.numel() == 0 and input_tensor.ndim == 1: return input_tensor.unsqueeze(0) return input_tensor diff --git a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py index 4cbd1cac..503a00ad 100644 --- a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py +++ b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py @@ -1,11 +1,13 @@ import logging +from typing import Any import torch +from luxonis_ml.data import LabelType from scipy.optimize import linear_sum_assignment from torch import Tensor from torchvision.ops import box_convert -from luxonis_train.utils.types import Labels, LabelType, Packet +from luxonis_train.utils import Labels, Packet, get_sigmas, get_with_default from .base_metric import BaseMetric @@ -33,46 +35,46 @@ def __init__( sigmas: list[float] | None = None, area_factor: float | None = None, use_cocoeval_oks: bool = True, - **kwargs, + **kwargs: Any, ) -> None: - """Object Keypoint Similarity metric for evaluating keypoint predictions. + """Object Keypoint Similarity metric for evaluating keypoint + predictions. - @type n_keypoints: int - @param n_keypoints: Number of keypoints. @type sigmas: list[float] | None - @param sigmas: Sigma for each keypoint to weigh its importance, if C{None}, then - use COCO if possible otherwise defaults. Defaults to C{None}. + @param sigmas: Sigma for each keypoint to weigh its importance, + if C{None}, then use COCO if possible otherwise defaults. + Defaults to C{None}. @type area_factor: float | None - @param area_factor: Factor by which we multiply bbox area. If None then use - default one. Defaults to C{None}. + @param area_factor: Factor by which we multiply bbox area. If + None then use default one. Defaults to C{None}. @type use_cocoeval_oks: bool - @param use_cocoeval_oks: Whether to use same OKS formula as in COCOeval or use - the one from definition. Defaults to C{True}. + @param use_cocoeval_oks: Whether to use same OKS formula as in + COCOeval or use the one from definition. Defaults to + C{True}. """ super().__init__(**kwargs) - if n_keypoints is None and self.node is None: - raise ValueError( - f"Either `n_keypoints` or `node` must be provided to {self.name}." 
- ) - self.n_keypoints = n_keypoints or self.node.n_keypoints - - self.sigmas = get_sigmas(sigmas, self.n_keypoints, self.name) - self.area_factor = get_area_factor(area_factor, self.name) + self.sigmas = get_sigmas( + sigmas, self.n_keypoints, caller_name=self.name + ) + self.area_factor = get_with_default( + area_factor, "bbox area scaling", self.name, default=0.53 + ) self.use_cocoeval_oks = use_cocoeval_oks self.add_state("pred_keypoints", default=[], dist_reduce_fx=None) - self.add_state("groundtruth_keypoints", default=[], dist_reduce_fx=None) + self.add_state( + "groundtruth_keypoints", default=[], dist_reduce_fx=None + ) self.add_state("groundtruth_scales", default=[], dist_reduce_fx=None) def prepare( - self, outputs: Packet[Tensor], labels: Labels + self, inputs: Packet[Tensor], labels: Labels ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]: - assert self.node.tasks is not None - kpts_labels = self.get_label(labels, LabelType.KEYPOINTS)[0] - bbox_labels = self.get_label(labels, LabelType.BOUNDINGBOX)[0] - num_keypoints = (kpts_labels.shape[1] - 2) // 3 - label = torch.zeros((len(bbox_labels), num_keypoints * 3 + 6)) + kpts_labels = self.get_label(labels, LabelType.KEYPOINTS) + bbox_labels = self.get_label(labels, LabelType.BOUNDINGBOX) + n_keypoints = (kpts_labels.shape[1] - 2) // 3 + label = torch.zeros((len(bbox_labels), n_keypoints * 3 + 6)) label[:, :2] = bbox_labels[:, :2] label[:, 2:6] = box_convert(bbox_labels[:, 2:], "xywh", "xyxy") label[:, 6::3] = kpts_labels[:, 2::3] # insert kp x coordinates @@ -81,10 +83,10 @@ def prepare( output_list_oks = [] label_list_oks = [] - image_size = self.node.original_in_shape[1:] + image_size = self.original_in_shape[1:] for i, pred_kpt in enumerate( - self.get_input_tensors(outputs, LabelType.KEYPOINTS) + self.get_input_tensors(inputs, LabelType.KEYPOINTS) ): output_list_oks.append({"keypoints": pred_kpt}) @@ -97,8 +99,12 @@ def prepare( curr_kpts[:, 1::3] *= image_size[0] curr_bboxs_widths = curr_bboxs[:, 2] - curr_bboxs[:, 0] curr_bboxs_heights = curr_bboxs[:, 3] - curr_bboxs[:, 1] - curr_scales = curr_bboxs_widths * curr_bboxs_heights * self.area_factor - label_list_oks.append({"keypoints": curr_kpts, "scales": curr_scales}) + curr_scales = ( + curr_bboxs_widths * curr_bboxs_heights * self.area_factor + ) + label_list_oks.append( + {"keypoints": curr_kpts, "scales": curr_scales} + ) return output_list_oks, label_list_oks @@ -129,11 +135,11 @@ def update( width and height are unnormalized. 
""" for item in preds: - keypoints = fix_empty_tensors(item["keypoints"]) + keypoints = self._fix_empty_tensors(item["keypoints"]) self.pred_keypoints.append(keypoints) for item in target: - keypoints = fix_empty_tensors(item["keypoints"]) + keypoints = self._fix_empty_tensors(item["keypoints"]) self.groundtruth_keypoints.append(keypoints) self.groundtruth_scales.append(item["scales"]) @@ -144,10 +150,14 @@ def compute(self) -> Tensor: image_mean_oks = torch.zeros(len(self.groundtruth_keypoints)) for i, (pred_kpts, gt_kpts, gt_scales) in enumerate( zip( - self.pred_keypoints, self.groundtruth_keypoints, self.groundtruth_scales + self.pred_keypoints, + self.groundtruth_keypoints, + self.groundtruth_scales, ) ): - gt_kpts = torch.reshape(gt_kpts, (-1, self.n_keypoints, 3)) # [N, K, 3] + gt_kpts = torch.reshape( + gt_kpts, (-1, self.n_keypoints, 3) + ) # [N, K, 3] image_ious = compute_oks( pred_kpts, @@ -159,13 +169,23 @@ def compute(self) -> Tensor: gt_indices, pred_indices = linear_sum_assignment( image_ious.cpu().numpy(), maximize=True ) - matched_ious = [image_ious[n, m] for n, m in zip(gt_indices, pred_indices)] + matched_ious = [ + image_ious[n, m] for n, m in zip(gt_indices, pred_indices) + ] image_mean_oks[i] = torch.tensor(matched_ious).mean() final_oks = image_mean_oks.nanmean() return final_oks + @staticmethod + def _fix_empty_tensors(input_tensor: Tensor) -> Tensor: + """Empty tensors can cause problems in DDP mode, this methods + corrects them.""" + if input_tensor.numel() == 0 and input_tensor.ndim == 1: + return input_tensor.unsqueeze(0) + return input_tensor + def compute_oks( pred: Tensor, @@ -174,7 +194,8 @@ def compute_oks( sigmas: Tensor, use_cocoeval_oks: bool, ) -> Tensor: - """Compute Object Keypoint Similarity between every GT and prediction. + """Compute Object Keypoint Similarity between every GT and + prediction. @type pred: Tensor[N, K, 3] @param pred: Predicted keypoints. @@ -183,11 +204,11 @@ def compute_oks( @type scales: Tensor[M] @param scales: Scales of the bounding boxes. @type sigmas: Tensor - @param sigmas: Sigma for each keypoint to weigh its importance, if C{None}, then use - same weights for all. + @param sigmas: Sigma for each keypoint to weigh its importance, if + C{None}, then use same weights for all. @type use_cocoeval_oks: bool - @param use_cocoeval_oks: Whether to use same OKS formula as in COCOeval or use the - one from definition. + @param use_cocoeval_oks: Whether to use same OKS formula as in + COCOeval or use the one from definition. @rtype: Tensor @return: Object Keypoint Similarity every pred and gt [M, N] """ @@ -211,73 +232,3 @@ def compute_oks( return (torch.exp(-oks) * kpt_mask[:, None]).sum(-1) / ( kpt_mask.sum(-1)[:, None] + eps ) - - -def fix_empty_tensors(input_tensor: Tensor) -> Tensor: - """Empty tensors can cause problems in DDP mode, this methods corrects them.""" - if input_tensor.numel() == 0 and input_tensor.ndim == 1: - return input_tensor.unsqueeze(0) - return input_tensor - - -def get_sigmas( - sigmas: list[float] | None, n_keypoints: int, class_name: str | None -) -> Tensor: - """Validate and set the sigma values.""" - if sigmas is not None: - if len(sigmas) == n_keypoints: - return torch.tensor(sigmas, dtype=torch.float32) - else: - error_msg = "The length of the sigmas list must be the same as the number of keypoints." - if class_name: - error_msg = f"[{class_name}] {error_msg}" - raise ValueError(error_msg) - else: - if n_keypoints == 17: - warn_msg = "Default COCO sigmas are being used." 
- if class_name: - warn_msg = f"[{class_name}] {warn_msg}" - logger.warning(warn_msg) - return torch.tensor( - [ - 0.026, - 0.025, - 0.025, - 0.035, - 0.035, - 0.079, - 0.079, - 0.072, - 0.072, - 0.062, - 0.062, - 0.107, - 0.107, - 0.087, - 0.087, - 0.089, - 0.089, - ], - dtype=torch.float32, - ) - else: - warn_msg = "Default sigma of 0.04 is being used for each keypoint." - if class_name: - warn_msg = f"[{class_name}] {warn_msg}" - logger.warning(warn_msg) - return torch.tensor([0.04] * n_keypoints, dtype=torch.float32) - - -def get_area_factor(area_factor: float | None, class_name: str | None) -> float: - """Set the default area factor if not defined.""" - factor = 0.53 - if area_factor is None: - warn_msg = ( - f"Default area_factor of {factor} is being used for bbox area scaling." - ) - if class_name: - warn_msg = f"[{class_name}] {warn_msg}" - logger.warning(warn_msg) - return factor - else: - return area_factor diff --git a/luxonis_train/attached_modules/metrics/torchmetrics.py b/luxonis_train/attached_modules/metrics/torchmetrics.py new file mode 100644 index 00000000..a8797a13 --- /dev/null +++ b/luxonis_train/attached_modules/metrics/torchmetrics.py @@ -0,0 +1,114 @@ +import logging +from contextlib import suppress +from typing import Any + +import torchmetrics +from luxonis_ml.data import LabelType +from torch import Tensor + +from .base_metric import BaseMetric + +logger = logging.getLogger(__name__) + + +class TorchMetricWrapper(BaseMetric[Tensor]): + Metric: type[torchmetrics.Metric] + + def __init__(self, **kwargs: Any): + super().__init__(node=kwargs.pop("node", None)) + task = kwargs.get("task") + if task is None: + if "num_classes" in kwargs: + if kwargs["num_classes"] == 1: + task = "binary" + else: + task = "multiclass" + elif "num_labels" in kwargs: + task = "multilabel" + else: + with suppress(RuntimeError, ValueError): + if self.n_classes == 1: + task = "binary" + else: + task = "multiclass" + + if task is None: + raise ValueError( + f"'{self.name}' does not have the 'task' parameter set. " + "and it is not possible to infer it from the other arguments. " + "You can either set the 'task' parameter explicitly, provide either 'num_classes' or 'num_labels' argument, " + "or use this metric with a node. " + "The 'task' can be one of 'binary', 'multiclass', or 'multilabel'. " + ) + self._task = task + kwargs["task"] = task + + n_classes: int | None = kwargs.get( + "num_classes", kwargs.get("num_labels") + ) + + if n_classes is None: + with suppress(RuntimeError, ValueError): + n_classes = self.n_classes + + if n_classes is None and task != "binary": + arg_name = "num_classes" if task == "multiclass" else "num_labels" + raise ValueError( + f"'{self.name}' metric does not have the '{arg_name}' parameter set " + "and it is not possible to infer it from the other arguments. " + "You can either set the '{arg_name}' parameter explicitly, or use this metric with a node." + ) + + if task == "binary" and n_classes is not None and n_classes > 1: + raise ValueError( + f"Task type set to '{task}', but the dataset has more than 1 class. " + f"Set the `task` argument of '{self.name}' to either 'multiclass' or 'multilabel'." + ) + elif task != "binary" and n_classes == 1: + raise ValueError( + f"Task type set to '{task}', but the dataset has only 1 class. " + f"Set the `task` argument of '{self.name}' to 'binary'." 
+ ) + + if task == "multiclass": + kwargs["num_classes"] = n_classes + elif task == "multilabel": + kwargs["num_labels"] = n_classes + + self.metric = self.Metric(**kwargs) + + def update(self, preds: Tensor, target: Tensor) -> None: + if self._task in ["multiclass"]: + target = target.argmax(dim=1) + self.metric.update(preds, target) + + def compute(self) -> Tensor: + return self.metric.compute() + + def reset(self) -> None: + self.metric.reset() + + +class Accuracy(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + Metric = torchmetrics.Accuracy + + +class F1Score(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + Metric = torchmetrics.F1Score + + +class JaccardIndex(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + Metric = torchmetrics.JaccardIndex + + +class Precision(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + Metric = torchmetrics.Precision + + +class Recall(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + Metric = torchmetrics.Recall diff --git a/luxonis_train/attached_modules/visualizers/base_visualizer.py b/luxonis_train/attached_modules/visualizers/base_visualizer.py index 5fa6db62..817a09d5 100644 --- a/luxonis_train/attached_modules/visualizers/base_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/base_visualizer.py @@ -4,8 +4,8 @@ from typing_extensions import TypeVarTuple, Unpack from luxonis_train.attached_modules import BaseAttachedModule +from luxonis_train.utils import Labels, Packet from luxonis_train.utils.registry import VISUALIZERS -from luxonis_train.utils.types import Labels, Packet Ts = TypeVarTuple("Ts") @@ -17,8 +17,9 @@ class BaseVisualizer( ): """A base class for all visualizers. - This class defines the basic interface for all visualizers. It utilizes automatic - registration of defined subclasses to the L{VISUALIZERS} registry. + This class defines the basic interface for all visualizers. It + utilizes automatic registration of defined subclasses to the + L{VISUALIZERS} registry. """ @abstractmethod @@ -27,7 +28,12 @@ def forward( label_canvas: Tensor, prediction_canvas: Tensor, *args: Unpack[Ts], - ) -> Tensor | tuple[Tensor, Tensor] | tuple[Tensor, list[Tensor]] | list[Tensor]: + ) -> ( + Tensor + | tuple[Tensor, Tensor] + | tuple[Tensor, list[Tensor]] + | list[Tensor] + ): """Forward pass of the visualizer. 
Takes an image and the prepared inputs from the `prepare` method and @@ -62,4 +68,6 @@ def run( inputs: Packet[Tensor], labels: Labels, ) -> Tensor | tuple[Tensor, Tensor] | tuple[Tensor, list[Tensor]]: - return self(label_canvas, prediction_canvas, *self.prepare(inputs, labels)) + return self( + label_canvas, prediction_canvas, *self.prepare(inputs, labels) + ) diff --git a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py index df3ac933..e544bf06 100644 --- a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py @@ -1,12 +1,16 @@ import logging import torch +from luxonis_ml.data import LabelType from torch import Tensor -from luxonis_train.utils.types import LabelType - from .base_visualizer import BaseVisualizer -from .utils import Color, draw_bounding_box_labels, draw_bounding_boxes, get_color +from .utils import ( + Color, + draw_bounding_box_labels, + draw_bounding_boxes, + get_color, +) class BBoxVisualizer(BaseVisualizer[list[Tensor], Tensor]): @@ -25,39 +29,50 @@ def __init__( ): """Visualizer for bounding box predictions. - Creates a visualization of the bounding box predictions and labels. + Creates a visualization of the bounding box predictions and + labels. @type labels: dict[int, str] | list[str] | None - @param labels: Either a dictionary mapping class indices to names, or a list of - names. If list is provided, the label mapping is done by index. By default, - no labels are drawn. + @param labels: Either a dictionary mapping class indices to + names, or a list of names. If list is provided, the label + mapping is done by index. By default, no labels are drawn. @type draw_labels: bool - @param draw_labels: Whether or not to draw labels. Defaults to C{True}. + @param draw_labels: Whether or not to draw labels. Defaults to + C{True}. @type colors: dict[int, Color] | list[Color] | None - @param colors: Either a dictionary mapping class indices to colors, or a list of - colors. If list is provided, the color mapping is done by index. By default, - random colors are used. + @param colors: Either a dictionary mapping class indices to + colors, or a list of colors. If list is provided, the color + mapping is done by index. By default, random colors are + used. @type fill: bool - @param fill: Whether or not to fill the bounding boxes. Defaults to C{False}. + @param fill: Whether or not to fill the bounding boxes. Defaults + to C{False}. @type width: int | None - @param width: The width of the bounding box lines. Defaults to C{1}. + @param width: The width of the bounding box lines. Defaults to + C{1}. @type font: str | None - @param font: A filename containing a TrueType font. Defaults to C{None}. + @param font: A filename containing a TrueType font. Defaults to + C{None}. @type font_size: int | None - @param font_size: The font size to use for the labels. Defaults to C{None}. + @param font_size: The font size to use for the labels. Defaults + to C{None}. 
""" super().__init__(**kwargs) if isinstance(labels, list): labels = {i: label for i, label in enumerate(labels)} self.bbox_labels = labels or { - i: label for i, label in enumerate(self.node.class_names) + i: label for i, label in enumerate(self.class_names) } if colors is None: - colors = {label: get_color(i) for i, label in self.bbox_labels.items()} + colors = { + label: get_color(i) for i, label in self.bbox_labels.items() + } if isinstance(colors, list): - colors = {self.bbox_labels[i]: color for i, color in enumerate(colors)} + colors = { + self.bbox_labels[i]: color for i, color in enumerate(colors) + } self.colors = colors self.fill = fill self.width = width @@ -159,16 +174,17 @@ def forward( predictions: list[Tensor], targets: Tensor, ) -> tuple[Tensor, Tensor]: - """Creates a visualization of the bounding box predictions and labels. + """Creates a visualization of the bounding box predictions and + labels. @type label_canvas: Tensor @param label_canvas: The canvas containing the labels. @type prediction_canvas: Tensor @param prediction_canvas: The canvas containing the predictions. @type prediction: Tensor - @param prediction: The predicted bounding boxes. The shape should be [N, 6], - where N is the number of bounding boxes and the last dimension is [x1, y1, - x2, y2, class, conf]. + @param prediction: The predicted bounding boxes. The shape + should be [N, 6], where N is the number of bounding boxes + and the last dimension is [x1, y1, x2, y2, class, conf]. @type targets: Tensor @param targets: The target bounding boxes. """ diff --git a/luxonis_train/attached_modules/visualizers/classification_visualizer.py b/luxonis_train/attached_modules/visualizers/classification_visualizer.py index 20a5710e..9d26172b 100644 --- a/luxonis_train/attached_modules/visualizers/classification_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/classification_visualizer.py @@ -23,8 +23,8 @@ def __init__( """Visualizer for classification tasks. @type include_plot: bool - @param include_plot: Whether to include a plot of the class probabilities in the - visualization. Defaults to C{True}. + @param include_plot: Whether to include a plot of the class + probabilities in the visualization. Defaults to C{True}. 
""" super().__init__(**kwargs) self.include_plot = include_plot @@ -34,19 +34,21 @@ def __init__( def _get_class_name(self, pred: Tensor) -> str: idx = int((pred.argmax()).item()) - if self.node.class_names is None: + if self.class_names is None: return str(idx) - return self.node.class_names[idx] + return self.class_names[idx] - def _generate_plot(self, prediction: Tensor, width: int, height: int) -> Tensor: - prediction = prediction.softmax(-1).detach().cpu().numpy() + def _generate_plot( + self, prediction: Tensor, width: int, height: int + ) -> Tensor: + pred = prediction.softmax(-1).detach().cpu().numpy() fig, ax = plt.subplots(figsize=(width / 100, height / 100)) - ax.bar(np.arange(len(prediction)), prediction) - ax.set_xticks(np.arange(len(prediction))) - if self.node.class_names is not None: - ax.set_xticklabels(self.node.class_names, rotation=90) + ax.bar(np.arange(len(pred)), pred) + ax.set_xticks(np.arange(len(pred))) + if self.class_names is not None: + ax.set_xticklabels(self.class_names, rotation=90) else: - ax.set_xticklabels(np.arange(1, len(prediction) + 1)) + ax.set_xticklabels(np.arange(1, len(pred) + 1)) ax.set_ylim(0, 1) ax.set_xlabel("Class") ax.set_ylabel("Probability") @@ -88,7 +90,9 @@ def forward( overlay[i] = numpy_to_torch_img(arr) if self.include_plot: plots[i] = self._generate_plot( - prediction, prediction_canvas.shape[3], prediction_canvas.shape[2] + prediction, + prediction_canvas.shape[3], + prediction_canvas.shape[2], ) if self.include_plot: diff --git a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py index 287d5e1c..53b9cb88 100644 --- a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py @@ -22,18 +22,20 @@ def __init__( """Visualizer for keypoints. @type visibility_threshold: float - @param visibility_threshold: Threshold for visibility of keypoints. If the - visibility of a keypoint is below this threshold, it is considered as not - visible. Defaults to C{0.5}. + @param visibility_threshold: Threshold for visibility of + keypoints. If the visibility of a keypoint is below this + threshold, it is considered as not visible. Defaults to + C{0.5}. @type connectivity: list[tuple[int, int]] | None - @param connectivity: List of tuples of keypoint indices that define the - connections in the skeleton. Defaults to C{None}. + @param connectivity: List of tuples of keypoint indices that + define the connections in the skeleton. Defaults to C{None}. @type visible_color: L{Color} - @param visible_color: Color of visible keypoints. Either a string or a tuple of - RGB values. Defaults to C{"red"}. + @param visible_color: Color of visible keypoints. Either a + string or a tuple of RGB values. Defaults to C{"red"}. @type nonvisible_color: L{Color} | None - @param nonvisible_color: Color of nonvisible keypoints. If C{None}, nonvisible - keypoints are not drawn. Defaults to C{None}. + @param nonvisible_color: Color of nonvisible keypoints. If + C{None}, nonvisible keypoints are not drawn. Defaults to + C{None}. 
""" super().__init__(**kwargs) self.visibility_threshold = visibility_threshold @@ -62,7 +64,9 @@ def draw_predictions( if nonvisible_color is not None: _kwargs = deepcopy(kwargs) _kwargs["colors"] = nonvisible_color - nonvisible_kpts = prediction[..., :2] * mask.unsqueeze(-1).float() + nonvisible_kpts = ( + prediction[..., :2] * mask.unsqueeze(-1).float() + ) viz[i] = draw_keypoints( viz[i].clone(), nonvisible_kpts[..., :2], diff --git a/luxonis_train/attached_modules/visualizers/multi_visualizer.py b/luxonis_train/attached_modules/visualizers/multi_visualizer.py index c7925ecc..b7ecbfbb 100644 --- a/luxonis_train/attached_modules/visualizers/multi_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/multi_visualizer.py @@ -7,7 +7,8 @@ class MultiVisualizer(BaseVisualizer[Packet[Tensor], Labels]): - """Special type of visualizer that combines multiple visualizers together. + """Special type of visualizer that combines multiple visualizers + together. All the visualizers are applied in the order they are provided and they all draw on the same canvas. @@ -25,14 +26,16 @@ def __init__(self, visualizers: list[Kwargs], **kwargs): self.visualizers = [] for item in visualizers: visualizer_params = item.get("params", {}) - visualizer = VISUALIZERS.get(item["name"])(**visualizer_params, **kwargs) + visualizer = VISUALIZERS.get(item["name"])( + **visualizer_params, **kwargs + ) self.visualizers.append(visualizer) def prepare( - self, output: Packet[Tensor], label: Labels, idx: int = 0 + self, inputs: Packet[Tensor], label: Labels, idx: int = 0 ) -> tuple[Packet[Tensor], Labels]: self._idx = idx - return output, label + return inputs, label def forward( self, @@ -42,12 +45,16 @@ def forward( labels: Labels, ) -> tuple[Tensor, Tensor]: for visualizer in self.visualizers: - match visualizer.run(label_canvas, prediction_canvas, outputs, labels): + match visualizer.run( + label_canvas, prediction_canvas, outputs, labels + ): case Tensor() as prediction_viz: prediction_canvas = prediction_viz case (Tensor(data=label_viz), Tensor(data=prediction_viz)): label_canvas = label_viz prediction_canvas = prediction_viz case _: - raise NotImplementedError + raise NotImplementedError( + "Unexpected return type from visualizer." + ) return label_canvas, prediction_canvas diff --git a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py index 85b93ce1..15e2fd09 100644 --- a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py @@ -1,12 +1,16 @@ import logging import torch +from luxonis_ml.data import LabelType from torch import Tensor -from luxonis_train.utils.types import LabelType - from .base_visualizer import BaseVisualizer -from .utils import Color, draw_segmentation_labels, get_color, seg_output_to_bool +from .utils import ( + Color, + draw_segmentation_labels, + get_color, + seg_output_to_bool, +) logger = logging.getLogger(__name__) log_disable = False @@ -98,7 +102,8 @@ def forward( targets: Tensor, **kwargs, ) -> tuple[Tensor, Tensor]: - """Creates a visualization of the segmentation predictions and labels. + """Creates a visualization of the segmentation predictions and + labels. @type label_canvas: Tensor @param label_canvas: The canvas to draw the labels on. @@ -146,7 +151,9 @@ def _adjust_colors( if not log_disable: if colors is None: - logger.warning("No colors provided. 
Using random colors instead.") + logger.warning( + "No colors provided. Using random colors instead." + ) elif data.size(1) != len(colors): logger.warning( f"Number of colors ({len(colors)}) does not match number of " diff --git a/luxonis_train/attached_modules/visualizers/utils.py b/luxonis_train/attached_modules/visualizers/utils.py index c55b12ce..402ab98f 100644 --- a/luxonis_train/attached_modules/visualizers/utils.py +++ b/luxonis_train/attached_modules/visualizers/utils.py @@ -19,7 +19,7 @@ draw_segmentation_masks, ) -from luxonis_train.utils.config import Config +from luxonis_train.utils import Config Color = str | tuple[int, int, int] """Color type alias. @@ -44,13 +44,14 @@ def figure_to_torch(fig: Figure, width: int, height: int) -> Tensor: def torch_img_to_numpy( img: Tensor, reverse_colors: bool = False ) -> npt.NDArray[np.uint8]: - """Converts a torch image (CHW) to a numpy array (HWC). Optionally also converts - colors. + """Converts a torch image (CHW) to a numpy array (HWC). Optionally + also converts colors. @type img: Tensor @param img: Torch image (CHW) @type reverse_colors: bool - @param reverse_colors: Whether to reverse colors (RGB to BGR). Defaults to False. + @param reverse_colors: Whether to reverse colors (RGB to BGR). + Defaults to False. @rtype: npt.NDArray[np.uint8] @return: Numpy image (HWC) """ @@ -129,8 +130,8 @@ def draw_bounding_box_labels(img: Tensor, label: Tensor, **kwargs) -> Tensor: @type img: Tensor @param img: Image to draw on. @type label: Tensor - @param label: Bounding box label. The shape should be (n_instances, 4), where the - last dimension is (x, y, w, h). + @param label: Bounding box label. The shape should be (n_instances, + 4), where the last dimension is (x, y, w, h). @type kwargs: dict @param kwargs: Additional arguments to pass to L{torchvision.utils.draw_bounding_boxes}. @@ -150,10 +151,11 @@ def draw_keypoint_labels(img: Tensor, label: Tensor, **kwargs) -> Tensor: @type img: Tensor @param img: Image to draw on. @type label: Tensor - @param label: Keypoint label. The shape should be (n_instances, 3), where the last - dimension is (x, y, visibility). + @param label: Keypoint label. The shape should be (n_instances, 3), + where the last dimension is (x, y, visibility). @type kwargs: dict - @param kwargs: Additional arguments to pass to L{torchvision.utils.draw_keypoints}. + @param kwargs: Additional arguments to pass to + L{torchvision.utils.draw_keypoints}. @rtype: Tensor @return: Image with keypoint labels drawn on. """ @@ -191,7 +193,8 @@ def unnormalize( std: list[float] | float | None = None, to_uint8: bool = False, ) -> Tensor: - """Unnormalizes an image back to original values, optionally converts it to uint8. + """Unnormalizes an image back to original values, optionally + converts it to uint8. @type img: Tensor @param img: Image to unnormalize. 
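A rough sketch of the unnormalization step described above, assuming the common ImageNet mean/std as an illustration; this is not the module's actual helper:

    import torch

    def unnormalize_sketch(
        img: torch.Tensor,
        mean: tuple[float, ...] = (0.485, 0.456, 0.406),  # assumed defaults
        std: tuple[float, ...] = (0.229, 0.224, 0.225),
        to_uint8: bool = False,
    ) -> torch.Tensor:
        mean_t = torch.tensor(mean, device=img.device).view(-1, 1, 1)
        std_t = torch.tensor(std, device=img.device).view(-1, 1, 1)
        out = img * std_t + mean_t  # reverse of (img - mean) / std
        if to_uint8:
            out = (out.clamp(0, 1) * 255).to(torch.uint8)
        return out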
@@ -304,9 +307,12 @@ def get_color(seed: int) -> Color: # # TEST: def combine_visualizations( - visualization: Tensor | tuple[Tensor, Tensor] | tuple[Tensor, list[Tensor]], + visualization: Tensor + | tuple[Tensor, Tensor] + | tuple[Tensor, list[Tensor]], ) -> Tensor: - """Default way of combining multiple visualizations into one final image.""" + """Default way of combining multiple visualizations into one final + image.""" def resize_to_match( fst: Tensor, @@ -315,7 +321,7 @@ def resize_to_match( keep_size: Literal["larger", "smaller", "first", "second"] = "larger", resize_along: Literal["width", "height", "exact"] = "height", keep_aspect_ratio: bool = True, - ): + ) -> tuple[Tensor, Tensor]: """Resizes two images so they have the same size. Resizes two images so they can be concateneted together. It's possible to @@ -411,7 +417,9 @@ def resize_to_match( case Tensor() as viz: return viz case (Tensor(data=viz_labels), Tensor(data=viz_predictions)): - viz_labels, viz_predictions = resize_to_match(viz_labels, viz_predictions) + viz_labels, viz_predictions = resize_to_match( + viz_labels, viz_predictions + ) return torch.cat([viz_labels, viz_predictions], dim=-1) case (Tensor(data=_), [*viz]) if isinstance(viz, list) and all( diff --git a/luxonis_train/callbacks/__init__.py b/luxonis_train/callbacks/__init__.py index 4c7f7824..95f860a1 100644 --- a/luxonis_train/callbacks/__init__.py +++ b/luxonis_train/callbacks/__init__.py @@ -1,9 +1,13 @@ from lightning.pytorch.callbacks import ( DeviceStatsMonitor, EarlyStopping, + GradientAccumulationScheduler, LearningRateMonitor, ModelCheckpoint, + ModelPruning, RichModelSummary, + StochasticWeightAveraging, + Timer, ) from luxonis_train.utils.registry import CALLBACKS @@ -26,6 +30,10 @@ CALLBACKS.register_module(module=ModelCheckpoint) CALLBACKS.register_module(module=RichModelSummary) CALLBACKS.register_module(module=DeviceStatsMonitor) +CALLBACKS.register_module(module=GradientAccumulationScheduler) +CALLBACKS.register_module(module=StochasticWeightAveraging) +CALLBACKS.register_module(module=Timer) +CALLBACKS.register_module(module=ModelPruning) __all__ = [ diff --git a/luxonis_train/callbacks/archive_on_train_end.py b/luxonis_train/callbacks/archive_on_train_end.py index d9e7b298..30949e4e 100644 --- a/luxonis_train/callbacks/archive_on_train_end.py +++ b/luxonis_train/callbacks/archive_on_train_end.py @@ -26,12 +26,12 @@ def on_train_end( """ path = self.get_checkpoint(pl_module) - if path is None: + if path is None: # pragma: no cover logger.warning("Skipping model archiving.") return onnx_path = pl_module.core._exported_models.get("onnx") - if onnx_path is None: + if onnx_path is None: # pragma: no cover logger.error( "Model executable not found. " "Make sure to run exporter callback before archiver callback. " diff --git a/luxonis_train/callbacks/export_on_train_end.py b/luxonis_train/callbacks/export_on_train_end.py index 261c4ef6..e727e81f 100644 --- a/luxonis_train/callbacks/export_on_train_end.py +++ b/luxonis_train/callbacks/export_on_train_end.py @@ -25,7 +25,7 @@ def on_train_end( @param pl_module: Pytorch Lightning module. 
""" path = self.get_checkpoint(pl_module) - if path is None: + if path is None: # pragma: no cover logger.warning("Skipping model export.") return diff --git a/luxonis_train/callbacks/gpu_stats_monitor.py b/luxonis_train/callbacks/gpu_stats_monitor.py index 9479d4d2..a189ed3f 100644 --- a/luxonis_train/callbacks/gpu_stats_monitor.py +++ b/luxonis_train/callbacks/gpu_stats_monitor.py @@ -27,11 +27,11 @@ import pytorch_lightning as pl import torch -from lightning.pytorch.accelerators import CUDAAccelerator # type: ignore -from pytorch_lightning.utilities import rank_zero_only -from pytorch_lightning.utilities.exceptions import ( - MisconfigurationException, # type: ignore +from lightning.pytorch.accelerators.cuda import CUDAAccelerator +from lightning_fabric.utilities.exceptions import ( + MisconfigurationException, # noqa: F401 ) +from pytorch_lightning.utilities import rank_zero_only from pytorch_lightning.utilities.parsing import AttributeDict from pytorch_lightning.utilities.types import STEP_OUTPUT @@ -40,49 +40,6 @@ @CALLBACKS.register_module() class GPUStatsMonitor(pl.Callback): - """Automatically monitors and logs GPU stats during training stage. - C{GPUStatsMonitor} is a callback and in order to use it you need to assign a logger - in the C{Trainer}. - - Args: - memory_utilization: Set to C{True} to monitor used, free and percentage of memory - utilization at the start and end of each step. Default: C{True}. - gpu_utilization: Set to C{True} to monitor percentage of GPU utilization - at the start and end of each step. Default: C{True}. - intra_step_time: Set to C{True} to monitor the time of each step. Default: {False}. - inter_step_time: Set to C{True} to monitor the time between the end of one step - and the start of the next step. Default: C{False}. - fan_speed: Set to C{True} to monitor percentage of fan speed. Default: C{False}. - temperature: Set to C{True} to monitor the memory and gpu temperature in degree Celsius. - Default: C{False}. - - Raises: - MisconfigurationException: - If NVIDIA driver is not installed, not running on GPUs, or C{Trainer} has no logger. - - Example:: - - >>> from pytorch_lightning import Trainer - >>> from pytorch_lightning.callbacks import GPUStatsMonitor - >>> gpu_stats = GPUStatsMonitor() # doctest: +SKIP - >>> trainer = Trainer(callbacks=[gpu_stats]) # doctest: +SKIP - - GPU stats are mainly based on C{nvidia-smi --query-gpu} command. The description of the queries is as follows: - - - **fan.speed** – The fan speed value is the percent of maximum speed that the device's fan is currently - intended to run at. It ranges from 0 to 100 %. Note: The reported speed is the intended fan speed. - If the fan is physically blocked and unable to spin, this output will not match the actual fan speed. - Many parts do not report fan speeds because they rely on cooling via fans in the surrounding enclosure. - - **memory.used** – Total memory allocated by active contexts. - - **memory.free** – Total free memory. - - **utilization.gpu** – Percent of time over the past sample period during which one or more kernels was - executing on the GPU. The sample period may be between 1 second and 1/6 second depending on the product. - - **utilization.memory** – Percent of time over the past sample period during which global (device) memory was - being read or written. The sample period may be between 1 second and 1/6 second depending on the product. - - **temperature.gpu** – Core GPU temperature, in degrees C. 
- - **temperature.memory** – HBM memory temperature, in degrees C. - """ - def __init__( self, memory_utilization: bool = True, @@ -92,6 +49,40 @@ def __init__( fan_speed: bool = False, temperature: bool = False, ): + """Automatically monitors and logs GPU stats during training + stage. C{GPUStatsMonitor} is a callback and in order to use it + you need to assign a logger in the C{Trainer}. + + GPU stats are mainly based on C{nvidia-smi --query-gpu} command. The description of the queries is as follows: + + - C{fan.speed} – The fan speed value is the percent of maximum speed that the device's fan is currently + intended to run at. It ranges from 0 to 100 %. Note: The reported speed is the intended fan speed. + If the fan is physically blocked and unable to spin, this output will not match the actual fan speed. + Many parts do not report fan speeds because they rely on cooling via fans in the surrounding enclosure. + - C{memory.used} – Total memory allocated by active contexts. + - C{memory.free} – Total free memory. + - C{utilization.gpu} – Percent of time over the past sample period during which one or more kernels was + executing on the GPU. The sample period may be between 1 second and 1/6 second depending on the product. + - C{utilization.memory} – Percent of time over the past sample period during which global (device) memory was + being read or written. The sample period may be between 1 second and 1/6 second depending on the product. + - C{temperature.gpu} – Core GPU temperature, in degrees C. + - C{temperature.memory} – HBM memory temperature, in degrees C. + + @type memory_utilization: bool + @param memory_utilization: Set to C{True} to monitor used, free and percentage of memory utilization at the start and end of each step. Defaults to C{True}. + @type gpu_utilization: bool + @param gpu_utilization: Set to C{True} to monitor percentage of GPU utilization at the start and end of each step. Defaults to C{True}. + @type intra_step_time: bool + @param intra_step_time: Set to C{True} to monitor the time of each step. Defaults to {False}. + @type inter_step_time: bool + @param inter_step_time: Set to C{True} to monitor the time between the end of one step and the start of the next step. Defaults to C{False}. + @type fan_speed: bool + @param fan_speed: Set to C{True} to monitor percentage of fan speed. Defaults to C{False}. + @type temperature: bool + @param temperature: Set to C{True} to monitor the memory and gpu temperature in degree Celsius. Defaults to C{False}. + @raises MisconfigurationException: If NVIDIA driver is not installed, not running on GPUs, or C{Trainer} has no logger. 
+ """ + super().__init__() if shutil.which("nvidia-smi") is None: @@ -167,7 +158,9 @@ def on_train_batch_start( gpu_stat_keys = self._get_gpu_stat_keys() gpu_stats = self._get_gpu_stats([k for k, _ in gpu_stat_keys]) - logs = self._parse_gpu_stats(self._device_ids, gpu_stats, gpu_stat_keys) + logs = self._parse_gpu_stats( + self._device_ids, gpu_stats, gpu_stat_keys + ) if self._log_stats.inter_step_time and self._snap_inter_step_time: # First log at beginning of second step @@ -193,9 +186,13 @@ def on_train_batch_end( if not trainer._logger_connector.should_update_logs: return - gpu_stat_keys = self._get_gpu_stat_keys() + self._get_gpu_device_stat_keys() + gpu_stat_keys = ( + self._get_gpu_stat_keys() + self._get_gpu_device_stat_keys() + ) gpu_stats = self._get_gpu_stats([k for k, _ in gpu_stat_keys]) - logs = self._parse_gpu_stats(self._device_ids, gpu_stats, gpu_stat_keys) + logs = self._parse_gpu_stats( + self._device_ids, gpu_stats, gpu_stat_keys + ) if self._log_stats.intra_step_time and self._snap_intra_step_time: logs["batch_time/intra_step (ms)"] = ( @@ -213,7 +210,9 @@ def _get_gpu_ids(device_ids: List[int]) -> List[str]: cuda_visible_devices: List[str] = os.getenv( "CUDA_VISIBLE_DEVICES", default=default ).split(",") - return [cuda_visible_devices[device_id].strip() for device_id in device_ids] + return [ + cuda_visible_devices[device_id].strip() for device_id in device_ids + ] def _get_gpu_stats(self, queries: List[str]) -> List[List[float]]: if not queries: @@ -251,7 +250,9 @@ def _to_float(x: str) -> float: @staticmethod def _parse_gpu_stats( - device_ids: List[int], stats: List[List[float]], keys: List[Tuple[str, str]] + device_ids: List[int], + stats: List[List[float]], + keys: List[Tuple[str, str]], ) -> Dict[str, float]: """Parse the gpu stats into a loggable dict.""" logs = {} @@ -288,6 +289,8 @@ def _get_gpu_device_stat_keys(self) -> List[Tuple[str, str]]: stat_keys.append(("fan.speed", "%")) if self._log_stats.temperature: - stat_keys.extend([("temperature.gpu", "°C"), ("temperature.memory", "°C")]) + stat_keys.extend( + [("temperature.gpu", "°C"), ("temperature.memory", "°C")] + ) return stat_keys diff --git a/luxonis_train/callbacks/luxonis_progress_bar.py b/luxonis_train/callbacks/luxonis_progress_bar.py index d14fcf08..b8bf6512 100644 --- a/luxonis_train/callbacks/luxonis_progress_bar.py +++ b/luxonis_train/callbacks/luxonis_progress_bar.py @@ -3,7 +3,11 @@ import lightning.pytorch as pl import tabulate -from lightning.pytorch.callbacks import ProgressBar, RichProgressBar, TQDMProgressBar +from lightning.pytorch.callbacks import ( + ProgressBar, + RichProgressBar, + TQDMProgressBar, +) from rich.console import Console from rich.table import Table @@ -14,7 +18,6 @@ class BaseLuxonisProgressBar(ABC, ProgressBar): def get_metrics( self, trainer: pl.Trainer, pl_module: pl.LightningModule ) -> dict[str, int | str | float | dict[str, float]]: - # NOTE: there might be a cleaner way of doing this items = super().get_metrics(trainer, pl_module) items.pop("v_num", None) if trainer.training and pl_module.training_step_outputs: @@ -30,7 +33,8 @@ def print_results( ) -> None: """Prints results to the console. - This includes the stage name, loss value, and tables with metrics. + This includes the stage name, loss value, and tables with + metrics. @type stage: str @param stage: Stage name. @@ -39,12 +43,13 @@ def print_results( @type metrics: Mapping[str, Mapping[str, int | str | float]] @param metrics: Metrics in format {table_name: table}. """ - pass + ... 
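As a usage note (illustrative only, not part of this patch): the abstract `print_results` above is what the two concrete progress bars below implement. A minimal sketch of a custom subclass follows, assuming `print_results` is the only abstract member that needs overriding; the class name `PlainTextProgressBar` is hypothetical, while the imports and the registration pattern follow the paths shown elsewhere in this diff.

from collections.abc import Mapping

from lightning.pytorch.callbacks import TQDMProgressBar

from luxonis_train.callbacks import BaseLuxonisProgressBar
from luxonis_train.utils.registry import CALLBACKS


@CALLBACKS.register_module()
class PlainTextProgressBar(TQDMProgressBar, BaseLuxonisProgressBar):
    """Hypothetical progress bar that dumps results as plain text."""

    def print_results(
        self,
        stage: str,
        loss: float,
        metrics: Mapping[str, Mapping[str, int | str | float]],
    ) -> None:
        # Print the stage header, the final loss and every metric table.
        print(f"----- {stage} -----")
        print(f"loss: {loss:.5f}")
        for table_name, table in metrics.items():
            for name, value in table.items():
                print(f"{table_name}/{name}: {value}")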
@CALLBACKS.register_module() class LuxonisTQDMProgressBar(TQDMProgressBar, BaseLuxonisProgressBar): - """Custom text progress bar based on TQDMProgressBar from Pytorch Lightning.""" + """Custom text progress bar based on TQDMProgressBar from Pytorch + Lightning.""" def __init__(self): super().__init__(leave=True) @@ -71,7 +76,8 @@ def _print_table( @type key_name: str @param key_name: Name of the key column. Defaults to C{"Name"}. @type value_name: str - @param value_name: Name of the value column. Defaults to C{"Value"}. + @param value_name: Name of the value column. Defaults to + C{"Value"}. """ self._rule(title) print( @@ -100,14 +106,15 @@ def print_results( @CALLBACKS.register_module() class LuxonisRichProgressBar(RichProgressBar, BaseLuxonisProgressBar): - """Custom rich text progress bar based on RichProgressBar from Pytorch Lightning.""" + """Custom rich text progress bar based on RichProgressBar from + Pytorch Lightning.""" def __init__(self): super().__init__(leave=True) @property def console(self) -> Console: - if self._console is None: + if self._console is None: # pragma: no cover raise RuntimeError( "Console is not initialized for the `LuxonisRichProgressBar`. " "Consider setting `tracker.use_rich_progress_bar` to `False` in the configuration." @@ -130,7 +137,8 @@ def print_table( @type key_name: str @param key_name: Name of the key column. Defaults to C{"Name"}. @type value_name: str - @param value_name: Name of the value column. Defaults to C{"Value"}. + @param value_name: Name of the value column. Defaults to + C{"Value"}. """ rich_table = Table( title=title, @@ -140,10 +148,7 @@ def print_table( rich_table.add_column(key_name, style="magenta") rich_table.add_column(value_name, style="white") for name, value in table.items(): - if isinstance(value, float): - rich_table.add_row(name, f"{value:.5f}") - else: - rich_table.add_row(name, str(value)) + rich_table.add_row(name, f"{value:.5f}") self.console.print(rich_table) def print_results( @@ -153,7 +158,9 @@ def print_results( metrics: Mapping[str, Mapping[str, int | str | float]], ) -> None: self.console.rule(f"{stage}", style="bold magenta") - self.console.print(f"[bold magenta]Loss:[/bold magenta] [white]{loss}[/white]") + self.console.print( + f"[bold magenta]Loss:[/bold magenta] [white]{loss}[/white]" + ) self.console.print("[bold magenta]Metrics:[/bold magenta]") for table_name, table in metrics.items(): self.print_table(table_name, table) diff --git a/luxonis_train/callbacks/metadata_logger.py b/luxonis_train/callbacks/metadata_logger.py index 45ff8717..ab29f7d0 100644 --- a/luxonis_train/callbacks/metadata_logger.py +++ b/luxonis_train/callbacks/metadata_logger.py @@ -6,7 +6,7 @@ import yaml import luxonis_train -from luxonis_train.utils.config import Config +from luxonis_train.utils import Config from luxonis_train.utils.registry import CALLBACKS @@ -15,8 +15,9 @@ class MetadataLogger(pl.Callback): def __init__(self, hyperparams: list[str]): """Callback that logs training metadata. - Metadata include all defined hyperparameters together with git hashes of - luxonis-ml and luxonis-train packages. Also stores this information locally. + Metadata include all defined hyperparameters together with git + hashes of luxonis-ml and luxonis-train packages. Also stores + this information locally. @type hyperparams: list[str] @param hyperparams: List of hyperparameters to log. 
@@ -25,30 +26,44 @@ def __init__(self, hyperparams: list[str]): self.hyperparams = hyperparams def on_fit_start( - self, _: pl.Trainer, pl_module: "luxonis_train.models.LuxonisLightningModule" + self, + _: pl.Trainer, + pl_module: "luxonis_train.models.LuxonisLightningModule", ) -> None: cfg: Config = pl_module.cfg hparams = {key: cfg.get(key) for key in self.hyperparams} - # try to get luxonis-ml and luxonis-train git commit hashes (if installed as editable) luxonis_ml_hash = self._get_editable_package_git_hash("luxonis_ml") - if luxonis_ml_hash: + if luxonis_ml_hash: # pragma: no cover hparams["luxonis_ml"] = luxonis_ml_hash - luxonis_train_hash = self._get_editable_package_git_hash("luxonis_train") - if luxonis_train_hash: + luxonis_train_hash = self._get_editable_package_git_hash( + "luxonis_train" + ) + if luxonis_train_hash: # pragma: no cover hparams["luxonis_train"] = luxonis_train_hash pl_module.logger.log_hyperparams(hparams) - # also save metadata locally - with open(osp.join(pl_module.save_dir, "metadata.yaml"), "w+") as f: + with open(osp.join(pl_module.save_dir, "metadata.yaml"), "w") as f: yaml.dump(hparams, f, default_flow_style=False) @staticmethod - def _get_editable_package_git_hash(package_name: str) -> str | None: + def _get_editable_package_git_hash( + package_name: str, + ) -> str | None: # pragma: no cover + """Get git hash of an editable package. + + @type package_name: str + @param package_name: Name of the package. + @rtype: str or None + @return: Git hash of the package or None if the package is not + installed in editable mode. + """ try: distribution = pkg_resources.get_distribution(package_name) + if distribution.location is None: + return None package_location = osp.join(distribution.location, package_name) # remove any additional folders in path (e.g. "/src") diff --git a/luxonis_train/callbacks/module_freezer.py b/luxonis_train/callbacks/module_freezer.py index 4f73ff30..de0afa99 100644 --- a/luxonis_train/callbacks/module_freezer.py +++ b/luxonis_train/callbacks/module_freezer.py @@ -13,7 +13,8 @@ def __init__(self, frozen_modules: list[tuple[nn.Module, int]]): """Callback that freezes parts of the model. @type frozen_modules: list[tuple[nn.Module, int]] - @param frozen_modules: List of tuples of modules and epochs to freeze until. + @param frozen_modules: List of tuples of modules and epochs to + freeze until. 
""" super().__init__() self.frozen_modules = frozen_modules diff --git a/luxonis_train/callbacks/needs_checkpoint.py b/luxonis_train/callbacks/needs_checkpoint.py index 30355e82..b3de6aed 100644 --- a/luxonis_train/callbacks/needs_checkpoint.py +++ b/luxonis_train/callbacks/needs_checkpoint.py @@ -10,7 +10,9 @@ class NeedsCheckpoint(pl.Callback): def __init__( - self, preferred_checkpoint: Literal["metric", "loss"] = "metric", **kwargs + self, + preferred_checkpoint: Literal["metric", "loss"] = "metric", + **kwargs, ): super().__init__(**kwargs) self.preferred_checkpoint = preferred_checkpoint @@ -40,7 +42,8 @@ def _get_checkpoint( ) return path - def _get_other_type(self, checkpoint_type: str) -> str: + @staticmethod + def _get_other_type(checkpoint_type: str) -> str: if checkpoint_type == "loss": return "metric" return "loss" diff --git a/luxonis_train/callbacks/test_on_train_end.py b/luxonis_train/callbacks/test_on_train_end.py index f2bb09ec..a60a16dd 100644 --- a/luxonis_train/callbacks/test_on_train_end.py +++ b/luxonis_train/callbacks/test_on_train_end.py @@ -27,4 +27,6 @@ def on_train_end( for callback in trainer.callbacks: # type: ignore if isinstance(callback, ModelCheckpoint): if hash(callback.monitor) in best_paths: - callback.best_model_path = best_paths[hash(callback.monitor)] + callback.best_model_path = best_paths[ + hash(callback.monitor) + ] diff --git a/luxonis_train/callbacks/upload_checkpoint.py b/luxonis_train/callbacks/upload_checkpoint.py index 29da59ef..b9753e94 100644 --- a/luxonis_train/callbacks/upload_checkpoint.py +++ b/luxonis_train/callbacks/upload_checkpoint.py @@ -12,7 +12,8 @@ @CALLBACKS.register_module() class UploadCheckpoint(pl.Callback): - """Callback that uploads best checkpoint based on the validation loss.""" + """Callback that uploads best checkpoint based on the validation + loss.""" def __init__(self): """Constructs `UploadCheckpoint`. 
@@ -43,7 +44,9 @@ def on_save_checkpoint( if curr_best_checkpoint not in self.last_best_checkpoints: self.logger.info("Uploading checkpoint...") temp_filename = ( - Path(curr_best_checkpoint).parent.with_suffix(".ckpt").name + Path(curr_best_checkpoint) + .parent.with_suffix(".ckpt") + .name ) torch.save(checkpoint, temp_filename) module.logger.upload_artifact(temp_filename, typ="weights") diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index c683773c..cffa3ff1 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -3,7 +3,7 @@ import threading from logging import getLogger from pathlib import Path -from typing import Any, Literal +from typing import Any, Literal, Mapping, overload import lightning.pytorch as pl import lightning_utilities.core.rank_zero as rank_zero_module @@ -16,15 +16,17 @@ from luxonis_ml.nn_archive import ArchiveGenerator from luxonis_ml.nn_archive.config import CONFIG_VERSION from luxonis_ml.utils import LuxonisFileSystem, reset_logging, setup_logging +from typeguard import typechecked from luxonis_train.attached_modules.visualizers import get_unnormalized_images -from luxonis_train.callbacks import LuxonisRichProgressBar, LuxonisTQDMProgressBar +from luxonis_train.callbacks import ( + LuxonisRichProgressBar, + LuxonisTQDMProgressBar, +) +from luxonis_train.loaders import BaseLoaderTorch, collate_fn from luxonis_train.models import LuxonisLightningModule -from luxonis_train.utils.config import Config -from luxonis_train.utils.general import DatasetMetadata -from luxonis_train.utils.loaders import BaseLoaderTorch, collate_fn +from luxonis_train.utils import Config, DatasetMetadata, LuxonisTrackerPL from luxonis_train.utils.registry import LOADERS -from luxonis_train.utils.tracker import LuxonisTrackerPL from .utils.export_utils import ( blobconverter_export, @@ -41,8 +43,8 @@ class LuxonisModel: """Common logic of the core components. - This class contains common logic of the core components (trainer, evaluator, - exporter, etc.). + This class contains common logic of the core components (trainer, + evaluator, exporter, etc.). 
""" def __init__( @@ -80,6 +82,7 @@ def __init__( self.cfg.tracker.save_directory, self.tracker.run_name ) self.log_file = osp.join(self.run_save_dir, "luxonis_train.log") + self.error_message = None # NOTE: to add the file handler (we only get the save dir now, # but we want to use the logger before) @@ -89,10 +92,16 @@ def __init__( # NOTE: overriding logger in pl so it uses our logger to log device info rank_zero_module.log = logger - deterministic = False if self.cfg.trainer.seed is not None: pl.seed_everything(self.cfg.trainer.seed, workers=True) - deterministic = True + + self.pl_trainer = create_trainer( + self.cfg.trainer, + logger=self.tracker, + callbacks=LuxonisRichProgressBar() + if self.cfg.trainer.use_rich_progress_bar + else LuxonisTQDMProgressBar(), + ) self.train_augmentations = Augmentations( image_size=self.cfg.trainer.preprocessing.train_image_size, @@ -114,15 +123,6 @@ def __init__( only_normalize=True, ) - self.pl_trainer = create_trainer( - self.cfg, - logger=self.tracker, - deterministic=deterministic, - callbacks=LuxonisRichProgressBar() - if self.cfg.trainer.use_rich_progress_bar - else LuxonisTQDMProgressBar(), - ) - self.loaders: dict[str, BaseLoaderTorch] = {} for view in ["train", "val", "test"]: loader_name = self.cfg.loader.name @@ -155,27 +155,31 @@ def __init__( sampler = None # TODO: implement weighted sampler if self.cfg.trainer.use_weighted_sampler: - raise NotImplementedError("Weighted sampler is not implemented yet.") + raise NotImplementedError( + "Weighted sampler is not implemented yet." + ) self.pytorch_loaders = { view: torch_data.DataLoader( self.loaders[view], batch_size=self.cfg.trainer.batch_size, - num_workers=self.cfg.trainer.num_workers, + num_workers=self.cfg.trainer.n_workers, collate_fn=collate_fn, shuffle=view == "train", drop_last=( - self.cfg.trainer.skip_last_batch if view == "train" else False + self.cfg.trainer.skip_last_batch + if view == "train" + else False ), pin_memory=self.cfg.trainer.pin_memory, sampler=sampler if view == "train" else None, ) for view in ["train", "val", "test"] } - self.error_message = None - self.dataset_metadata = DatasetMetadata.from_loader(self.loaders["train"]) - self.dataset_metadata.set_loader(self.pytorch_loaders["train"]) + self.dataset_metadata = DatasetMetadata.from_loader( + self.loaders["train"] + ) self.cfg.save_data(osp.join(self.run_save_dir, "config.yaml")) @@ -195,7 +199,7 @@ def _train(self, resume: str | None, *args, **kwargs): status = "success" try: self.pl_trainer.fit(*args, ckpt_path=resume, **kwargs) - except Exception as e: + except Exception as e: # pragma: no cover logger.exception("Encountered an exception during training.") status = "failed" raise e @@ -211,29 +215,34 @@ def train( @type new_thread: bool @param new_thread: Runs training in new thread if set to True. @type resume_weights: str | None - @param resume_weights: Path to checkpoint to resume training from. + @param resume_weights: Path to the checkpoint from which to to + resume the training. 
""" if self.cfg.trainer.matmul_precision is not None: logger.info( f"Setting matmul precision to {self.cfg.trainer.matmul_precision}" ) - torch.set_float32_matmul_precision(self.cfg.trainer.matmul_precision) + torch.set_float32_matmul_precision( + self.cfg.trainer.matmul_precision + ) if resume_weights is not None: resume_weights = str( LuxonisFileSystem.download(resume_weights, self.run_save_dir) ) - def graceful_exit(signum: int, _): - logger.info(f"{signal.Signals(signum).name} received, stopping training...") + def graceful_exit(signum: int, _): # pragma: no cover + logger.info( + f"{signal.Signals(signum).name} received, stopping training..." + ) ckpt_path = osp.join(self.run_save_dir, "resume.ckpt") self.pl_trainer.save_checkpoint(ckpt_path) self.tracker.upload_artifact( ckpt_path, typ="checkpoints", name="resume.ckpt" ) self.tracker._finalize(status="failed") - exit(0) + exit() signal.signal(signal.SIGTERM, graceful_exit) @@ -249,7 +258,7 @@ def graceful_exit(signum: int, _): logger.info("Training finished") logger.info(f"Checkpoints saved in: {self.run_save_dir}") - else: + else: # pragma: no cover # Every time exception happens in the Thread, this hook will activate def thread_exception_hook(args): self.error_message = str(args.exc_value) @@ -269,7 +278,10 @@ def thread_exception_hook(args): self.thread.start() def export( - self, onnx_save_path: str | None = None, *, weights: str | Path | None = None + self, + onnx_save_path: str | None = None, + *, + weights: str | Path | None = None, ) -> None: """Runs export. @@ -290,8 +302,12 @@ def export( export_save_dir = Path(self.run_save_dir, "export") export_save_dir.mkdir(parents=True, exist_ok=True) - export_path = export_save_dir / (self.cfg.exporter.name or self.cfg.model.name) - onnx_save_path = onnx_save_path or str(export_path.with_suffix(".onnx")) + export_path = export_save_dir / ( + self.cfg.exporter.name or self.cfg.model.name + ) + onnx_save_path = onnx_save_path or str( + export_path.with_suffix(".onnx") + ) with replace_weights(self.lightning_module, weights): output_names = self.lightning_module.export_onnx( @@ -301,7 +317,9 @@ def export( try_onnx_simplify(onnx_save_path) self._exported_models["onnx"] = Path(onnx_save_path) - scale_values, mean_values, reverse_channels = get_preprocessing(self.cfg) + scale_values, mean_values, reverse_channels = get_preprocessing( + self.cfg + ) if self.cfg.exporter.blobconverter.active: try: @@ -313,7 +331,9 @@ def export( str(export_save_dir), onnx_save_path, ) - self._exported_models["blob"] = export_path.with_suffix(".blob") + self._exported_models["blob"] = export_path.with_suffix( + ".blob" + ) except ImportError: logger.error("Failed to import `blobconverter`") logger.warning( @@ -340,36 +360,52 @@ def export( for path in self._exported_models.values(): if self.cfg.exporter.upload_to_run: self.tracker.upload_artifact(path, typ="export") - if self.cfg.exporter.upload_url is not None: + if self.cfg.exporter.upload_url is not None: # pragma: no cover LuxonisFileSystem.upload(path, self.cfg.exporter.upload_url) with open(export_path.with_suffix(".yaml"), "w") as f: yaml.dump(modelconverter_config, f) if self.cfg.exporter.upload_to_run: self.tracker.upload_artifact(f.name, name=f.name, typ="export") - if self.cfg.exporter.upload_url is not None: + if self.cfg.exporter.upload_url is not None: # pragma: no cover LuxonisFileSystem.upload(f.name, self.cfg.exporter.upload_url) + @overload def test( - self, new_thread: bool = False, view: Literal["train", "test", "val"] = "val" - ) -> None: 
+ self, + new_thread: Literal[False] = ..., + view: Literal["train", "test", "val"] = "val", + ) -> Mapping[str, float]: ... + + @overload + def test( + self, + new_thread: Literal[True] = ..., + view: Literal["train", "test", "val"] = "val", + ) -> None: ... + + @typechecked + def test( + self, + new_thread: bool = False, + view: Literal["train", "val", "test"] = "val", + ) -> Mapping[str, float] | None: """Runs testing. @type new_thread: bool @param new_thread: Runs testing in a new thread if set to True. @type view: Literal["train", "test", "val"] @param view: Which view to run the testing on. Defauls to "val". + @rtype: Mapping[str, float] | None + @return: If new_thread is False, returns a dictionary test + results. """ - if view not in self.pytorch_loaders: - raise ValueError( - f"View {view} is not valid. Valid views are: 'train', 'val', 'test'." - ) loader = self.pytorch_loaders[view] if not new_thread: - self.pl_trainer.test(self.lightning_module, loader) - else: + return self.pl_trainer.test(self.lightning_module, loader)[0] + else: # pragma: no cover self.thread = threading.Thread( target=self.pl_trainer.test, args=(self.lightning_module, loader), @@ -377,22 +413,24 @@ def test( ) self.thread.start() - def infer(self, view: str = "val", save_dir: str | Path | None = None) -> None: + @typechecked + def infer( + self, + view: Literal["train", "val", "test"] = "val", + save_dir: str | Path | None = None, + ) -> None: """Runs inference. @type view: str - @param view: Which split to run the inference on. Valid values are: 'train', - 'val', 'test'. Defaults to "val". + @param view: Which split to run the inference on. Valid values + are: 'train', 'val', 'test'. Defaults to "val". @type save_dir: str | Path | None - @param save_dir: Directory where to save the visualizations. If not specified, - visualizations will be rendered on the screen. + @param save_dir: Directory where to save the visualizations. If + not specified, visualizations will be rendered on the + screen. """ self.lightning_module.eval() - if view not in self.pytorch_loaders: - raise ValueError( - f"View {view} is not valid. Valid views are: 'train', 'val', 'test'." 
- ) for inputs, labels in self.pytorch_loaders[view]: images = get_unnormalized_images(self.cfg, inputs) outputs = self.lightning_module.forward( @@ -418,18 +456,24 @@ def _objective(trial: optuna.trial.Trial) -> float: **tracker_params, ) - run_save_dir = osp.join(cfg_tracker.save_directory, child_tracker.run_name) + run_save_dir = osp.join( + cfg_tracker.save_directory, child_tracker.run_name + ) assert self.cfg.tuner is not None - curr_params = get_trial_params(all_augs, self.cfg.tuner.params, trial) + curr_params = get_trial_params( + all_augs, self.cfg.tuner.params, trial + ) curr_params["model.predefined_model"] = None cfg_copy = self.cfg.model_copy(deep=True) + # manually remove Normalize so it doesn't + # get duplicated when creating new cfg instance cfg_copy.trainer.preprocessing.augmentations = [ a for a in cfg_copy.trainer.preprocessing.augmentations if a.name != "Normalize" - ] # manually remove Normalize so it doesn't duplicate it when creating new cfg instance + ] cfg = Config.get_config(cfg_copy.model_dump(), curr_params) child_tracker.log_hyperparams(curr_params) @@ -449,18 +493,16 @@ def _objective(trial: optuna.trial.Trial) -> float: else LuxonisTQDMProgressBar() ] - pruner_callback = PyTorchLightningPruningCallback(trial, monitor="val/loss") + pruner_callback = PyTorchLightningPruningCallback( + trial, monitor="val/loss" + ) callbacks.append(pruner_callback) - deterministic = False - if self.cfg.trainer.seed: + + if self.cfg.trainer.seed is not None: pl.seed_everything(cfg.trainer.seed, workers=True) - deterministic = True pl_trainer = create_trainer( - cfg, - logger=child_tracker, - callbacks=callbacks, - deterministic=deterministic, + cfg.trainer, logger=child_tracker, callbacks=callbacks ) try: @@ -475,7 +517,9 @@ def _objective(trial: optuna.trial.Trial) -> float: except optuna.TrialPruned as e: logger.info(e) - if "val/loss" not in pl_trainer.callback_metrics: + if ( + "val/loss" not in pl_trainer.callback_metrics + ): # pragma: no cover raise ValueError( "No validation loss found. " "This can happen if `TestOnTrainEnd` callback is used." @@ -485,9 +529,13 @@ def _objective(trial: optuna.trial.Trial) -> float: cfg_tuner = self.cfg.tuner if cfg_tuner is None: - raise ValueError("You have to specify the `tuner` section in config.") + raise ValueError( + "You have to specify the `tuner` section in config." 
+ ) - all_augs = [a.name for a in self.cfg.trainer.preprocessing.augmentations] + all_augs = [ + a.name for a in self.cfg.trainer.preprocessing.augmentations + ] rank = rank_zero_only.rank cfg_tracker = self.cfg.tracker tracker_params = cfg_tracker.model_dump() @@ -499,7 +547,7 @@ def _objective(trial: optuna.trial.Trial) -> float: is_sweep=False, **tracker_params, ) - if self.parent_tracker.is_mlflow: + if self.parent_tracker.is_mlflow: # pragma: no cover # Experiment needs to be interacted with to create actual MLFlow run self.parent_tracker.experiment["mlflow"].active_run() @@ -515,7 +563,7 @@ def _objective(trial: optuna.trial.Trial) -> float: if cfg_tuner.storage.active: if cfg_tuner.storage.storage_type == "local": storage = "sqlite:///study_local.db" - else: + else: # pragma: no cover storage = "postgresql://{}:{}@{}:{}/{}".format( self.cfg.ENVIRON.POSTGRES_USER, self.cfg.ENVIRON.POSTGRES_PASSWORD, @@ -540,7 +588,7 @@ def _objective(trial: optuna.trial.Trial) -> float: self.parent_tracker.log_hyperparams(study.best_params) - if self.cfg.tracker.is_wandb: + if self.cfg.tracker.is_wandb: # pragma: no cover # If wandb used then init parent tracker separately at the end wandb_parent_tracker = LuxonisTrackerPL( rank=rank_zero_only.rank, @@ -555,8 +603,8 @@ def archive(self, path: str | Path | None = None) -> Path: """Generates an NN Archive out of a model executable. @type path: str | Path | None - @param path: Path to the model executable. If not specified, the model will be - exported first. + @param path: Path to the model executable. If not specified, the + model will be exported first. @rtype: Path @return: Path to the generated NN Archive. """ @@ -583,8 +631,12 @@ def _mult(lst: list[float | int]) -> list[float]: return [round(x * 255.0, 5) for x in lst] preprocessing = { # TODO: keep preprocessing same for each input? - "mean": _mult(self.cfg.trainer.preprocessing.normalize.params["mean"]), - "scale": _mult(self.cfg.trainer.preprocessing.normalize.params["std"]), + "mean": _mult( + self.cfg.trainer.preprocessing.normalize.params["mean"] + ), + "scale": _mult( + self.cfg.trainer.preprocessing.normalize.params["std"] + ), "reverse_channels": self.cfg.trainer.preprocessing.train_rgb, "interleaved_to_planar": False, # TODO: make it modifiable? } @@ -642,8 +694,10 @@ def _mult(lst: list[float | int]) -> list[float]: logger.info(f"NN Archive saved to {archive_path}") - if self.cfg.archiver.upload_url is not None: - LuxonisFileSystem.upload(archive_path, self.cfg.archiver.upload_url) + if self.cfg.archiver.upload_url is not None: # pragma: no cover + LuxonisFileSystem.upload( + archive_path, self.cfg.archiver.upload_url + ) if self.cfg.archiver.upload_to_run: self.tracker.upload_artifact(archive_path, typ="archive") @@ -655,14 +709,15 @@ def get_status(self) -> tuple[int, int]: """Get current status of training. @rtype: tuple[int, int] - @return: First element is current epoch, second element is total number of - epochs. + @return: First element is current epoch, second element is total + number of epochs. """ return self.lightning_module.get_status() @rank_zero_only def get_status_percentage(self) -> float: - """Return percentage of current training, takes into account early stopping. + """Return percentage of current training, takes into account + early stopping. @rtype: float @return: Percentage of current training in range 0-100. 
@@ -671,7 +726,8 @@ def get_status_percentage(self) -> float: @rank_zero_only def get_error_message(self) -> str | None: - """Return error message if one occurs while running in thread, otherwise None. + """Return error message if one occurs while running in thread, + otherwise None. @rtype: str | None @return: Error message @@ -680,10 +736,12 @@ def get_error_message(self) -> str | None: @rank_zero_only def get_min_loss_checkpoint_path(self) -> str | None: - """Return best checkpoint path with respect to minimal validation loss. + """Return best checkpoint path with respect to minimal + validation loss. @rtype: str - @return: Path to best checkpoint with respect to minimal validation loss + @return: Path to best checkpoint with respect to minimal + validation loss """ if not self.pl_trainer.checkpoint_callbacks: return None @@ -691,10 +749,12 @@ def get_min_loss_checkpoint_path(self) -> str | None: @rank_zero_only def get_best_metric_checkpoint_path(self) -> str | None: - """Return best checkpoint path with respect to best validation metric. + """Return best checkpoint path with respect to best validation + metric. @rtype: str - @return: Path to best checkpoint with respect to best validation metric + @return: Path to best checkpoint with respect to best validation + metric """ if len(self.pl_trainer.checkpoint_callbacks) < 2: return None diff --git a/luxonis_train/core/utils/archive_utils.py b/luxonis_train/core/utils/archive_utils.py index 72cdefc7..96c2bcde 100644 --- a/luxonis_train/core/utils/archive_utils.py +++ b/luxonis_train/core/utils/archive_utils.py @@ -15,7 +15,7 @@ ImplementedHeads, ImplementedHeadsIsSoxtmaxed, ) -from luxonis_train.utils.config import Config +from luxonis_train.utils import Config logger = logging.getLogger(__name__) @@ -63,7 +63,7 @@ def _from_onnx_dtype(dtype: int) -> DataType: TensorProto.FLOAT: "float32", TensorProto.FLOAT16: "float16", } - if dtype not in dtype_map: + if dtype not in dtype_map: # pragma: no cover raise ValueError(f"Unsupported ONNX data type: `{dtype}`") return DataType(dtype_map[dtype]) @@ -72,7 +72,7 @@ def _from_onnx_dtype(dtype: int) -> DataType: def _load_onnx_model(onnx_path: Path) -> onnx.ModelProto: try: return onnx.load(str(onnx_path)) - except Exception as e: + except Exception as e: # pragma: no cover raise ValueError(f"Failed to load ONNX model: `{onnx_path}`") from e @@ -98,7 +98,9 @@ def _get_onnx_inputs(onnx_path: Path) -> dict[str, MetadataDict]: for inp in model.graph.input: shape = [dim.dim_value for dim in inp.type.tensor_type.shape.dim] inputs[inp.name]["shape"] = shape - inputs[inp.name]["dtype"] = _from_onnx_dtype(inp.type.tensor_type.elem_type) + inputs[inp.name]["dtype"] = _from_onnx_dtype( + inp.type.tensor_type.elem_type + ) return inputs @@ -116,7 +118,7 @@ def _get_classes( node_task = "segmentation" case "ImplicitKeypointBBoxHead" | "EfficientKeypointBBoxHead": node_task = "keypoints" - case _: + case _: # pragma: no cover raise ValueError("Node does not map to a default task.") return classes.get(node_task, []) @@ -137,7 +139,9 @@ def _get_head_specific_parameters( parameters = {} if head_name == "ClassificationHead": - parameters["is_softmax"] = getattr(ImplementedHeadsIsSoxtmaxed, head_name).value + parameters["is_softmax"] = getattr( + ImplementedHeadsIsSoxtmaxed, head_name + ).value elif head_name == "EfficientBBoxHead": parameters["subtype"] = ObjectDetectionSubtypeYOLO.YOLOv6.value head_node = nodes[head_alias] @@ -145,7 +149,9 @@ def _get_head_specific_parameters( parameters["conf_threshold"] = 
head_node.conf_thres parameters["max_det"] = head_node.max_det elif head_name in ["SegmentationHead", "BiSeNetHead"]: - parameters["is_softmax"] = getattr(ImplementedHeadsIsSoxtmaxed, head_name).value + parameters["is_softmax"] = getattr( + ImplementedHeadsIsSoxtmaxed, head_name + ).value elif head_name == "ImplicitKeypointBBoxHead": parameters["subtype"] = ObjectDetectionSubtypeYOLO.YOLOv7.value head_node = nodes[head_alias] @@ -161,18 +167,21 @@ def _get_head_specific_parameters( parameters["conf_threshold"] = head_node.conf_thres parameters["max_det"] = head_node.max_det parameters["n_keypoints"] = head_node.n_keypoints - else: + else: # pragma: no cover raise ValueError("Unknown head name") return parameters -def _get_head_outputs(outputs: list[dict], head_name: str, head_type: str) -> list[str]: +def _get_head_outputs( + outputs: list[dict], head_name: str, head_type: str +) -> list[str]: """Get model outputs in a head-specific format. @type outputs: list[dict] @param outputs: List of NN Archive outputs. @type head_name: str - @param head_name: Type of the head (e.g. 'EfficientBBoxHead') or its custom alias. + @param head_name: Type of the head (e.g. 'EfficientBBoxHead') or its + custom alias. @type head_type: str @param head_name: Type of the head (e.g. 'EfficientBBoxHead'). @rtype: list[str] @@ -238,7 +247,9 @@ def get_heads( task = str(next(iter(task.values()))) classes = _get_classes(node_name, task, class_dict) - head_outputs = _get_head_outputs(outputs, node_alias, node_name) + head_outputs = _get_head_outputs( + outputs, node_alias, node_name + ) head_dict = { "parser": parser, "metadata": { diff --git a/luxonis_train/core/utils/export_utils.py b/luxonis_train/core/utils/export_utils.py index 3b34a912..b4863f1b 100644 --- a/luxonis_train/core/utils/export_utils.py +++ b/luxonis_train/core/utils/export_utils.py @@ -42,7 +42,7 @@ def try_onnx_simplify(onnx_path: str) -> None: model_onnx = onnx.load(onnx_path) onnx_model, check = onnxsim.simplify(model_onnx) if not check: - raise RuntimeError("ONNX simplify failed.") + raise RuntimeError("ONNX simplify failed.") # pragma: no cover onnx.save(onnx_model, onnx_path) logger.info(f"ONNX model saved to {onnx_path}") @@ -52,7 +52,7 @@ def try_onnx_simplify(onnx_path: str) -> None: "`onnxsim` not installed. Skipping ONNX model simplification. " "Ensure `onnxsim` is installed in your environment." ) - except RuntimeError: + except RuntimeError: # pragma: no cover logger.error( "Failed to simplify ONNX model. Proceeding without simplification." 
) @@ -100,7 +100,7 @@ def blobconverter_export( logger.info("Converting ONNX to .blob") - optimizer_params = [] + optimizer_params: list[str] = [] if scale_values: optimizer_params.append(f"--scale_values={scale_values}") if mean_values: @@ -111,7 +111,7 @@ def blobconverter_export( blob_path = blobconverter.from_onnx( model=onnx_path, optimizer_params=optimizer_params, - data_type=cfg.data_type, + data_type=cfg.data_type.upper(), shaves=cfg.blobconverter.shaves, version=cfg.blobconverter.version, use_cache=False, diff --git a/luxonis_train/core/utils/train_utils.py b/luxonis_train/core/utils/train_utils.py index 3a45a85b..73b615cb 100644 --- a/luxonis_train/core/utils/train_utils.py +++ b/luxonis_train/core/utils/train_utils.py @@ -1,9 +1,11 @@ +from typing import Any + import lightning.pytorch as pl -from luxonis_train.utils.config import Config +from luxonis_train.utils.config import TrainerConfig -def create_trainer(cfg: Config, **kwargs) -> pl.Trainer: +def create_trainer(cfg: TrainerConfig, **kwargs: Any) -> pl.Trainer: """Creates Pytorch Lightning trainer. @type cfg: Config @@ -13,13 +15,14 @@ def create_trainer(cfg: Config, **kwargs) -> pl.Trainer: @return: Pytorch Lightning trainer. """ return pl.Trainer( - accelerator=cfg.trainer.accelerator, - devices=cfg.trainer.devices, - strategy=cfg.trainer.strategy, - max_epochs=cfg.trainer.epochs, - accumulate_grad_batches=cfg.trainer.accumulate_grad_batches, - check_val_every_n_epoch=cfg.trainer.validation_interval, - num_sanity_val_steps=cfg.trainer.num_sanity_val_steps, - profiler=cfg.trainer.profiler, + accelerator=cfg.accelerator, + devices=cfg.devices, + strategy=cfg.strategy, + max_epochs=cfg.epochs, + accumulate_grad_batches=cfg.accumulate_grad_batches, + check_val_every_n_epoch=cfg.validation_interval, + num_sanity_val_steps=cfg.n_sanity_val_steps, + profiler=cfg.profiler, + deterministic=cfg.deterministic, **kwargs, ) diff --git a/luxonis_train/core/utils/tune_utils.py b/luxonis_train/core/utils/tune_utils.py index e2fe692e..d9d6c4c0 100644 --- a/luxonis_train/core/utils/tune_utils.py +++ b/luxonis_train/core/utils/tune_utils.py @@ -61,17 +61,23 @@ def get_trial_params( case "int", [int(low), int(high), *tail]: step = tail[0] if tail else 1 if not isinstance(step, int): - raise ValueError(f"Step for int type must be int, but got {step}") + raise ValueError( + f"Step for int type must be int, but got {step}" + ) new_value = trial.suggest_int(key_name, low, high, step=step) case "loguniform", [float(low), float(high)]: new_value = trial.suggest_loguniform(key_name, low, high) case "uniform", [float(low), float(high)]: new_value = trial.suggest_uniform(key_name, low, high) case _, _: - raise KeyError(f"Combination of {key_type} and {value} not supported") + raise KeyError( + f"Combination of {key_type} and {value} not supported" + ) new_params[key_name] = new_value if len(new_params) == 0: - raise ValueError("No paramteres to tune. Specify them under `tuner.params`.") + raise ValueError( + "No paramteres to tune. Specify them under `tuner.params`." 
+ ) return new_params diff --git a/luxonis_train/utils/loaders/__init__.py b/luxonis_train/loaders/__init__.py similarity index 100% rename from luxonis_train/utils/loaders/__init__.py rename to luxonis_train/loaders/__init__.py diff --git a/luxonis_train/utils/loaders/base_loader.py b/luxonis_train/loaders/base_loader.py similarity index 65% rename from luxonis_train/utils/loaders/base_loader.py rename to luxonis_train/loaders/base_loader.py index 5e884955..b6b8a863 100644 --- a/luxonis_train/utils/loaders/base_loader.py +++ b/luxonis_train/loaders/base_loader.py @@ -1,16 +1,17 @@ from abc import ABC, abstractmethod import torch -from luxonis_ml.data import Augmentations +from luxonis_ml.data import Augmentations, LabelType from luxonis_ml.utils.registry import AutoRegisterMeta from torch import Size, Tensor from torch.utils.data import Dataset from luxonis_train.utils.registry import LOADERS -from luxonis_train.utils.types import Labels, LabelType +from luxonis_train.utils.types import Labels LuxonisLoaderTorchOutput = tuple[dict[str, Tensor], Labels] -"""LuxonisLoaderTorchOutput is a tuple of source tensors and corresponding labels.""" +"""LuxonisLoaderTorchOutput is a tuple of source tensors and +corresponding labels.""" class BaseLoaderTorch( @@ -20,8 +21,8 @@ class BaseLoaderTorch( register=False, registry=LOADERS, ): - """Base abstract loader class that enforces LuxonisLoaderTorchOutput output label - structure.""" + """Base abstract loader class that enforces LuxonisLoaderTorchOutput + output label structure.""" def __init__( self, @@ -38,6 +39,8 @@ def image_source(self) -> str: """Name of the input image group. Example: 'image' + + @type: str """ if self._image_source is None: raise ValueError("image_source is not set") @@ -47,39 +50,46 @@ def image_source(self) -> str: @abstractmethod def input_shapes(self) -> dict[str, Size]: """ - Shape of each loader group (sub-element), WITHOUT batch dimension. + Shape (c, h, w) of each loader group (sub-element), WITHOUT batch dimension. Examples: - 1. Single image input:: - { - 'image': torch.Size([3, 224, 224]), - } - - 2. Image and segmentation input:: - { - 'image': torch.Size([3, 224, 224]), - 'segmentation': torch.Size([1, 224, 224]), - } - - 3. Left image, right image and disparity input:: - { - 'left': torch.Size([3, 224, 224]), - 'right': torch.Size([3, 224, 224]), - 'disparity': torch.Size([1, 224, 224]), - } - - 4. Image, keypoints, and point cloud input:: - { - 'image': torch.Size([3, 224, 224]), - 'keypoints': torch.Size([17, 2]), - 'point_cloud': torch.Size([20000, 3]), - } - - @rtype: dict[str, Size] - @return: A dictionary mapping group names to their shapes. + 1. Single image input:: + { + 'image': torch.Size([3, 224, 224]), + } + + 2. Image and segmentation input:: + { + 'image': torch.Size([3, 224, 224]), + 'segmentation': torch.Size([1, 224, 224]), + } + + 3. Left image, right image and disparity input:: + { + 'left': torch.Size([3, 224, 224]), + 'right': torch.Size([3, 224, 224]), + 'disparity': torch.Size([1, 224, 224]), + } + + 4. Image, keypoints, and point cloud input:: + { + 'image': torch.Size([3, 224, 224]), + 'keypoints': torch.Size([17, 2]), + 'point_cloud': torch.Size([20000, 3]), + } + + @type: dict[str, Size] """ ... + @property + def input_shape(self) -> Size: + """Shape (c, h, w) of the input tensor, WITHOUT batch dimension. 
+ + @type: torch.Size + """ + return self.input_shapes[self.image_source] + @abstractmethod def __len__(self) -> int: """Returns length of the dataset.""" @@ -106,11 +116,12 @@ def get_classes(self) -> dict[str, list[str]]: ... def get_n_keypoints(self) -> dict[str, int] | None: - """Returns the dictionary defining the semantic skeleton for each class using - keypoints. + """Returns the dictionary defining the semantic skeleton for + each class using keypoints. @rtype: Dict[str, Dict] - @return: A dictionary mapping classes to their skeleton definitions. + @return: A dictionary mapping classes to their skeleton + definitions. """ return None @@ -121,19 +132,21 @@ def collate_fn( """Default collate function used for training. @type batch: list[LuxonisLoaderTorchOutput] - @param batch: List of loader outputs (dict of Tensors) and labels (dict of Tensors) - in the LuxonisLoaderTorchOutput format. + @param batch: List of loader outputs (dict of Tensors) and labels + (dict of Tensors) in the LuxonisLoaderTorchOutput format. @rtype: tuple[dict[str, Tensor], dict[LabelType, Tensor]] - @return: Tuple of inputs and annotations in the format expected by the model. + @return: Tuple of inputs and annotations in the format expected by + the model. """ inputs: tuple[dict[str, Tensor], ...] labels: tuple[Labels, ...] inputs, labels = zip(*batch) - out_inputs = {k: torch.stack([i[k] for i in inputs], 0) for k in inputs[0].keys()} - out_labels = {task: {} for task in labels[0].keys()} + out_inputs = { + k: torch.stack([i[k] for i in inputs], 0) for k in inputs[0].keys() + } - out_labels = {} + out_labels: Labels = {} for task in labels[0].keys(): label_type = labels[0][task][1] diff --git a/luxonis_train/utils/loaders/luxonis_loader_torch.py b/luxonis_train/loaders/luxonis_loader_torch.py similarity index 98% rename from luxonis_train/utils/loaders/luxonis_loader_torch.py rename to luxonis_train/loaders/luxonis_loader_torch.py index 328f87be..8286a7a2 100644 --- a/luxonis_train/utils/loaders/luxonis_loader_torch.py +++ b/luxonis_train/loaders/luxonis_loader_torch.py @@ -156,7 +156,9 @@ def _parse_dataset( f"Supported types are: {', '.join(DatasetType.__members__)}." 
) - logger.info(f"Parsing dataset from {dataset_dir} with name '{dataset_name}'") + logger.info( + f"Parsing dataset from {dataset_dir} with name '{dataset_name}'" + ) return LuxonisParser( dataset_dir, diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index a3671dac..2bbf8ca9 100644 --- a/luxonis_train/models/luxonis_lightning.py +++ b/luxonis_train/models/luxonis_lightning.py @@ -1,6 +1,7 @@ from collections import defaultdict from collections.abc import Mapping from logging import getLogger +from pathlib import Path from typing import Literal, cast import lightning.pytorch as pl @@ -17,21 +18,32 @@ BaseMetric, BaseVisualizer, ) -from luxonis_train.attached_modules.metrics.common import TorchMetricWrapper +from luxonis_train.attached_modules.metrics.torchmetrics import ( + TorchMetricWrapper, +) from luxonis_train.attached_modules.visualizers import ( combine_visualizations, get_unnormalized_images, ) -from luxonis_train.callbacks import ( - BaseLuxonisProgressBar, - ModuleFreezer, -) +from luxonis_train.callbacks import BaseLuxonisProgressBar, ModuleFreezer from luxonis_train.nodes import BaseNode +from luxonis_train.utils import ( + DatasetMetadata, + Kwargs, + Labels, + LuxonisTrackerPL, + Packet, + to_shape_packet, + traverse_graph, +) from luxonis_train.utils.config import AttachedModuleConfig, Config -from luxonis_train.utils.general import DatasetMetadata, to_shape_packet, traverse_graph -from luxonis_train.utils.registry import CALLBACKS, OPTIMIZERS, SCHEDULERS, Registry -from luxonis_train.utils.tracker import LuxonisTrackerPL -from luxonis_train.utils.types import Kwargs, Labels, Packet +from luxonis_train.utils.graph import Graph +from luxonis_train.utils.registry import ( + CALLBACKS, + OPTIMIZERS, + SCHEDULERS, + Registry, +) from .luxonis_output import LuxonisOutput @@ -105,13 +117,13 @@ def __init__( @type save_dir: str @param save_dir: Directory to save checkpoints. @type input_shapes: dict[str, Size] - @param input_shapes: Dictionary of input shapes. Keys are input names, values - are shapes. + @param input_shapes: Dictionary of input shapes. Keys are input + names, values are shapes. @type dataset_metadata: L{DatasetMetadata} | None @param dataset_metadata: Dataset metadata. @type kwargs: Any - @param kwargs: Additional arguments to pass to the L{LightningModule} - constructor. + @param kwargs: Additional arguments to pass to the + L{LightningModule} constructor. 
""" super().__init__(**kwargs) @@ -123,18 +135,24 @@ def __init__( self.image_source = cfg.loader.image_source self.dataset_metadata = dataset_metadata or DatasetMetadata() self.frozen_nodes: list[tuple[nn.Module, int]] = [] - self.graph: dict[str, list[str]] = {} + self.graph: Graph = {} self.loader_input_shapes: dict[str, dict[str, Size]] = {} self.node_input_sources: dict[str, list[str]] = defaultdict(list) self.loss_weights: dict[str, float] = {} self.main_metric: str | None = None self.save_dir = save_dir self.test_step_outputs: list[Mapping[str, Tensor | float | int]] = [] - self.training_step_outputs: list[Mapping[str, Tensor | float | int]] = [] - self.validation_step_outputs: list[Mapping[str, Tensor | float | int]] = [] + self.training_step_outputs: list[ + Mapping[str, Tensor | float | int] + ] = [] + self.validation_step_outputs: list[ + Mapping[str, Tensor | float | int] + ] = [] self.losses: dict[str, dict[str, BaseLoss]] = defaultdict(dict) self.metrics: dict[str, dict[str, BaseMetric]] = defaultdict(dict) - self.visualizers: dict[str, dict[str, BaseVisualizer]] = defaultdict(dict) + self.visualizers: dict[str, dict[str, BaseVisualizer]] = defaultdict( + dict + ) self._logged_images = 0 @@ -152,7 +170,9 @@ def __init__( elif isinstance(node_cfg.freezing.unfreeze_after, int): unfreeze_after = node_cfg.freezing.unfreeze_after else: - unfreeze_after = int(node_cfg.freezing.unfreeze_after * epochs) + unfreeze_after = int( + node_cfg.freezing.unfreeze_after * epochs + ) frozen_nodes.append((node_name, unfreeze_after)) if node_cfg.task is not None: @@ -172,8 +192,14 @@ def __init__( node_cfg.task = {next(iter(Node.tasks)): node_cfg.task} else: - node_cfg.task = {**Node._process_tasks(Node.tasks), **node_cfg.task} - nodes[node_name] = (Node, {**node_cfg.params, "_tasks": node_cfg.task}) + node_cfg.task = { + **Node._process_tasks(Node.tasks), + **node_cfg.task, + } + nodes[node_name] = ( + Node, + {**node_cfg.params, "_tasks": node_cfg.task}, + ) # Handle inputs for this node if node_cfg.input_sources: @@ -241,7 +267,7 @@ def __init__( @property def core(self) -> "luxonis_train.core.LuxonisModel": """Returns the core model.""" - if self._core is None: + if self._core is None: # pragma: no cover raise ValueError("Core reference is not set.") return self._core @@ -251,12 +277,12 @@ def _initiate_nodes( ) -> nn.ModuleDict: """Initializes all the nodes in the model. - Traverses the graph and initiates each node using outputs of the preceding - nodes. + Traverses the graph and initiates each node using outputs of the + preceding nodes. @type nodes: dict[str, tuple[type[LuxonisNode], Kwargs]] - @param nodes: Dictionary of nodes to be initiated. Keys are node names, values - are tuples of node class and node kwargs. + @param nodes: Dictionary of nodes to be initiated. Keys are node + names, values are tuples of node class and node kwargs. @rtype: L{nn.ModuleDict}[str, L{LuxonisNode}] @return: Dictionary of initiated nodes. """ @@ -268,9 +294,10 @@ def _initiate_nodes( for source_name, shape in shapes.items() } - for node_name, (Node, node_kwargs), node_input_names, _ in traverse_graph( - self.graph, nodes - ): + for node_name, ( + Node, + node_kwargs, + ), node_input_names, _ in traverse_graph(self.graph, nodes): node_dummy_inputs: list[Packet[Tensor]] = [] """List of dummy input packets for the node. @@ -313,23 +340,27 @@ def forward( ) -> LuxonisOutput: """Forward pass of the model. - Traverses the graph and step-by-step computes the outputs of each node. 
Each - next node is computed only when all of its predecessors are computed. Once the - outputs are not needed anymore, they are removed from the memory. + Traverses the graph and step-by-step computes the outputs of + each node. Each next node is computed only when all of its + predecessors are computed. Once the outputs are not needed + anymore, they are removed from the memory. @type inputs: L{Tensor} @param inputs: Input tensor. @type task_labels: L{TaskLabels} | None @param task_labels: Labels dictionary. Defaults to C{None}. @type images: L{Tensor} | None - @param images: Canvas tensor for visualizers. Defaults to C{None}. + @param images: Canvas tensor for visualizers. Defaults to + C{None}. @type compute_loss: bool - @param compute_loss: Whether to compute losses. Defaults to C{True}. + @param compute_loss: Whether to compute losses. Defaults to + C{True}. @type compute_metrics: bool - @param compute_metrics: Whether to update metrics. Defaults to C{True}. + @param compute_metrics: Whether to update metrics. Defaults to + C{True}. @type compute_visualizations: bool - @param compute_visualizations: Whether to compute visualizations. Defaults to - C{False}. + @param compute_visualizations: Whether to compute + visualizations. Defaults to C{False}. @rtype: L{LuxonisOutput} @return: Output of the model. """ @@ -353,11 +384,19 @@ def forward( outputs = node.run(node_inputs) computed[node_name] = outputs - if compute_loss and node_name in self.losses and labels is not None: + if ( + compute_loss + and node_name in self.losses + and labels is not None + ): for loss_name, loss in self.losses[node_name].items(): losses[node_name][loss_name] = loss.run(outputs, labels) - if compute_metrics and node_name in self.metrics and labels is not None: + if ( + compute_metrics + and node_name in self.metrics + and labels is not None + ): for metric in self.metrics[node_name].values(): metric.run_update(outputs, labels) @@ -367,7 +406,9 @@ def forward( and images is not None and labels is not None ): - for viz_name, visualizer in self.visualizers[node_name].items(): + for viz_name, visualizer in self.visualizers[ + node_name + ].items(): viz = combine_visualizations( visualizer.run( images, @@ -420,7 +461,7 @@ def compute_metrics(self) -> dict[str, dict[str, Tensor]]: computed_submetrics = {metric_name: metric_value} case dict(submetrics): computed_submetrics = submetrics - case unknown: + case unknown: # pragma: no cover raise ValueError( f"Metric {metric_name} returned unexpected value of " f"type {type(unknown)}." @@ -435,7 +476,8 @@ def export_onnx(self, save_path: str, **kwargs) -> list[str]: @type save_path: str @param save_path: Path where the exported model will be saved. @type kwargs: Any - @param kwargs: Additional arguments for the L{torch.onnx.export} method. + @param kwargs: Additional arguments for the L{torch.onnx.export} + method. @rtype: list[str] @return: List of output names. 
""" @@ -448,7 +490,8 @@ def export_onnx(self, save_path: str, **kwargs) -> list[str]: } inputs_deep_clone = { - k: torch.zeros(elem.shape).to(self.device) for k, elem in inputs.items() + k: torch.zeros(elem.shape).to(self.device) + for k, elem in inputs.items() } inputs_for_onnx = {"inputs": inputs_deep_clone} @@ -519,22 +562,26 @@ def export_forward(inputs) -> tuple[Tensor, ...]: def process_losses( self, - losses_dict: dict[str, dict[str, Tensor | tuple[Tensor, dict[str, Tensor]]]], + losses_dict: dict[ + str, dict[str, Tensor | tuple[Tensor, dict[str, Tensor]]] + ], ) -> tuple[Tensor, dict[str, Tensor]]: """Processes individual losses from the model run. - Goes over the computed losses and computes the final loss as a weighted sum of - all the losses. + Goes over the computed losses and computes the final loss as a + weighted sum of all the losses. - @type losses_dict: dict[str, dict[str, Tensor | tuple[Tensor, dict[str, - Tensor]]]] - @param losses_dict: Dictionary of computed losses. Each node can have multiple - losses attached. The first key identifies the node, the second key - identifies the specific loss. Values are either single tensors or tuples of - tensors and sublosses. + @type losses_dict: dict[str, dict[str, Tensor | tuple[Tensor, + dict[str, Tensor]]]] + @param losses_dict: Dictionary of computed losses. Each node can + have multiple losses attached. The first key identifies the + node, the second key identifies the specific loss. Values + are either single tensors or tuples of tensors and + sublosses. @rtype: tuple[Tensor, dict[str, Tensor]] - @return: Tuple of final loss and dictionary of processed sublosses. The - dictionary is in a format of {loss_name: loss_value}. + @return: Tuple of final loss and dictionary of processed + sublosses. The dictionary is in a format of {loss_name: + loss_value}. 
""" final_loss = torch.zeros(1, device=self.device) training_step_output: dict[str, Tensor] = {} @@ -548,9 +595,9 @@ def process_losses( loss *= self.loss_weights[loss_name] final_loss += loss - training_step_output[ - f"loss/{node_name}/{loss_name}" - ] = loss.detach().cpu() + training_step_output[f"loss/{node_name}/{loss_name}"] = ( + loss.detach().cpu() + ) if self.cfg.trainer.log_sub_losses and sublosses: for subloss_name, subloss_value in sublosses.items(): training_step_output[ @@ -559,10 +606,14 @@ def process_losses( training_step_output["loss"] = final_loss.detach().cpu() return final_loss, training_step_output - def training_step(self, train_batch: tuple[dict[str, Tensor], Labels]) -> Tensor: + def training_step( + self, train_batch: tuple[dict[str, Tensor], Labels] + ) -> Tensor: """Performs one step of training with provided batch.""" outputs = self.forward(*train_batch) - assert outputs.losses, "Losses are empty, check if you have defined any loss" + assert ( + outputs.losses + ), "Losses are empty, check if you have defined any loss" loss, training_step_output = self.process_losses(outputs.losses) self.training_step_outputs.append(training_step_output) @@ -605,7 +656,8 @@ def get_status(self) -> tuple[int, int]: return self.current_epoch, self.cfg.trainer.epochs def get_status_percentage(self) -> float: - """Returns percentage of current training, takes into account early stopping.""" + """Returns percentage of current training, takes into account + early stopping.""" if self._trainer.early_stopping_callback: # model haven't yet stop from early stopping callback if self._trainer.early_stopping_callback.stopped_epoch == 0: @@ -616,11 +668,13 @@ def get_status_percentage(self) -> float: return (self.current_epoch / self.cfg.trainer.epochs) * 100 def _evaluation_step( - self, mode: Literal["test", "val"], batch: tuple[dict[str, Tensor], Labels] + self, + mode: Literal["test", "val"], + batch: tuple[dict[str, Tensor], Labels], ) -> dict[str, Tensor]: inputs, labels = batch images = None - if self._logged_images < self.cfg.trainer.num_log_images: + if self._logged_images < self.cfg.trainer.n_log_images: images = get_unnormalized_images(self.cfg, inputs) outputs = self.forward( inputs, @@ -638,7 +692,7 @@ def _evaluation_step( for viz_name, viz_batch in visualizations.items(): logged_images = self._logged_images for viz in viz_batch: - if logged_images >= self.cfg.trainer.num_log_images: + if logged_images >= self.cfg.trainer.n_log_images: break self.logger.log_image( f"{mode}/visualizations/{node_name}/{viz_name}/{logged_images}", @@ -662,7 +716,9 @@ def _evaluation_epoch_end(self, mode: Literal["test", "val"]) -> None: logger.info("Metrics computed.") for node_name, metrics in computed_metrics.items(): for metric_name, metric_value in metrics.items(): - metric_results[node_name][metric_name] = metric_value.cpu().item() + metric_results[node_name][metric_name] = ( + metric_value.cpu().item() + ) self.log( f"{mode}/metric/{node_name}/{metric_name}", metric_value, @@ -682,7 +738,9 @@ def _evaluation_epoch_end(self, mode: Literal["test", "val"]) -> None: def configure_callbacks(self) -> list[pl.Callback]: """Configures Pytorch Lightning callbacks.""" self.min_val_loss_checkpoints_path = f"{self.save_dir}/min_val_loss" - self.best_val_metric_checkpoints_path = f"{self.save_dir}/best_val_metric" + self.best_val_metric_checkpoints_path = ( + f"{self.save_dir}/best_val_metric" + ) model_name = self.cfg.model.name callbacks: list[pl.Callback] = [ @@ -716,14 +774,17 @@ def 
configure_callbacks(self) -> list[pl.Callback]: for callback in self.cfg.trainer.callbacks: if callback.active: - callbacks.append(CALLBACKS.get(callback.name)(**callback.params)) + callbacks.append( + CALLBACKS.get(callback.name)(**callback.params) + ) return callbacks def configure_optimizers( self, ) -> tuple[ - list[torch.optim.Optimizer], list[torch.optim.lr_scheduler._LRScheduler] + list[torch.optim.Optimizer], + list[torch.optim.lr_scheduler._LRScheduler], ]: """Configures model optimizers and schedulers.""" cfg_optimizer = self.cfg.trainer.optimizer @@ -739,18 +800,20 @@ def configure_optimizers( return [optimizer], [scheduler] - def load_checkpoint(self, path: str | None) -> None: + def load_checkpoint(self, path: str | Path | None) -> None: """Loads checkpoint weights from provided path. - Loads the checkpoints gracefully, ignoring keys that are not found in the model - state dict or in the checkpoint. + Loads the checkpoints gracefully, ignoring keys that are not + found in the model state dict or in the checkpoint. @type path: str | None - @param path: Path to the checkpoint. If C{None}, no checkpoint will be loaded. + @param path: Path to the checkpoint. If C{None}, no checkpoint + will be loaded. """ if path is None: return + path = str(path) checkpoint = torch.load(path, map_location=self.device) if "state_dict" not in checkpoint: @@ -809,7 +872,9 @@ def _init_attached_module( return module_name, node_name @staticmethod - def _to_module_dict(modules: dict[str, dict[str, nn.Module]]) -> nn.ModuleDict: + def _to_module_dict( + modules: dict[str, dict[str, nn.Module]], + ) -> nn.ModuleDict: return nn.ModuleDict( { node_name: nn.ModuleDict(node_modules) @@ -819,7 +884,9 @@ def _to_module_dict(modules: dict[str, dict[str, nn.Module]]) -> nn.ModuleDict: @property def _progress_bar(self) -> BaseLuxonisProgressBar: - return cast(BaseLuxonisProgressBar, self._trainer.progress_bar_callback) + return cast( + BaseLuxonisProgressBar, self._trainer.progress_bar_callback + ) @rank_zero_only def _print_results( @@ -829,16 +896,20 @@ def _print_results( logger.info(f"{stage} loss: {loss:.4f}") - self._progress_bar.print_results(stage=stage, loss=loss, metrics=metrics) + self._progress_bar.print_results( + stage=stage, loss=loss, metrics=metrics + ) if self.main_metric is not None: main_metric_node, main_metric_name = self.main_metric.split("/") main_metric = metrics[main_metric_node][main_metric_name] - logger.info(f"{stage} main metric ({self.main_metric}): {main_metric:.4f}") + logger.info( + f"{stage} main metric ({self.main_metric}): {main_metric:.4f}" + ) def _is_train_eval_epoch(self) -> bool: - """Checks if train eval should be performed on current epoch based on configured - train_metrics_interval.""" + """Checks if train eval should be performed on current epoch + based on configured train_metrics_interval.""" train_metrics_interval = self.cfg.trainer.train_metrics_interval # add +1 to current_epoch because starting epoch is at 0 return ( diff --git a/luxonis_train/models/luxonis_output.py b/luxonis_train/models/luxonis_output.py index d69943fc..3cf59329 100644 --- a/luxonis_train/models/luxonis_output.py +++ b/luxonis_train/models/luxonis_output.py @@ -3,8 +3,7 @@ from torch import Tensor -from luxonis_train.utils.general import to_shape_packet -from luxonis_train.utils.types import Packet +from luxonis_train.utils import Packet, to_shape_packet @dataclass diff --git a/luxonis_train/models/predefined_models/base_predefined_model.py 
b/luxonis_train/models/predefined_models/base_predefined_model.py index 33ababdc..9388f345 100644 --- a/luxonis_train/models/predefined_models/base_predefined_model.py +++ b/luxonis_train/models/predefined_models/base_predefined_model.py @@ -1,4 +1,4 @@ -from abc import ABC, abstractproperty +from abc import ABC, abstractmethod from luxonis_ml.utils.registry import AutoRegisterMeta @@ -17,21 +17,21 @@ class BasePredefinedModel( registry=MODELS, register=False, ): - @abstractproperty - def nodes(self) -> list[ModelNodeConfig]: - ... + @property + @abstractmethod + def nodes(self) -> list[ModelNodeConfig]: ... - @abstractproperty - def losses(self) -> list[LossModuleConfig]: - ... + @property + @abstractmethod + def losses(self) -> list[LossModuleConfig]: ... - @abstractproperty - def metrics(self) -> list[MetricModuleConfig]: - ... + @property + @abstractmethod + def metrics(self) -> list[MetricModuleConfig]: ... - @abstractproperty - def visualizers(self) -> list[AttachedModuleConfig]: - ... + @property + @abstractmethod + def visualizers(self) -> list[AttachedModuleConfig]: ... def generate_model( self, diff --git a/luxonis_train/models/predefined_models/classification_model.py b/luxonis_train/models/predefined_models/classification_model.py index c9d782eb..e390b667 100644 --- a/luxonis_train/models/predefined_models/classification_model.py +++ b/luxonis_train/models/predefined_models/classification_model.py @@ -1,13 +1,13 @@ from dataclasses import dataclass, field from typing import Literal +from luxonis_train.utils import Kwargs from luxonis_train.utils.config import ( AttachedModuleConfig, LossModuleConfig, MetricModuleConfig, ModelNodeConfig, ) -from luxonis_train.utils.types import Kwargs from .base_predefined_model import BasePredefinedModel @@ -15,7 +15,7 @@ @dataclass class ClassificationModel(BasePredefinedModel): backbone: str = "MicroNet" - task: Literal["multiclass", "multilabel"] = "multilabel" + task: Literal["multiclass", "multilabel"] = "multiclass" backbone_params: Kwargs = field(default_factory=dict) head_params: Kwargs = field(default_factory=dict) loss_params: Kwargs = field(default_factory=dict) diff --git a/luxonis_train/models/predefined_models/detection_model.py b/luxonis_train/models/predefined_models/detection_model.py index e9db4462..94c4487f 100644 --- a/luxonis_train/models/predefined_models/detection_model.py +++ b/luxonis_train/models/predefined_models/detection_model.py @@ -1,12 +1,12 @@ from dataclasses import dataclass, field +from luxonis_train.utils import Kwargs from luxonis_train.utils.config import ( AttachedModuleConfig, LossModuleConfig, MetricModuleConfig, ModelNodeConfig, ) -from luxonis_train.utils.types import Kwargs from .base_predefined_model import BasePredefinedModel @@ -47,7 +47,9 @@ def nodes(self) -> list[ModelNodeConfig]: name="EfficientBBoxHead", alias="detection_head", freezing=self.head_params.pop("freezing", {}), - inputs=["detection_neck"] if self.use_neck else ["detection_backbone"], + inputs=["detection_neck"] + if self.use_neck + else ["detection_backbone"], params=self.head_params, task=self.task_name, ) diff --git a/luxonis_train/models/predefined_models/keypoint_detection_model.py b/luxonis_train/models/predefined_models/keypoint_detection_model.py index 588911c6..670b00b1 100644 --- a/luxonis_train/models/predefined_models/keypoint_detection_model.py +++ b/luxonis_train/models/predefined_models/keypoint_detection_model.py @@ -1,13 +1,13 @@ from dataclasses import dataclass, field from typing import Literal +from 
luxonis_train.utils import Kwargs from luxonis_train.utils.config import ( AttachedModuleConfig, LossModuleConfig, MetricModuleConfig, ModelNodeConfig, ) -from luxonis_train.utils.types import Kwargs from .base_predefined_model import BasePredefinedModel @@ -21,7 +21,7 @@ class KeypointDetectionModel(BasePredefinedModel): loss_params: Kwargs = field(default_factory=dict) head_type: Literal[ "ImplicitKeypointBBoxHead", "EfficientKeypointBBoxHead" - ] = "ImplicitKeypointBBoxHead" + ] = "EfficientKeypointBBoxHead" kpt_visualizer_params: Kwargs = field(default_factory=dict) bbox_visualizer_params: Kwargs = field(default_factory=dict) bbox_task_name: str | None = None @@ -50,7 +50,7 @@ def nodes(self) -> list[ModelNodeConfig]: task = {} if self.bbox_task_name is not None: - task["bbox"] = self.bbox_task_name + task["boundingbox"] = self.bbox_task_name if self.kpt_task_name is not None: task["keypoints"] = self.kpt_task_name diff --git a/luxonis_train/models/predefined_models/segmentation_model.py b/luxonis_train/models/predefined_models/segmentation_model.py index b5e81f76..d1076239 100644 --- a/luxonis_train/models/predefined_models/segmentation_model.py +++ b/luxonis_train/models/predefined_models/segmentation_model.py @@ -1,13 +1,13 @@ from dataclasses import dataclass, field from typing import Literal +from luxonis_train.utils import Kwargs from luxonis_train.utils.config import ( AttachedModuleConfig, LossModuleConfig, MetricModuleConfig, ModelNodeConfig, ) -from luxonis_train.utils.types import Kwargs from .base_predefined_model import BasePredefinedModel diff --git a/luxonis_train/nodes/README.md b/luxonis_train/nodes/README.md index 2f147e23..60e5971c 100644 --- a/luxonis_train/nodes/README.md +++ b/luxonis_train/nodes/README.md @@ -77,7 +77,7 @@ Adapted from [here](https://arxiv.org/pdf/2209.02976.pdf). | Key | Type | Default value | Description | | ------------- | ----------- | --------------------------- | --------------------------------------------------- | | channels_list | List\[int\] | \[64, 128, 256, 512, 1024\] | List of number of channels for each block | -| num_repeats | List\[int\] | \[1, 6, 12, 18, 6\] | List of number of repeats of RepVGGBlock | +| n_repeats | List\[int\] | \[1, 6, 12, 18, 6\] | List of number of repeats of RepVGGBlock | | in_channels | int | 3 | Number of input channels, should be 3 in most cases | | depth_mul | int | 0.33 | Depth multiplier | | width_mul | int | 0.25 | Width multiplier | @@ -145,9 +145,9 @@ Adapted from [here](https://arxiv.org/pdf/2209.02976.pdf). | Key | Type | Default value | Description | | ------------- | ---------------- | ------------------------------------------------------- | ----------------------------------------- | -| num_heads | Literal\[2,3,4\] | 3 ***Note:** Should be same also on head in most cases* | Number of output heads | +| n_heads | Literal\[2,3,4\] | 3 ***Note:** Should be same also on head in most cases* | Number of output heads | | channels_list | List\[int\] | \[256, 128, 128, 256, 256, 512\] | List of number of channels for each block | -| num_repeats | List\[int\] | \[12, 12, 12, 12\] | List of number of repeats of RepVGGBlock | +| n_repeats | List\[int\] | \[12, 12, 12, 12\] | List of number of repeats of RepVGGBlock | | depth_mul | int | 0.33 | Depth multiplier | | width_mul | int | 0.25 | Width multiplier | @@ -182,7 +182,7 @@ Adapted from [here](https://arxiv.org/pdf/2209.02976.pdf). 
| Key | Type | Default value | Description | | ---------- | ----- | ------------- | -------------------------------------------------- | -| num_heads | bool | 3 | Number of output heads | +| n_heads | bool | 3 | Number of output heads | | conf_thres | float | 0.25 | confidence threshold for nms (used for evaluation) | | iou_thres | float | 0.45 | iou threshold for nms (used for evaluation) | @@ -195,7 +195,7 @@ Adapted from [here](https://arxiv.org/pdf/2207.02696.pdf). | Key | Type | Default value | Description | | ---------------- | --------------------------- | ------------- | ---------------------------------------------------------------------------------------------------------- | | n_keypoints | int \| None | None | Number of keypoints. | -| num_heads | int | 3 | Number of output heads | +| n_heads | int | 3 | Number of output heads | | anchors | List\[List\[int\]\] \| None | None | Anchors used for object detection. If set to `None`, the anchors are computed at runtime from the dataset. | | init_coco_biases | bool | True | Whether to use COCO bias and weight initialization | | conf_thres | float | 0.25 | confidence threshold for nms (used for evaluation) | diff --git a/luxonis_train/nodes/activations/__init__.py b/luxonis_train/nodes/activations/__init__.py index 37aea0fc..0d3d1e0b 100644 --- a/luxonis_train/nodes/activations/__init__.py +++ b/luxonis_train/nodes/activations/__init__.py @@ -1,3 +1,3 @@ -from .activations import HSigmoid, HSwish +from .activations import HSigmoid -__all__ = ["HSigmoid", "HSwish"] +__all__ = ["HSigmoid"] diff --git a/luxonis_train/nodes/activations/activations.py b/luxonis_train/nodes/activations/activations.py index f3abedd6..93703a1c 100644 --- a/luxonis_train/nodes/activations/activations.py +++ b/luxonis_train/nodes/activations/activations.py @@ -10,14 +10,3 @@ def __init__(self): def forward(self, x: Tensor) -> Tensor: return self.relu(x + 3) / 6 - - -class HSwish(nn.Module): - def __init__(self): - """H-Swish activation function from U{Searching for MobileNetV3 - }.""" - super().__init__() - self.sigmoid = HSigmoid() - - def forward(self, x: Tensor) -> Tensor: - return x * self.sigmoid(x) diff --git a/luxonis_train/nodes/backbones/contextspatial.py b/luxonis_train/nodes/backbones/contextspatial.py index 2cac4b81..cf98cd4c 100644 --- a/luxonis_train/nodes/backbones/contextspatial.py +++ b/luxonis_train/nodes/backbones/contextspatial.py @@ -1,9 +1,3 @@ -"""Implementation of Context Spatial backbone. - -Source: U{BiseNetV1} -""" - - from torch import Tensor, nn from torch.nn import functional as F @@ -13,21 +7,43 @@ ConvModule, FeatureFusionBlock, ) +from luxonis_train.utils import Kwargs from luxonis_train.utils.registry import NODES class ContextSpatial(BaseNode[Tensor, list[Tensor]]): - def __init__(self, context_backbone: str = "MobileNetV2", **kwargs): - """Context spatial backbone. - TODO: Add more documentation. + def __init__( + self, + context_backbone: str | nn.Module = "MobileNetV2", + backbone_kwargs: Kwargs | None = None, + **kwargs, + ): + """Context Spatial backbone introduced in BiseNetV1. + Source: U{BiseNetV1} + + @see: U{BiseNetv1: Bilateral Segmentation Network for + Real-time Semantic Segmentation + } @type context_backbone: str - @param context_backbone: Backbone used. Defaults to C{MobileNetV2}. + @param context_backbone: Backbone used in the context path. + Can be either a string or a C{torch.nn.Module}. + If a string argument is used, it has to be a name of a module + stored in the L{NODES} registry. 
Defaults to C{MobileNetV2}. + + @type backbone_kwargs: dict + @param backbone_kwargs: Keyword arguments for the backbone. + Only used when the C{context_backbone} argument is a string. """ super().__init__(**kwargs) - self.context_path = ContextPath(NODES.get(context_backbone)(**kwargs)) + if isinstance(context_backbone, str): + backbone_kwargs = backbone_kwargs or {} + backbone_kwargs |= kwargs + context_backbone = NODES.get(context_backbone)(**backbone_kwargs) + + self.context_path = ContextPath(context_backbone) self.spatial_path = SpatialPath(3, 128) self.ffm = FeatureFusionBlock(256, 256) @@ -35,22 +51,41 @@ def forward(self, inputs: Tensor) -> list[Tensor]: spatial_out = self.spatial_path(inputs) context16, _ = self.context_path(inputs) fm_fuse = self.ffm(spatial_out, context16) - outs = [fm_fuse] - return outs + return [fm_fuse] class SpatialPath(nn.Module): def __init__(self, in_channels: int, out_channels: int): super().__init__() intermediate_channels = 64 - self.conv_7x7 = ConvModule(in_channels, intermediate_channels, 7, 2, 3) + self.conv_7x7 = ConvModule( + in_channels, + intermediate_channels, + kernel_size=7, + stride=2, + padding=3, + ) self.conv_3x3_1 = ConvModule( - intermediate_channels, intermediate_channels, 3, 2, 1 + intermediate_channels, + intermediate_channels, + kernel_size=3, + stride=2, + padding=1, ) self.conv_3x3_2 = ConvModule( - intermediate_channels, intermediate_channels, 3, 2, 1 + intermediate_channels, + intermediate_channels, + kernel_size=3, + stride=2, + padding=1, + ) + self.conv_1x1 = ConvModule( + intermediate_channels, + out_channels, + kernel_size=1, + stride=1, + padding=0, ) - self.conv_1x1 = ConvModule(intermediate_channels, out_channels, 1, 1, 0) def forward(self, x: Tensor) -> Tensor: x = self.conv_7x7(x) @@ -60,25 +95,30 @@ def forward(self, x: Tensor) -> Tensor: class ContextPath(nn.Module): - def __init__(self, backbone: BaseNode): + def __init__(self, backbone: nn.Module): super().__init__() self.backbone = backbone - self.up16 = nn.Upsample(scale_factor=2.0, mode="bilinear", align_corners=True) - self.up32 = nn.Upsample(scale_factor=2.0, mode="bilinear", align_corners=True) + self.up16 = nn.Upsample( + scale_factor=2.0, mode="bilinear", align_corners=True + ) + self.up32 = nn.Upsample( + scale_factor=2.0, mode="bilinear", align_corners=True + ) self.refine16 = ConvModule(128, 128, 3, 1, 1) self.refine32 = ConvModule(128, 128, 3, 1, 1) - def forward(self, x: Tensor) -> list[Tensor]: - *_, down16, down32 = self.backbone.forward(x) + def forward(self, x: Tensor) -> tuple[Tensor, Tensor]: + *_, down16, down32 = self.backbone(x) if not hasattr(self, "arm16"): self.arm16 = AttentionRefinmentBlock(down16.shape[1], 128) self.arm32 = AttentionRefinmentBlock(down32.shape[1], 128) self.global_context = nn.Sequential( - nn.AdaptiveAvgPool2d(1), ConvModule(down32.shape[1], 128, 1, 1, 0) + nn.AdaptiveAvgPool2d(1), + ConvModule(down32.shape[1], 128, 1, 1, 0), ) arm_down16 = self.arm16(down16) @@ -86,15 +126,18 @@ def forward(self, x: Tensor) -> list[Tensor]: global_down32 = self.global_context(down32) global_down32 = F.interpolate( - global_down32, size=down32.size()[2:], mode="bilinear", align_corners=True + global_down32, + size=down32.shape[2:], + mode="bilinear", + align_corners=True, ) - arm_down32 = arm_down32 + global_down32 + arm_down32 += global_down32 arm_down32 = self.up32(arm_down32) arm_down32 = self.refine32(arm_down32) - arm_down16 = arm_down16 + arm_down32 + arm_down16 += arm_down32 arm_down16 = self.up16(arm_down16) arm_down16 = 
self.refine16(arm_down16) - return [arm_down16, arm_down32] + return arm_down16, arm_down32 diff --git a/luxonis_train/nodes/backbones/efficientnet.py b/luxonis_train/nodes/backbones/efficientnet.py index e560bc5f..7744236a 100644 --- a/luxonis_train/nodes/backbones/efficientnet.py +++ b/luxonis_train/nodes/backbones/efficientnet.py @@ -1,8 +1,4 @@ -"""Implementation of the EfficientNet backbone. - -Source: U{https://github.com/rwightman/gen-efficientnet-pytorch} -@license: U{Apache 2.0} -""" +from typing import Any import torch from torch import Tensor, nn @@ -13,33 +9,49 @@ class EfficientNet(BaseNode[Tensor, list[Tensor]]): attach_index: int = -1 - def __init__(self, download_weights: bool = False, **kwargs): + def __init__( + self, + download_weights: bool = False, + out_indices: list[int] | None = None, + **kwargs: Any, + ): """EfficientNet backbone. + EfficientNet is a convolutional neural network architecture and scaling method that uniformly scales all dimensions of depth/width/resolution using a compound coefficient. Unlike conventional practice that arbitrary scales these factors, the EfficientNet scaling method uniformly scales network width, depth, and resolution with a set of fixed scaling coefficients. + + Source: U{https://github.com/rwightman/gen-efficientnet-pytorch} + + @license: U{Apache License, Version 2.0 + } + + @see: U{https://paperswithcode.com/method/efficientnet} + @see: U{EfficientNet: Rethinking Model Scaling for + Convolutional Neural Networks + } @type download_weights: bool @param download_weights: If C{True} download weights from imagenet. Defaults to C{False}. + @type out_indices: list[int] | None + @param out_indices: Indices of the output layers. Defaults to [0, 1, 2, 4, 6]. """ super().__init__(**kwargs) - efficientnet_lite0_model = torch.hub.load( + self.backbone: nn.Module = torch.hub.load( # type: ignore "rwightman/gen-efficientnet-pytorch", "efficientnet_lite0", pretrained=download_weights, ) - efficientnet_lite0_model.classifier = nn.Identity() - self.out_indices = [0, 1, 2, 4, 6] - efficientnet_lite0_model.bn2 = nn.Identity() - efficientnet_lite0_model.conv_head = nn.Identity() - self.backbone = efficientnet_lite0_model - - def forward(self, x: Tensor) -> list[Tensor]: - outs = [] - x = self.backbone.conv_stem(x) + self.out_indices = out_indices or [0, 1, 2, 4, 6] + + def forward(self, inputs: Tensor) -> list[Tensor]: + x = self.backbone.conv_stem(inputs) x = self.backbone.bn1(x) x = self.backbone.act1(x) - for i, m in enumerate(self.backbone.blocks): - x = m(x) + + outs: list[Tensor] = [] + + for i, layer in enumerate(self.backbone.blocks): + x = layer(x) if i in self.out_indices: outs.append(x) diff --git a/luxonis_train/nodes/backbones/efficientrep/__init__.py b/luxonis_train/nodes/backbones/efficientrep/__init__.py new file mode 100644 index 00000000..51ff264a --- /dev/null +++ b/luxonis_train/nodes/backbones/efficientrep/__init__.py @@ -0,0 +1,3 @@ +from .efficientrep import EfficientRep + +__all__ = ["EfficientRep"] diff --git a/luxonis_train/nodes/backbones/efficientrep.py b/luxonis_train/nodes/backbones/efficientrep/efficientrep.py similarity index 53% rename from luxonis_train/nodes/backbones/efficientrep.py rename to luxonis_train/nodes/backbones/efficientrep/efficientrep.py index be558620..0143855c 100644 --- a/luxonis_train/nodes/backbones/efficientrep.py +++ b/luxonis_train/nodes/backbones/efficientrep/efficientrep.py @@ -1,11 +1,5 @@ -"""Implementation of the EfficientRep backbone. 
- -Adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial -Applications}. -""" - import logging -from typing import Literal +from typing import Any from torch import Tensor, nn @@ -15,63 +9,68 @@ RepVGGBlock, SpatialPyramidPoolingBlock, ) -from luxonis_train.utils.general import make_divisible +from luxonis_train.utils import make_divisible + +from .variants import VariantLiteral, get_variant logger = logging.getLogger(__name__) class EfficientRep(BaseNode[Tensor, list[Tensor]]): + in_channels: int + def __init__( self, - variant: Literal["s", "n", "m", "l"] = "n", + variant: VariantLiteral = "nano", channels_list: list[int] | None = None, - num_repeats: list[int] | None = None, - depth_mul: float = 0.33, - width_mul: float = 0.25, - **kwargs, + n_repeats: list[int] | None = None, + depth_mul: float | None = None, + width_mul: float | None = None, + **kwargs: Any, ): - """EfficientRep backbone. - - @type variant: Literal["s", "n", "m", "l"] - @param variant: EfficientRep variant. Defaults to "n". + """Implementation of the EfficientRep backbone. + + Adapted from U{YOLOv6: A Single-Stage Object Detection Framework + for Industrial Applications + }. + + @type variant: Literal["n", "nano", "s", "small", "m", "medium", "l", "large"] + @param variant: EfficientRep variant. Defaults to "nano". + The variant determines the depth and width multipliers. + The depth multiplier determines the number of blocks in each stage and the width multiplier determines the number of channels. + The following variants are available: + - "n" or "nano" (default): depth_multiplier=0.33, width_multiplier=0.25 + - "s" or "small": depth_multiplier=0.33, width_multiplier=0.50 + - "m" or "medium": depth_multiplier=0.60, width_multiplier=0.75 + - "l" or "large": depth_multiplier=1.0, width_multiplier=1.0 @type channels_list: list[int] | None @param channels_list: List of number of channels for each block. If unspecified, defaults to [64, 128, 256, 512, 1024]. - @type num_repeats: list[int] | None - @param num_repeats: List of number of repeats of RepVGGBlock. If unspecified, + @type n_repeats: list[int] | None + @param n_repeats: List of number of repeats of RepVGGBlock. If unspecified, defaults to [1, 6, 12, 18, 6]. @type depth_mul: float - @param depth_mul: Depth multiplier. Depending on the variant, defaults to 0.33. + @param depth_mul: Depth multiplier. If provided, overrides the variant value. @type width_mul: float - @param width_mul: Width multiplier. Depending on the variant, defaults to 0.25. - @type kwargs: Any - @param kwargs: Additional arguments to pass to L{BaseNode}. + @param width_mul: Width multiplier. If provided, overrides the variant value. 
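A small worked example of the variant scaling described above, using the "nano" multipliers; the rounding helper below is a plain ceiling-to-multiple stand-in for the project's `make_divisible` utility, whose exact rounding is assumed here:

```python
import math

# Stand-in for luxonis_train.utils.make_divisible (assumed ceil-to-multiple).
def make_divisible(x: float, divisor: int = 8) -> int:
    return math.ceil(x / divisor) * divisor

depth_mul, width_mul = 0.33, 0.25          # "n" / "nano" variant
channels_list = [64, 128, 256, 512, 1024]  # constructor defaults
n_repeats = [1, 6, 12, 18, 6]

scaled_channels = [make_divisible(c * width_mul, 8) for c in channels_list]
scaled_repeats = [
    max(round(r * depth_mul), 1) if r > 1 else r for r in n_repeats
]

print(scaled_channels)  # [16, 32, 64, 128, 256]
print(scaled_repeats)   # [1, 2, 4, 6, 2]
```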
""" super().__init__(**kwargs) - if variant not in EFFICIENTREP_VARIANTS: - raise ValueError( - f"EfficientRep model variant should be in {list(EFFICIENTREP_VARIANTS.keys())}" - ) - - ( - depth_mul, - width_mul, - ) = EFFICIENTREP_VARIANTS[variant] + var = get_variant(variant) + depth_mul = depth_mul or var.depth_multiplier + width_mul = width_mul or var.width_multiplier channels_list = channels_list or [64, 128, 256, 512, 1024] - num_repeats = num_repeats or [1, 6, 12, 18, 6] - channels_list = [make_divisible(i * width_mul, 8) for i in channels_list] - num_repeats = [ - (max(round(i * depth_mul), 1) if i > 1 else i) for i in num_repeats + n_repeats = n_repeats or [1, 6, 12, 18, 6] + channels_list = [ + make_divisible(i * width_mul, 8) for i in channels_list + ] + n_repeats = [ + (max(round(i * depth_mul), 1) if i > 1 else i) for i in n_repeats ] - - in_channels = self.in_channels - if not isinstance(in_channels, int): - raise ValueError("EfficientRep module expects only one input.") self.repvgg_encoder = RepVGGBlock( - in_channels=in_channels, + in_channels=self.in_channels, out_channels=channels_list[0], kernel_size=3, stride=2, @@ -90,7 +89,7 @@ def __init__( block=RepVGGBlock, in_channels=channels_list[i + 1], out_channels=channels_list[i + 1], - num_blocks=num_repeats[i + 1], + n_blocks=n_repeats[i + 1], ), ) self.blocks.append(curr_block) @@ -107,27 +106,20 @@ def set_export_mode(self, mode: bool = True) -> None: """Reparametrizes instances of L{RepVGGBlock} in the network. @type mode: bool - @param mode: Whether to set the export mode. Defaults to C{True}. + @param mode: Whether to set the export mode. Defaults to + C{True}. """ super().set_export_mode(mode) if self.export: - logger.info("Reparametrizing EfficientRep.") + logger.info("Reparametrizing 'EfficientRep'.") for module in self.modules(): if isinstance(module, RepVGGBlock): module.reparametrize() def forward(self, inputs: Tensor) -> list[Tensor]: - outputs = [] + outputs: list[Tensor] = [] x = self.repvgg_encoder(inputs) for block in self.blocks: x = block(x) outputs.append(x) return outputs - - -EFFICIENTREP_VARIANTS = { - "n": (0.33, 0.25), - "s": (0.33, 0.50), - "m": (0.60, 0.75), - "l": (1.0, 1.0), -} diff --git a/luxonis_train/nodes/backbones/efficientrep/variants.py b/luxonis_train/nodes/backbones/efficientrep/variants.py new file mode 100644 index 00000000..7ced749e --- /dev/null +++ b/luxonis_train/nodes/backbones/efficientrep/variants.py @@ -0,0 +1,44 @@ +from typing import Literal, TypeAlias + +from pydantic import BaseModel + +VariantLiteral: TypeAlias = Literal[ + "n", "nano", "s", "small", "m", "medium", "l", "large" +] + + +class EfficientRepVariant(BaseModel): + depth_multiplier: float + width_multiplier: float + + +def get_variant(variant: VariantLiteral) -> EfficientRepVariant: + variants = { + "n": EfficientRepVariant( + depth_multiplier=0.33, + width_multiplier=0.25, + ), + "s": EfficientRepVariant( + depth_multiplier=0.33, + width_multiplier=0.50, + ), + "m": EfficientRepVariant( + depth_multiplier=0.60, + width_multiplier=0.75, + ), + "l": EfficientRepVariant( + depth_multiplier=1.0, + width_multiplier=1.0, + ), + } + variants["nano"] = variants["n"] + variants["small"] = variants["s"] + variants["medium"] = variants["m"] + variants["large"] = variants["l"] + + if variant not in variants: # pragma: no cover + raise ValueError( + f"EfficientRep variant should be one of " + f"{list(variants.keys())}, got '{variant}'." 
+ ) + return variants[variant] diff --git a/luxonis_train/nodes/backbones/micronet.py b/luxonis_train/nodes/backbones/micronet.py deleted file mode 100644 index 074dce2a..00000000 --- a/luxonis_train/nodes/backbones/micronet.py +++ /dev/null @@ -1,842 +0,0 @@ -from typing import Literal - -import torch -from torch import Tensor, nn - -from luxonis_train.nodes.activations import HSigmoid, HSwish -from luxonis_train.nodes.base_node import BaseNode -from luxonis_train.nodes.blocks import ConvModule - - -class MicroNet(BaseNode[Tensor, list[Tensor]]): - """ - - TODO: DOCS - """ - - def __init__(self, variant: Literal["M1", "M2", "M3"] = "M1", **kwargs): - """MicroNet backbone. - - @type variant: Literal["M1", "M2", "M3"] - @param variant: Model variant to use. Defaults to "M1". - """ - super().__init__(**kwargs) - - if variant not in MICRONET_VARIANTS_SETTINGS: - raise ValueError( - f"MicroNet model variant should be in {list(MICRONET_VARIANTS_SETTINGS.keys())}" - ) - - self.inplanes = 64 - ( - in_channels, - stem_groups, - _, - init_a, - init_b, - out_indices, - channels, - cfgs, - ) = MICRONET_VARIANTS_SETTINGS[variant] - self.out_indices = out_indices - self.channels = channels - - self.features = nn.ModuleList([Stem(3, 2, stem_groups)]) - - for ( - stride, - out_channels, - kernel_size, - c1, - c2, - g1, - g2, - _, - g3, - g4, - y1, - y2, - y3, - r, - ) in cfgs: - self.features.append( - MicroBlock( - in_channels, - out_channels, - kernel_size, - stride, - (c1, c2), - (g1, g2), - (g3, g4), - (y1, y2, y3), - r, - init_a, - init_b, - ) - ) - in_channels = out_channels - - def forward(self, x: Tensor) -> list[Tensor]: - outs = [] - for m in self.features: - x = m(x) - outs.append(x) - return outs - - -class MicroBlock(nn.Module): - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: int = 3, - stride: int = 1, - t1: tuple[int, int] = (2, 2), - gs1: tuple[int, int] = (0, 6), - groups_1x1: tuple[int, int] = (1, 1), - dy: tuple[int, int, int] = (2, 0, 1), - r: int = 1, - init_a: tuple[float, float] = (1.0, 1.0), - init_b: tuple[float, float] = (0.0, 0.0), - ): - super().__init__() - - self.identity = stride == 1 and in_channels == out_channels - y1, y2, y3 = dy - g1, g2 = groups_1x1 - reduction = 8 * r - intermediate_channels = in_channels * t1[0] * t1[1] - - if gs1[0] == 0: - self.layers = nn.Sequential( - DepthSpatialSepConv(in_channels, t1, kernel_size, stride), - DYShiftMax( - intermediate_channels, - intermediate_channels, - init_a, - init_b, - True if y2 == 2 else False, - gs1[1], - reduction, - ) - if y2 > 0 - else nn.ReLU6(True), - ChannelShuffle(gs1[1]), - ChannelShuffle(intermediate_channels // 2) - if y2 != 0 - else nn.Sequential(), - ConvModule( - in_channels=intermediate_channels, - out_channels=out_channels, - kernel_size=1, - groups=g1, - activation=nn.Identity(), - ), - DYShiftMax( - out_channels, - out_channels, - (1.0, 0.0), - (0.0, 0.0), - False, - g2, - reduction // 2, - ) - if y3 > 0 - else nn.Sequential(), - ChannelShuffle(g2), - ChannelShuffle(out_channels // 2) - if out_channels % 2 == 0 and y3 != 0 - else nn.Sequential(), - ) - elif g2 == 0: - self.layers = nn.Sequential( - ConvModule( - in_channels=in_channels, - out_channels=intermediate_channels, - kernel_size=1, - groups=gs1[0], - activation=nn.Identity(), - ), - DYShiftMax( - intermediate_channels, - intermediate_channels, - (1.0, 0.0), - (0.0, 0.0), - False, - gs1[1], - reduction, - ) - if y3 > 0 - else nn.Sequential(), - ) - else: - self.layers = nn.Sequential( - ConvModule( - 
in_channels=in_channels, - out_channels=intermediate_channels, - kernel_size=1, - groups=gs1[0], - activation=nn.Identity(), - ), - DYShiftMax( - intermediate_channels, - intermediate_channels, - init_a, - init_b, - True if y1 == 2 else False, - gs1[1], - reduction, - ) - if y1 > 0 - else nn.ReLU6(True), - ChannelShuffle(gs1[1]), - DepthSpatialSepConv(intermediate_channels, (1, 1), kernel_size, stride), - nn.Sequential(), - DYShiftMax( - intermediate_channels, - intermediate_channels, - init_a, - init_b, - True if y2 == 2 else False, - gs1[1], - reduction, - True, - ) - if y2 > 0 - else nn.ReLU6(True), - ChannelShuffle(intermediate_channels // 4) - if y1 != 0 and y2 != 0 - else nn.Sequential() - if y1 == 0 and y2 == 0 - else ChannelShuffle(intermediate_channels // 2), - ConvModule( - in_channels=intermediate_channels, - out_channels=out_channels, - kernel_size=1, - groups=g1, - activation=nn.Identity(), - ), - DYShiftMax( - out_channels, - out_channels, - (1.0, 0.0), - (0.0, 0.0), - False, - g2, - reduction=reduction // 2 - if out_channels < intermediate_channels - else reduction, - ) - if y3 > 0 - else nn.Sequential(), - ChannelShuffle(g2), - ChannelShuffle(out_channels // 2) if y3 != 0 else nn.Sequential(), - ) - - def forward(self, inputs: Tensor) -> Tensor: - out = self.layers(inputs) - if self.identity: - out += inputs - return out - - -class ChannelShuffle(nn.Module): - def __init__(self, groups: int): - super().__init__() - self.groups = groups - - def forward(self, x: Tensor) -> Tensor: - b, c, h, w = x.size() - channels_per_group = c // self.groups - x = x.view(b, self.groups, channels_per_group, h, w) - x = torch.transpose(x, 1, 2).contiguous() - out = x.view(b, -1, h, w) - return out - - -class DYShiftMax(nn.Module): - def __init__( - self, - in_channels: int, - out_channels: int, - init_a: tuple[float, float] = (0.0, 0.0), - init_b: tuple[float, float] = (0.0, 0.0), - act_relu: bool = True, - g: int = 6, - reduction: int = 4, - expansion: bool = False, - ): - super().__init__() - self.exp: Literal[2, 4] = 4 if act_relu else 2 - self.init_a = init_a - self.init_b = init_b - self.out_channels = out_channels - - self.avg_pool = nn.Sequential(nn.Sequential(), nn.AdaptiveAvgPool2d(1)) - - squeeze = self._make_divisible(in_channels // reduction, 4) - - self.fc = nn.Sequential( - nn.Linear(in_channels, squeeze), - nn.ReLU(True), - nn.Linear(squeeze, out_channels * self.exp), - HSigmoid(), - ) - - if g != 1 and expansion: - g = in_channels // g - - gc = in_channels // g - index = Tensor(range(in_channels)).view(1, in_channels, 1, 1) - index = index.view(1, g, gc, 1, 1) - indexgs = torch.split(index, [1, g - 1], dim=1) - indexgs = torch.cat([indexgs[1], indexgs[0]], dim=1) - indexs = torch.split(indexgs, [1, gc - 1], dim=2) - indexs = torch.cat([indexs[1], indexs[0]], dim=2) - self.index = indexs.view(in_channels).long() - - def forward(self, x: Tensor) -> Tensor: - B, C, _, _ = x.shape - x_out = x - - y = self.avg_pool(x).view(B, C) - y = self.fc(y).view(B, -1, 1, 1) - y = (y - 0.5) * 4.0 - - x2 = x_out[:, self.index, :, :] - - if self.exp == 4: - a1, b1, a2, b2 = torch.split(y, self.out_channels, dim=1) - - a1 = a1 + self.init_a[0] - a2 = a2 + self.init_b[1] - b1 = b1 + self.init_b[0] - b2 = b2 + self.init_b[1] - - z1 = x_out * a1 + x2 * b1 - z2 = x_out * a2 + x2 * b2 - - out = torch.max(z1, z2) - - elif self.exp == 2: - a1, b1 = torch.split(y, self.out_channels, dim=1) - a1 = a1 + self.init_a[0] - b1 = b1 + self.init_b[0] - out = x_out * a1 + x2 * b1 - else: - raise 
RuntimeError("Expansion should be 2 or 4.") - - return out - - def _make_divisible(self, v, divisor, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -class SwishLinear(nn.Module): - def __init__(self, in_channels: int, out_channels: int): - super().__init__() - self.linear = nn.Sequential( - nn.Linear(in_channels, out_channels), nn.BatchNorm1d(out_channels), HSwish() - ) - - def forward(self, x: Tensor) -> Tensor: - return self.linear(x) - - -class SpatialSepConvSF(nn.Module): - def __init__( - self, in_channels: int, outs: tuple[int, int], kernel_size: int, stride: int - ): - super().__init__() - out_channels1, out_channels2 = outs - self.conv = nn.Sequential( - nn.Conv2d( - in_channels, - out_channels1, - (kernel_size, 1), - (stride, 1), - (kernel_size // 2, 0), - bias=False, - ), - nn.BatchNorm2d(out_channels1), - nn.Conv2d( - out_channels1, - out_channels1 * out_channels2, - (1, kernel_size), - (1, stride), - (0, kernel_size // 2), - groups=out_channels1, - bias=False, - ), - nn.BatchNorm2d(out_channels1 * out_channels2), - ChannelShuffle(out_channels1), - ) - - def forward(self, x: Tensor) -> Tensor: - return self.conv(x) - - -class Stem(nn.Module): - def __init__(self, in_channels: int, stride: int, outs: tuple[int, int] = (4, 4)): - super().__init__() - self.stem = nn.Sequential( - SpatialSepConvSF(in_channels, outs, 3, stride), nn.ReLU6(True) - ) - - def forward(self, x: Tensor) -> Tensor: - return self.stem(x) - - -class DepthSpatialSepConv(nn.Module): - def __init__( - self, in_channels: int, expand: tuple[int, int], kernel_size: int, stride: int - ): - super().__init__() - exp1, exp2 = expand - intermediate_channels = in_channels * exp1 - out_channels = in_channels * exp1 * exp2 - - self.conv = nn.Sequential( - nn.Conv2d( - in_channels, - intermediate_channels, - (kernel_size, 1), - (stride, 1), - (kernel_size // 2, 0), - groups=in_channels, - bias=False, - ), - nn.BatchNorm2d(intermediate_channels), - nn.Conv2d( - intermediate_channels, - out_channels, - (1, kernel_size), - (1, stride), - (0, kernel_size // 2), - groups=intermediate_channels, - bias=False, - ), - nn.BatchNorm2d(out_channels), - ) - - def forward(self, x: Tensor) -> Tensor: - return self.conv(x) - - -MICRONET_VARIANTS_SETTINGS = { - "M1": [ - 6, # stem_ch - [3, 2], # stem_groups - 960, # out_ch - [1.0, 1.0], # init_a - [0.0, 0.0], # init_b - [1, 2, 4, 7], # out indices - [8, 16, 32, 576], - [ - # s, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r - [2, 8, 3, 2, 2, 0, 6, 8, 2, 2, 2, 0, 1, 1], - [2, 16, 3, 2, 2, 0, 8, 16, 4, 4, 2, 2, 1, 1], - [ - 2, - 16, - 5, - 2, - 2, - 0, - 16, - 16, - 4, - 4, - 2, - 2, - 1, - 1, - ], - [ - 1, - 32, - 5, - 1, - 6, - 4, - 4, - 32, - 4, - 4, - 2, - 2, - 1, - 1, - ], - [ - 2, - 64, - 5, - 1, - 6, - 8, - 8, - 64, - 8, - 8, - 2, - 2, - 1, - 1, - ], - [ - 1, - 96, - 3, - 1, - 6, - 8, - 8, - 96, - 8, - 8, - 2, - 2, - 1, - 2, - ], - [1, 576, 3, 1, 6, 12, 12, 0, 0, 0, 2, 2, 1, 2], # 96->96(4,24)->576 - ], - ], - "M2": [ - 8, - [4, 2], - 1024, - [1.0, 1.0], - [0.0, 0.0], - [1, 3, 6, 9], - [12, 24, 64, 768], - [ - # s, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r - [ - 2, - 12, - 3, - 2, - 2, - 0, - 8, - 12, - 4, - 4, - 2, - 0, - 1, - 1, - ], - [ - 2, - 16, - 3, - 2, - 2, - 0, - 12, - 16, - 4, - 4, - 2, - 2, - 1, - 1, - ], - [ - 1, - 24, - 3, - 2, - 2, - 0, - 16, - 24, - 4, - 4, 
- 2, - 2, - 1, - 1, - ], - [ - 2, - 32, - 5, - 1, - 6, - 6, - 6, - 32, - 4, - 4, - 2, - 2, - 1, - 1, - ], - [ - 1, - 32, - 5, - 1, - 6, - 8, - 8, - 32, - 4, - 4, - 2, - 2, - 1, - 2, - ], - [ - 1, - 64, - 5, - 1, - 6, - 8, - 8, - 64, - 8, - 8, - 2, - 2, - 1, - 2, - ], - [ - 2, - 96, - 5, - 1, - 6, - 8, - 8, - 96, - 8, - 8, - 2, - 2, - 1, - 2, - ], - [ - 1, - 128, - 3, - 1, - 6, - 12, - 12, - 128, - 8, - 8, - 2, - 2, - 1, - 2, - ], - [1, 768, 3, 1, 6, 16, 16, 0, 0, 0, 2, 2, 1, 2], - ], - ], - "M3": [ - 12, - [4, 3], - 1024, - [1.0, 0.5], - [0.0, 0.5], - [1, 3, 8, 12], - [16, 24, 80, 864], - [ - # s, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r - [ - 2, - 16, - 3, - 2, - 2, - 0, - 12, - 16, - 4, - 4, - 0, - 2, - 0, - 1, - ], - [ - 2, - 24, - 3, - 2, - 2, - 0, - 16, - 24, - 4, - 4, - 0, - 2, - 0, - 1, - ], - [ - 1, - 24, - 3, - 2, - 2, - 0, - 24, - 24, - 4, - 4, - 0, - 2, - 0, - 1, - ], - [ - 2, - 32, - 5, - 1, - 6, - 6, - 6, - 32, - 4, - 4, - 0, - 2, - 0, - 1, - ], - [ - 1, - 32, - 5, - 1, - 6, - 8, - 8, - 32, - 4, - 4, - 0, - 2, - 0, - 2, - ], - [ - 1, - 64, - 5, - 1, - 6, - 8, - 8, - 48, - 8, - 8, - 0, - 2, - 0, - 2, - ], - [ - 1, - 80, - 5, - 1, - 6, - 8, - 8, - 80, - 8, - 8, - 0, - 2, - 0, - 2, - ], - [ - 1, - 80, - 5, - 1, - 6, - 10, - 10, - 80, - 8, - 8, - 0, - 2, - 0, - 2, - ], - [ - 2, - 120, - 5, - 1, - 6, - 10, - 10, - 120, - 10, - 10, - 0, - 2, - 0, - 2, - ], - [ - 1, - 120, - 5, - 1, - 6, - 12, - 12, - 120, - 10, - 10, - 0, - 2, - 0, - 2, - ], - [ - 1, - 144, - 3, - 1, - 6, - 12, - 12, - 144, - 12, - 12, - 0, - 2, - 0, - 2, - ], - [1, 864, 3, 1, 6, 12, 12, 0, 0, 0, 0, 2, 0, 2], - ], - ], -} diff --git a/luxonis_train/nodes/backbones/micronet/__init__.py b/luxonis_train/nodes/backbones/micronet/__init__.py new file mode 100644 index 00000000..5b41ece3 --- /dev/null +++ b/luxonis_train/nodes/backbones/micronet/__init__.py @@ -0,0 +1,3 @@ +from .micronet import MicroNet + +__all__ = ["MicroNet"] diff --git a/luxonis_train/nodes/backbones/micronet/blocks.py b/luxonis_train/nodes/backbones/micronet/blocks.py new file mode 100644 index 00000000..3da5e15e --- /dev/null +++ b/luxonis_train/nodes/backbones/micronet/blocks.py @@ -0,0 +1,515 @@ +from typing import Literal + +import torch +from torch import Tensor, nn + +from luxonis_train.nodes.activations import HSigmoid +from luxonis_train.nodes.blocks import ConvModule + + +class MicroBlock(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int = 3, + stride: int = 1, + expansion_ratios: tuple[int, int] = (2, 2), + groups_1: tuple[int, int] = (0, 6), + groups_2: tuple[int, int] = (1, 1), + use_dynamic_shift: tuple[int, int, int] = (2, 0, 1), + reduction_factor: int = 1, + init_a: tuple[float, float] = (1.0, 1.0), + init_b: tuple[float, float] = (0.0, 0.0), + ): + """ + MicroBlock: The basic building block of MicroNet. + + This block implements the Micro-Factorized Convolution and Dynamic Shift-Max activation. + It can be configured to use different combinations of these components based on the network design. + + @type in_channels: int + @param in_channels: Number of input channels. + @type out_channels: int + @param out_channels: Number of output channels. + @type kernel_size: int + @param kernel_size: Size of the convolution kernel. Defaults to 3. + @type stride: int + @param stride: Stride of the convolution. Defaults to 1. + @type expansion_ratios: tuple[int, int] + @param expansion_ratios: Expansion ratios for the intermediate channels. Defaults to (2, 2). 
+ @type groups_1: tuple[int, int] + @param groups_1: Groups for the first set of convolutions. Defaults to (0, 6). + @type groups_2: tuple[int, int] + @param groups_2: Groups for the second set of convolutions. Defaults to (1, 1). + @type use_dynamic_shift: tuple[int, int, int] + @param use_dynamic_shift: Flags to use Dynamic Shift-Max in different positions. Defaults to (2, 0, 1). + @type reduction_factor: int + @param reduction_factor: Reduction factor for the squeeze-and-excitation-like operation. Defaults to 1. + @type init_a: tuple[float, float] + @param init_a: Initialization parameters for Dynamic Shift-Max. Defaults to (1.0, 1.0). + @type init_b: tuple[float, float] + @param init_b: Initialization parameters for Dynamic Shift-Max. Defaults to (0.0, 0.0). + """ + super().__init__() + + self.use_residual = stride == 1 and in_channels == out_channels + self.expansion_ratios = expansion_ratios + use_dy1, use_dy2, use_dy3 = use_dynamic_shift + group1, group2 = groups_2 + reduction = 8 * reduction_factor + intermediate_channels = ( + in_channels * expansion_ratios[0] * expansion_ratios[1] + ) + + if groups_1[0] == 0: + self.layers = self._create_lite_block( + in_channels, + out_channels, + intermediate_channels, + kernel_size, + stride, + groups_1[1], + group1, + group2, + use_dy2, + use_dy3, + reduction, + init_a, + init_b, + ) + elif group2 == 0: + self.layers = self._create_transition_block( + in_channels, + intermediate_channels, + groups_1[0], + groups_1[1], + use_dy3, + reduction, + ) + else: + self.layers = self._create_full_block( + in_channels, + out_channels, + intermediate_channels, + kernel_size, + stride, + groups_1, + group1, + group2, + use_dy1, + use_dy2, + use_dy3, + reduction, + init_a, + init_b, + ) + + def _create_lite_block( + self, + in_channels: int, + out_channels: int, + intermediate_channels: int, + kernel_size: int, + stride: int, + group1: int, + group2: int, + group3: int, + use_dy2: int, + use_dy3: int, + reduction: int, + init_a: tuple[float, float], + init_b: tuple[float, float], + ) -> nn.Sequential: + return nn.Sequential( + DepthSpatialSepConv( + in_channels, self.expansion_ratios, kernel_size, stride + ), + DYShiftMax( + intermediate_channels, + intermediate_channels, + init_a, + init_b, + True if use_dy2 == 2 else False, + group1, + reduction, + ) + if use_dy2 > 0 + else nn.ReLU6(True), + ChannelShuffle(group1), + ChannelShuffle(intermediate_channels // 2) + if use_dy2 != 0 + else nn.Sequential(), + ConvModule( + in_channels=intermediate_channels, + out_channels=out_channels, + kernel_size=1, + groups=group2, + activation=nn.Identity(), + ), + DYShiftMax( + out_channels, + out_channels, + (1.0, 0.0), + (0.0, 0.0), + False, + group3, + reduction // 2, + ) + if use_dy3 > 0 + else nn.Sequential(), + ChannelShuffle(group3), + ChannelShuffle(out_channels // 2) + if out_channels % 2 == 0 and use_dy3 != 0 + else nn.Sequential(), + ) + + def _create_transition_block( + self, + in_channels: int, + intermediate_channels: int, + group1: int, + group2: int, + use_dy3: int, + reduction: int, + ) -> nn.Sequential: + return nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=intermediate_channels, + kernel_size=1, + groups=group1, + activation=nn.Identity(), + ), + DYShiftMax( + intermediate_channels, + intermediate_channels, + (1.0, 0.0), + (0.0, 0.0), + False, + group2, + reduction, + ) + if use_dy3 > 0 + else nn.Sequential(), + ) + + def _create_full_block( + self, + in_channels: int, + out_channels: int, + intermediate_channels: int, + 
kernel_size: int, + stride: int, + groups_1: tuple[int, int], + group1: int, + group2: int, + use_dy1: int, + use_dy2: int, + use_dy3: int, + reduction: int, + init_a: tuple[float, float], + init_b: tuple[float, float], + ) -> nn.Sequential: + return nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=intermediate_channels, + kernel_size=1, + groups=groups_1[0], + activation=nn.Identity(), + ), + DYShiftMax( + intermediate_channels, + intermediate_channels, + init_a, + init_b, + True if use_dy1 == 2 else False, + groups_1[1], + reduction, + ) + if use_dy1 > 0 + else nn.ReLU6(True), + ChannelShuffle(groups_1[1]), + DepthSpatialSepConv( + intermediate_channels, (1, 1), kernel_size, stride + ), + DYShiftMax( + intermediate_channels, + intermediate_channels, + init_a, + init_b, + True if use_dy2 == 2 else False, + groups_1[1], + reduction, + True, + ) + if use_dy2 > 0 + else nn.ReLU6(True), + ChannelShuffle(intermediate_channels // 4) + if use_dy1 != 0 and use_dy2 != 0 + else nn.Sequential() + if use_dy1 == 0 and use_dy2 == 0 + else ChannelShuffle(intermediate_channels // 2), + ConvModule( + in_channels=intermediate_channels, + out_channels=out_channels, + kernel_size=1, + groups=group1, + activation=nn.Identity(), + ), + DYShiftMax( + out_channels, + out_channels, + (1.0, 0.0), + (0.0, 0.0), + False, + group2, + reduction=reduction // 2 + if out_channels < intermediate_channels + else reduction, + ) + if use_dy3 > 0 + else nn.Sequential(), + ChannelShuffle(group2), + ChannelShuffle(out_channels // 2) + if use_dy3 != 0 + else nn.Sequential(), + ) + + def forward(self, inputs: Tensor) -> Tensor: + out = self.layers(inputs) + if self.use_residual: + out += inputs + return out + + +class ChannelShuffle(nn.Module): + def __init__(self, groups: int): + """Shuffle the channels of the input tensor. + + This operation is used to mix information between groups after + grouped convolutions. + + @type groups: int + @param groups: Number of groups to divide the channels into + before shuffling. + """ + + super().__init__() + self.groups = groups + + def forward(self, x: Tensor) -> Tensor: + batch_size, channels, height, width = x.size() + channels_per_group = channels // self.groups + x = x.view(batch_size, self.groups, channels_per_group, height, width) + x = torch.transpose(x, 1, 2).contiguous() + out = x.view(batch_size, -1, height, width) + return out + + +class DYShiftMax(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + init_a: tuple[float, float] = (0.0, 0.0), + init_b: tuple[float, float] = (0.0, 0.0), + use_relu: bool = True, + groups: int = 6, + reduction: int = 4, + expansion: bool = False, + ): + """Dynamic Shift-Max activation function. + + This module implements the Dynamic Shift-Max operation, which + adaptively fuses and selects channel information based on the + input. + + @type in_channels: int + @param in_channels: Number of input channels. + @type out_channels: int + @param out_channels: Number of output channels. + @type init_a: tuple[float, float] + @param init_a: Initial values for the 'a' parameters. Defaults + to (0.0, 0.0). + @type init_b: tuple[float, float] + @param init_b: Initial values for the 'b' parameters. Defaults + to (0.0, 0.0). + @type use_relu: bool + @param use_relu: Whether to use ReLU activation. Defaults to + True. + @type groups: int + @param groups: Number of groups for channel shuffling. Defaults + to 6. + @type reduction: int + @param reduction: Reduction factor for the squeeze operation. + Defaults to 4. 
+ @type expansion: bool + @param expansion: Whether to use expansion in grouping. Defaults + to False. + """ + super().__init__() + self.exp: Literal[2, 4] = 4 if use_relu else 2 + self.init_a = init_a + self.init_b = init_b + self.out_channels = out_channels + + self.avg_pool = nn.AdaptiveAvgPool2d(1) + + squeeze_channels = self._make_divisible(in_channels // reduction, 4) + + self.fc = nn.Sequential( + nn.Linear(in_channels, squeeze_channels), + nn.ReLU(True), + nn.Linear(squeeze_channels, out_channels * self.exp), + HSigmoid(), + ) + + if groups != 1 and expansion: + groups = in_channels // groups + + channels_per_group = in_channels // groups + index = torch.arange(in_channels).view(1, in_channels, 1, 1) + index = index.view(1, groups, channels_per_group, 1, 1) + index_groups = torch.split(index, [1, groups - 1], dim=1) + index_groups = torch.cat([index_groups[1], index_groups[0]], dim=1) + index_splits = torch.split( + index_groups, [1, channels_per_group - 1], dim=2 + ) + index_splits = torch.cat([index_splits[1], index_splits[0]], dim=2) + self.index = index_splits.view(in_channels).long() + + def forward(self, x: Tensor) -> Tensor: + batch_size, channels, _, _ = x.shape + x_out = x + + y = self.avg_pool(x).view(batch_size, channels) + y = self.fc(y).view(batch_size, -1, 1, 1) + y = (y - 0.5) * 4.0 + + x2 = x_out[:, self.index, :, :] + + if self.exp == 4: + a1, b1, a2, b2 = torch.split(y, self.out_channels, dim=1) + + a1 = a1 + self.init_a[0] + a2 = a2 + self.init_b[1] + b1 = b1 + self.init_b[0] + b2 = b2 + self.init_b[1] + + z1 = x_out * a1 + x2 * b1 + z2 = x_out * a2 + x2 * b2 + + out = torch.max(z1, z2) + + elif self.exp == 2: + a1, b1 = torch.split(y, self.out_channels, dim=1) + a1 = a1 + self.init_a[0] + b1 = b1 + self.init_b[0] + out = x_out * a1 + x2 * b1 + else: + raise RuntimeError("Expansion should be 2 or 4.") + + return out + + def _make_divisible( + self, value: int, divisor: int, min_value: int | None = None + ) -> int: + if min_value is None: + min_value = divisor + new_v = max(min_value, int(value + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. 
+ if new_v < 0.9 * value: + new_v += divisor + return new_v + + +class SpatialSepConvSF(nn.Module): + def __init__( + self, + in_channels: int, + outs: tuple[int, int], + kernel_size: int, + stride: int, + ): + super().__init__() + out_channels1, out_channels2 = outs + self.conv = nn.Sequential( + nn.Conv2d( + in_channels, + out_channels1, + kernel_size=(kernel_size, 1), + stride=(stride, 1), + padding=(kernel_size // 2, 0), + bias=False, + ), + nn.BatchNorm2d(out_channels1), + nn.Conv2d( + out_channels1, + out_channels1 * out_channels2, + kernel_size=(1, kernel_size), + stride=(1, stride), + padding=(0, kernel_size // 2), + groups=out_channels1, + bias=False, + ), + nn.BatchNorm2d(out_channels1 * out_channels2), + ChannelShuffle(out_channels1), + ) + + def forward(self, x: Tensor) -> Tensor: + return self.conv(x) + + +class Stem(nn.Module): + def __init__( + self, in_channels: int, stride: int, outs: tuple[int, int] = (4, 4) + ): + super().__init__() + self.stem = nn.Sequential( + SpatialSepConvSF(in_channels, outs, 3, stride), nn.ReLU6(True) + ) + + def forward(self, x: Tensor) -> Tensor: + return self.stem(x) + + +class DepthSpatialSepConv(nn.Module): + def __init__( + self, + in_channels: int, + expand: tuple[int, int], + kernel_size: int, + stride: int, + ): + super().__init__() + exp1, exp2 = expand + intermediate_channels = in_channels * exp1 + out_channels = in_channels * exp1 * exp2 + + self.conv = nn.Sequential( + nn.Conv2d( + in_channels, + intermediate_channels, + (kernel_size, 1), + (stride, 1), + padding=(kernel_size // 2, 0), + groups=in_channels, + bias=False, + ), + nn.BatchNorm2d(intermediate_channels), + nn.Conv2d( + intermediate_channels, + out_channels, + (1, kernel_size), + (1, stride), + padding=(0, kernel_size // 2), + groups=intermediate_channels, + bias=False, + ), + nn.BatchNorm2d(out_channels), + ) + + def forward(self, x: Tensor) -> Tensor: + return self.conv(x) diff --git a/luxonis_train/nodes/backbones/micronet/micronet.py b/luxonis_train/nodes/backbones/micronet/micronet.py new file mode 100644 index 00000000..82df5cb3 --- /dev/null +++ b/luxonis_train/nodes/backbones/micronet/micronet.py @@ -0,0 +1,62 @@ +from typing import Any, Literal + +from torch import Tensor, nn + +from luxonis_train.nodes.base_node import BaseNode + +from .blocks import MicroBlock, Stem +from .variants import get_variant + + +class MicroNet(BaseNode[Tensor, list[Tensor]]): + def __init__( + self, + variant: Literal["M1", "M2", "M3"] = "M1", + out_indices: list[int] | None = None, + **kwargs: Any, + ): + """MicroNet backbone. + + This class creates the full MicroNet architecture based on the + specified variant. It consists of a stem layer followed by + multiple MicroBlocks. + + @type variant: Literal["M1", "M2", "M3"] + @param variant: Model variant to use. Defaults to "M1". + @type out_indices: list[int] | None + @param out_indices: Indices of the output layers. If provided, + overrides the variant value. 
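The role of `out_indices` can be illustrated with a tiny standalone example of the same collect-at-selected-indices pattern used in the forward pass; the layers are toy convolutions, not the real Stem and MicroBlocks:

```python
import torch
from torch import nn

# Toy stand-in layers; the real module uses a Stem followed by MicroBlocks.
layers = nn.ModuleList(
    [
        nn.Conv2d(3, 8, 3, stride=2, padding=1),
        nn.Conv2d(8, 16, 3, stride=2, padding=1),
        nn.Conv2d(16, 32, 3, stride=2, padding=1),
    ]
)
out_indices = [0, 2]  # which intermediate feature maps to return

x = torch.rand(1, 3, 64, 64)
outs: list[torch.Tensor] = []
for i, layer in enumerate(layers):
    x = layer(x)
    if i in out_indices:
        outs.append(x)

print([o.shape for o in outs])
# [torch.Size([1, 8, 32, 32]), torch.Size([1, 32, 8, 8])]
```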
+ """ + super().__init__(**kwargs) + + var = get_variant(variant) + self.out_indices = out_indices or var.out_indices + in_channels = var.stem_channels + + self.layers = nn.ModuleList([Stem(3, 2, var.stem_groups)]) + + for bc in var.block_configs: + self.layers.append( + MicroBlock( + in_channels, + bc.out_channels, + bc.kernel_size, + bc.stride, + bc.expand_ratio, + bc.groups_1, + bc.groups_2, + bc.dy_shifts, + bc.reduction_factor, + var.init_a, + var.init_b, + ) + ) + in_channels = bc.out_channels + + def forward(self, inputs: Tensor) -> list[Tensor]: + outs: list[Tensor] = [] + for i, layer in enumerate(self.layers): + inputs = layer(inputs) + if i in self.out_indices: + outs.append(inputs) + return outs diff --git a/luxonis_train/nodes/backbones/micronet/variants.py b/luxonis_train/nodes/backbones/micronet/variants.py new file mode 100644 index 00000000..22a8d552 --- /dev/null +++ b/luxonis_train/nodes/backbones/micronet/variants.py @@ -0,0 +1,344 @@ +from typing import Literal + +from pydantic import BaseModel + + +class MicroBlockConfig(BaseModel): + stride: int + out_channels: int + kernel_size: int + expand_ratio: tuple[int, int] + groups_1: tuple[int, int] + groups_2: tuple[int, int] + dy_shifts: tuple[int, int, int] + reduction_factor: int + + +class MicroNetVariant(BaseModel): + stem_channels: int + stem_groups: tuple[int, int] + init_a: tuple[float, float] + init_b: tuple[float, float] + out_indices: list[int] + block_configs: list[MicroBlockConfig] + + +M1 = MicroNetVariant( + stem_channels=6, + stem_groups=(3, 2), + init_a=(1.0, 1.0), + init_b=(0.0, 0.0), + out_indices=[1, 2, 4, 7], + block_configs=[ + MicroBlockConfig( + stride=2, + out_channels=8, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 6), + groups_2=(2, 2), + dy_shifts=(2, 0, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=16, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 8), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=16, + kernel_size=5, + expand_ratio=(2, 2), + groups_1=(0, 16), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=32, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(4, 4), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=64, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=96, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=576, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(12, 12), + groups_2=(0, 0), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + ], +) + +M2 = MicroNetVariant( + stem_channels=8, + stem_groups=(4, 2), + init_a=(1.0, 1.0), + init_b=(0.0, 0.0), + out_indices=[1, 3, 6, 9], + block_configs=[ + MicroBlockConfig( + stride=2, + out_channels=12, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 8), + groups_2=(4, 4), + dy_shifts=(2, 0, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=16, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 12), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=24, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 16), + groups_2=(4, 4), + 
dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=32, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(6, 6), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=32, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=64, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + MicroBlockConfig( + stride=2, + out_channels=96, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=128, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(12, 12), + groups_2=(8, 8), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=768, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(16, 16), + groups_2=(0, 0), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + ], +) + +M3 = MicroNetVariant( + stem_channels=12, + stem_groups=(4, 3), + init_a=(1.0, 0.5), + init_b=(0.0, 0.5), + out_indices=[1, 3, 8, 12], + block_configs=[ + MicroBlockConfig( + stride=2, + out_channels=16, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 12), + groups_2=(4, 4), + dy_shifts=(0, 2, 0), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=24, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 16), + groups_2=(4, 4), + dy_shifts=(0, 2, 0), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=24, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 24), + groups_2=(4, 4), + dy_shifts=(0, 2, 0), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=32, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(6, 6), + groups_2=(4, 4), + dy_shifts=(0, 2, 0), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=32, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(4, 4), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=64, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=2, + out_channels=80, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=80, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(10, 10), + groups_2=(8, 8), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=120, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(10, 10), + groups_2=(10, 10), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=120, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(12, 12), + groups_2=(10, 10), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=144, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(12, 12), + groups_2=(12, 12), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=864, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(12, 12), + groups_2=(0, 0), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + ], +) + + +def get_variant(variant: Literal["M1", "M2", "M3"]) -> MicroNetVariant: + 
variants = {"M1": M1, "M2": M2, "M3": M3} + if variant not in variants: # pragma: no cover + raise ValueError( + "MicroNet model variant should be in " + f"{list(variants.keys())}, got {variant}." + ) + return variants[variant] diff --git a/luxonis_train/nodes/backbones/mobilenetv2.py b/luxonis_train/nodes/backbones/mobilenetv2.py index 48161835..8de19854 100644 --- a/luxonis_train/nodes/backbones/mobilenetv2.py +++ b/luxonis_train/nodes/backbones/mobilenetv2.py @@ -1,44 +1,51 @@ -"""MobileNetV2 backbone. - -TODO: source? -""" +from typing import Any import torchvision -from torch import Tensor, nn +from torch import Tensor from luxonis_train.nodes.base_node import BaseNode class MobileNetV2(BaseNode[Tensor, list[Tensor]]): - """Implementation of the MobileNetV2 backbone. - - TODO: add more info - """ - - def __init__(self, download_weights: bool = False, **kwargs): - """Constructor of the MobileNetV2 backbone. + def __init__( + self, + download_weights: bool = False, + out_indices: list[int] | None = None, + **kwargs: Any, + ): + """MobileNetV2 backbone. + + This class implements the MobileNetV2 model as described in: + U{MobileNetV2: Inverted Residuals and Linear Bottlenecks } by Sandler I{et al.} + + The network consists of an initial fully convolutional layer, followed by + 19 bottleneck residual blocks, and a final 1x1 convolution. It can be used + as a feature extractor for tasks like image classification, object detection, + and semantic segmentation. + + Key features: + - Inverted residual structure with linear bottlenecks + - Depth-wise separable convolutions for efficiency + - Configurable width multiplier and input resolution @type download_weights: bool @param download_weights: If True download weights from imagenet. Defaults to False. - @type kwargs: Any - @param kwargs: Additional arguments to pass to L{BaseNode}. + @type out_indices: list[int] | None + @param out_indices: Indices of the output layers. Defaults to [3, 6, 13, 18]. 
""" super().__init__(**kwargs) - mobilenet_v2 = torchvision.models.mobilenet_v2( + self.backbone = torchvision.models.mobilenet_v2( weights="DEFAULT" if download_weights else None ) - mobilenet_v2.classifier = nn.Identity() - self.out_indices = [3, 6, 13, 18] - self.channels = [24, 32, 96, 1280] - self.backbone = mobilenet_v2 - - def forward(self, x: Tensor) -> list[Tensor]: - outs = [] - for i, module in enumerate(self.backbone.features): - x = module(x) + self.out_indices = out_indices or [3, 6, 13, 18] + + def forward(self, inputs: Tensor) -> list[Tensor]: + outs: list[Tensor] = [] + for i, layer in enumerate(self.backbone.features): + inputs = layer(inputs) if i in self.out_indices: - outs.append(x) + outs.append(inputs) return outs diff --git a/luxonis_train/nodes/backbones/mobileone/__init__.py b/luxonis_train/nodes/backbones/mobileone/__init__.py new file mode 100644 index 00000000..a6e573aa --- /dev/null +++ b/luxonis_train/nodes/backbones/mobileone/__init__.py @@ -0,0 +1,3 @@ +from .mobileone import MobileOne + +__all__ = ["MobileOne"] diff --git a/luxonis_train/nodes/backbones/mobileone.py b/luxonis_train/nodes/backbones/mobileone/blocks.py similarity index 55% rename from luxonis_train/nodes/backbones/mobileone.py rename to luxonis_train/nodes/backbones/mobileone/blocks.py index 2d460fd0..63e19eae 100644 --- a/luxonis_train/nodes/backbones/mobileone.py +++ b/luxonis_train/nodes/backbones/mobileone/blocks.py @@ -4,170 +4,12 @@ @license: U{Apple} """ - -from typing import Literal - import torch from torch import Tensor, nn -from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import ConvModule, SqueezeExciteBlock -class MobileOne(BaseNode[Tensor, list[Tensor]]): - """Implementation of MobileOne backbone. - - TODO: add more details - """ - - in_channels: int - - VARIANTS_SETTINGS: dict[str, dict] = { - "s0": {"width_multipliers": (0.75, 1.0, 1.0, 2.0), "num_conv_branches": 4}, - "s1": {"width_multipliers": (1.5, 1.5, 2.0, 2.5)}, - "s2": {"width_multipliers": (1.5, 2.0, 2.5, 4.0)}, - "s3": {"width_multipliers": (2.0, 2.5, 3.0, 4.0)}, - "s4": {"width_multipliers": (3.0, 3.5, 3.5, 4.0), "use_se": True}, - } - - def __init__(self, variant: Literal["s0", "s1", "s2", "s3", "s4"] = "s0", **kwargs): - """Constructor for the MobileOne module. - - @type variant: Literal["s0", "s1", "s2", "s3", "s4"] - @param variant: Specifies which variant of the MobileOne network to use. For - details, see TODO. Defaults to "s0". 
- """ - super().__init__(**kwargs) - - if variant not in MobileOne.VARIANTS_SETTINGS.keys(): - raise ValueError( - f"MobileOne model variant should be in {list(MobileOne.VARIANTS_SETTINGS.keys())}" - ) - - variant_params = MobileOne.VARIANTS_SETTINGS[variant] - # TODO: make configurable - self.width_multipliers = variant_params["width_multipliers"] - self.num_conv_branches = variant_params.get("num_conv_branches", 1) - self.num_blocks_per_stage = [2, 8, 10, 1] - self.use_se = variant_params.get("use_se", False) - - self.in_planes = min(64, int(64 * self.width_multipliers[0])) - - self.stage0 = MobileOneBlock( - in_channels=self.in_channels, - out_channels=self.in_planes, - kernel_size=3, - stride=2, - padding=1, - ) - self.cur_layer_idx = 1 - self.stage1 = self._make_stage( - int(64 * self.width_multipliers[0]), - self.num_blocks_per_stage[0], - num_se_blocks=0, - ) - self.stage2 = self._make_stage( - int(128 * self.width_multipliers[1]), - self.num_blocks_per_stage[1], - num_se_blocks=0, - ) - self.stage3 = self._make_stage( - int(256 * self.width_multipliers[2]), - self.num_blocks_per_stage[2], - num_se_blocks=int(self.num_blocks_per_stage[2] // 2) if self.use_se else 0, - ) - self.stage4 = self._make_stage( - int(512 * self.width_multipliers[3]), - self.num_blocks_per_stage[3], - num_se_blocks=self.num_blocks_per_stage[3] if self.use_se else 0, - ) - - def forward(self, inputs: Tensor) -> list[Tensor]: - outs = [] - x = self.stage0(inputs) - outs.append(x) - x = self.stage1(x) - outs.append(x) - x = self.stage2(x) - outs.append(x) - x = self.stage3(x) - outs.append(x) - x = self.stage4(x) - outs.append(x) - - return outs - - def export_mode(self, export: bool = True) -> None: - """Sets the module to export mode. - - Reparameterizes the model to obtain a plain CNN-like structure for inference. - TODO: add more details - - @warning: The reparametrization is destructive and cannot be reversed! - - @type export: bool - @param export: Whether to set the export mode to True or False. Defaults to True. - """ - if export: - for module in self.modules(): - if hasattr(module, "reparameterize"): - module.reparameterize() - - def _make_stage(self, planes: int, num_blocks: int, num_se_blocks: int): - """Build a stage of MobileOne model. - - @type planes: int - @param planes: Number of output channels. - @type num_blocks: int - @param num_blocks: Number of blocks in this stage. - @type num_se_blocks: int - @param num_se_blocks: Number of SE blocks in this stage. - @rtype: nn.Sequential - @return: A stage of MobileOne model. - """ - # Get strides for all layers - strides = [2] + [1] * (num_blocks - 1) - blocks = [] - for ix, stride in enumerate(strides): - use_se = False - if num_se_blocks > num_blocks: - raise ValueError( - "Number of SE blocks cannot " "exceed number of layers." - ) - if ix >= (num_blocks - num_se_blocks): - use_se = True - - # Depthwise conv - blocks.append( - MobileOneBlock( - in_channels=self.in_planes, - out_channels=self.in_planes, - kernel_size=3, - stride=stride, - padding=1, - groups=self.in_planes, - use_se=use_se, - num_conv_branches=self.num_conv_branches, - ) - ) - # Pointwise conv - blocks.append( - MobileOneBlock( - in_channels=self.in_planes, - out_channels=planes, - kernel_size=1, - stride=1, - padding=0, - groups=1, - use_se=use_se, - num_conv_branches=self.num_conv_branches, - ) - ) - self.in_planes = planes - self.cur_layer_idx += 1 - return nn.Sequential(*blocks) - - class MobileOneBlock(nn.Module): """MobileOne building block. 
@@ -186,7 +28,7 @@ def __init__( padding: int = 0, groups: int = 1, use_se: bool = False, - num_conv_branches: int = 1, + n_conv_branches: int = 1, ): """Construct a MobileOneBlock module. @@ -205,9 +47,11 @@ def __init__( @type groups: int @param groups: Group number. Defaults to 1. @type use_se: bool - @param use_se: Whether to use SE-ReLU activations. Defaults to False. - @type num_conv_branches: int - @param num_conv_branches: Number of linear conv branches. Defaults to 1. + @param use_se: Whether to use SE-ReLU activations. Defaults to + False. + @type n_conv_branches: int + @param n_conv_branches: Number of linear conv branches. Defaults + to 1. """ super().__init__() @@ -216,17 +60,17 @@ def __init__( self.kernel_size = kernel_size self.in_channels = in_channels self.out_channels = out_channels - self.num_conv_branches = num_conv_branches + self.n_conv_branches = n_conv_branches self.inference_mode = False - # Check if SE-ReLU is requested + self.se: nn.Module if use_se: self.se = SqueezeExciteBlock( in_channels=out_channels, intermediate_channels=int(out_channels * 0.0625), ) else: - self.se = nn.Identity() # type: ignore + self.se = nn.Identity() self.activation = nn.ReLU() # Re-parameterizable skip connection @@ -237,8 +81,8 @@ def __init__( ) # Re-parameterizable conv branches - rbr_conv = list() - for _ in range(self.num_conv_branches): + rbr_conv: list[nn.Module] = [] + for _ in range(self.n_conv_branches): rbr_conv.append( ConvModule( in_channels=self.in_channels, @@ -265,9 +109,9 @@ def __init__( activation=nn.Identity(), ) - def forward(self, inputs: Tensor): + def forward(self, inputs: Tensor) -> Tensor: """Apply forward pass.""" - # Inference mode forward pass. + if self.inference_mode: return self.activation(self.se(self.reparam_conv(inputs))) @@ -284,7 +128,7 @@ def forward(self, inputs: Tensor): # Other branches out = scale_out + identity_out - for ix in range(self.num_conv_branches): + for ix in range(self.n_conv_branches): out += self.rbr_conv[ix](inputs) return self.activation(self.se(out)) @@ -315,10 +159,10 @@ def reparameterize(self): # Delete un-used branches for para in self.parameters(): para.detach_() - self.__delattr__("rbr_conv") - self.__delattr__("rbr_scale") + del self.rbr_conv + del self.rbr_scale if hasattr(self, "rbr_skip"): - self.__delattr__("rbr_skip") + del self.rbr_skip self.inference_mode = True @@ -336,18 +180,22 @@ def _get_kernel_bias(self) -> tuple[Tensor, Tensor]: kernel_scale, bias_scale = self._fuse_bn_tensor(self.rbr_scale) # Pad scale branch kernel to match conv branch kernel size. 
pad = self.kernel_size // 2 - kernel_scale = torch.nn.functional.pad(kernel_scale, [pad, pad, pad, pad]) + kernel_scale = torch.nn.functional.pad( + kernel_scale, [pad, pad, pad, pad] + ) # get weights and bias of skip branch kernel_identity = torch.zeros(()) bias_identity = torch.zeros(()) if self.rbr_skip is not None: - kernel_identity, bias_identity = self._fuse_bn_tensor(self.rbr_skip) + kernel_identity, bias_identity = self._fuse_bn_tensor( + self.rbr_skip + ) # get weights and bias of conv branches kernel_conv = torch.zeros(()) bias_conv = torch.zeros(()) - for ix in range(self.num_conv_branches): + for ix in range(self.n_conv_branches): _kernel, _bias = self._fuse_bn_tensor(self.rbr_conv[ix]) kernel_conv = kernel_conv + _kernel bias_conv = bias_conv + _bias @@ -356,7 +204,7 @@ def _get_kernel_bias(self) -> tuple[Tensor, Tensor]: bias_final = bias_conv + bias_scale + bias_identity return kernel_final, bias_final - def _fuse_bn_tensor(self, branch) -> tuple[Tensor, Tensor]: + def _fuse_bn_tensor(self, branch: nn.Module) -> tuple[Tensor, Tensor]: """Method to fuse batchnorm layer with preceeding conv layer. Reference: U{https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L95} @@ -374,13 +222,21 @@ def _fuse_bn_tensor(self, branch) -> tuple[Tensor, Tensor]: if not hasattr(self, "id_tensor"): input_dim = self.in_channels // self.groups kernel_value = torch.zeros( - (self.in_channels, input_dim, self.kernel_size, self.kernel_size), + ( + self.in_channels, + input_dim, + self.kernel_size, + self.kernel_size, + ), dtype=branch.weight.dtype, device=branch.weight.device, ) for i in range(self.in_channels): kernel_value[ - i, i % input_dim, self.kernel_size // 2, self.kernel_size // 2 + i, + i % input_dim, + self.kernel_size // 2, + self.kernel_size // 2, ] = 1 self.id_tensor = kernel_value kernel = self.id_tensor diff --git a/luxonis_train/nodes/backbones/mobileone/mobileone.py b/luxonis_train/nodes/backbones/mobileone/mobileone.py new file mode 100644 index 00000000..8180f960 --- /dev/null +++ b/luxonis_train/nodes/backbones/mobileone/mobileone.py @@ -0,0 +1,197 @@ +"""MobileOne backbone. + +Source: U{} +@license: U{Apple} +""" + +import logging +from typing import Any, Literal + +from torch import Tensor, nn + +from luxonis_train.nodes.base_node import BaseNode + +from .blocks import MobileOneBlock +from .variants import get_variant + +logger = logging.getLogger(__name__) + + +class MobileOne(BaseNode[Tensor, list[Tensor]]): + in_channels: int + + def __init__( + self, + variant: Literal["s0", "s1", "s2", "s3", "s4"] = "s0", + width_multipliers: tuple[float, float, float, float] | None = None, + n_conv_branches: int | None = None, + use_se: bool | None = None, + **kwargs: Any, + ): + """MobileOne: An efficient CNN backbone for mobile devices. + + The architecture focuses on reducing memory access costs and improving parallelism + while allowing aggressive parameter scaling for better representation capacity. + Different variants (S0-S4) offer various accuracy-latency tradeoffs. 
+ + Key features: + - Designed for low latency on mobile while maintaining high accuracy + - Uses re-parameterizable branches during training that get folded at inference + - Employs trivial over-parameterization branches for improved accuracy + - Simple feed-forward structure at inference with no branches/skip connections + - Variants achieve <1ms inference time on iPhone 12 with up to 75.9% top-1 ImageNet accuracy + - Outperforms other efficient architectures like MobileNets on image classification, + object detection and semantic segmentation tasks + - Uses only basic operators available across platforms (no custom activations) + + + Reference: U{MobileOne: An Improved One millisecond Mobile Backbone + } + + @type variant: Literal["s0", "s1", "s2", "s3", "s4"] + @param variant: Specifies which variant of the MobileOne network to use. Defaults to "s0". + Each variant specifies a predefined set of values for: + - width multipliers - A tuple of 4 float values specifying the width multipliers for each stage of the network. If the use of SE blocks is disabled, the last two values are ignored. + - number of convolution branches - An integer specifying the number of linear convolution branches in MobileOne block. + - use of SE blocks - A boolean specifying whether to use SE blocks in the network. + + The variants are as follows: + - s0 (default): width_multipliers=(0.75, 1.0, 1.0, 2.0), n_conv_branches=4, use_se=False + - s1: width_multipliers=(1.5, 1.5, 2.0, 2.5), n_conv_branches=1, use_se=False + - s2: width_multipliers=(1.5, 2.0, 2.5, 4.0), n_conv_branches=1, use_se=False + - s3: width_multipliers=(2.0, 2.5, 3.0, 4.0), n_conv_branches=1, use_se=False + - s4: width_multipliers=(3.0, 3.5, 3.5, 4.0), n_conv_branches=1, use_se=True + + @type width_multipliers: tuple[float, float, float, float] | None + @param width_multipliers: Width multipliers for each stage. If provided, overrides the variant values. + @type n_conv_branches: int | None + @param n_conv_branches: Number of linear convolution branches in MobileOne block. If provided, overrides the variant values. + @type use_se: bool | None + @param use_se: Whether to use SE blocks in the network. If provided, overrides the variant value. 
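+
+        Example (a minimal usage sketch; the `in_sizes` value and the dummy
+        input are placeholders chosen for illustration)::
+
+            >>> import torch
+            >>> backbone = MobileOne(
+            ...     variant="s0", in_sizes=torch.Size([1, 3, 224, 224])
+            ... )
+            >>> features = backbone.forward(torch.rand(1, 3, 224, 224))
+            >>> len(features)  # stage0 output followed by stages 1-4
+            5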
+ """ + super().__init__(**kwargs) + + var = get_variant(variant) + + width_multipliers = width_multipliers or var.width_multipliers + use_se = use_se or var.use_se + self.n_blocks_per_stage = [2, 8, 10, 1] + self.n_conv_branches = n_conv_branches or var.n_conv_branches + + self.in_planes = min(64, int(64 * width_multipliers[0])) + + self.stage0 = MobileOneBlock( + in_channels=self.in_channels, + out_channels=self.in_planes, + kernel_size=3, + stride=2, + padding=1, + ) + self.cur_layer_idx = 1 + self.stage1 = self._make_stage( + int(64 * width_multipliers[0]), + self.n_blocks_per_stage[0], + n_se_blocks=0, + ) + self.stage2 = self._make_stage( + int(128 * width_multipliers[1]), + self.n_blocks_per_stage[1], + n_se_blocks=0, + ) + self.stage3 = self._make_stage( + int(256 * width_multipliers[2]), + self.n_blocks_per_stage[2], + n_se_blocks=self.n_blocks_per_stage[2] // 2 if use_se else 0, + ) + self.stage4 = self._make_stage( + int(512 * width_multipliers[3]), + self.n_blocks_per_stage[3], + n_se_blocks=self.n_blocks_per_stage[3] if use_se else 0, + ) + + def forward(self, inputs: Tensor) -> list[Tensor]: + outs: list[Tensor] = [] + x = self.stage0(inputs) + outs.append(x) + x = self.stage1(x) + outs.append(x) + x = self.stage2(x) + outs.append(x) + x = self.stage3(x) + outs.append(x) + x = self.stage4(x) + outs.append(x) + + return outs + + def set_export_mode(self, mode: bool = True) -> None: + """Sets the module to export mode. + + Reparameterizes the model to obtain a plain CNN-like structure for inference. + TODO: add more details + + @warning: The reparametrization is destructive and cannot be reversed! + + @type export: bool + @param export: Whether to set the export mode to True or False. Defaults to True. + """ + super().set_export_mode(mode) + if self.export: + logger.info("Reparametrizing 'MobileOne'.") + for module in self.modules(): + if hasattr(module, "reparameterize"): + module.reparameterize() + + def _make_stage(self, planes: int, n_blocks: int, n_se_blocks: int): + """Build a stage of MobileOne model. + + @type planes: int + @param planes: Number of output channels. + @type n_blocks: int + @param n_blocks: Number of blocks in this stage. + @type n_se_blocks: int + @param n_se_blocks: Number of SE blocks in this stage. + @rtype: nn.Sequential + @return: A stage of MobileOne model. + """ + # Get strides for all layers + strides = [2] + [1] * (n_blocks - 1) + blocks: list[nn.Module] = [] + for ix, stride in enumerate(strides): + use_se = False + if n_se_blocks > n_blocks: + raise ValueError( + "Number of SE blocks cannot " "exceed number of layers." 
+ ) + if ix >= (n_blocks - n_se_blocks): + use_se = True + + # Depthwise conv + blocks.append( + MobileOneBlock( + in_channels=self.in_planes, + out_channels=self.in_planes, + kernel_size=3, + stride=stride, + padding=1, + groups=self.in_planes, + use_se=use_se, + n_conv_branches=self.n_conv_branches, + ) + ) + # Pointwise conv + blocks.append( + MobileOneBlock( + in_channels=self.in_planes, + out_channels=planes, + kernel_size=1, + stride=1, + padding=0, + groups=1, + use_se=use_se, + n_conv_branches=self.n_conv_branches, + ) + ) + self.in_planes = planes + self.cur_layer_idx += 1 + return nn.Sequential(*blocks) diff --git a/luxonis_train/nodes/backbones/mobileone/variants.py b/luxonis_train/nodes/backbones/mobileone/variants.py new file mode 100644 index 00000000..fbb0add3 --- /dev/null +++ b/luxonis_train/nodes/backbones/mobileone/variants.py @@ -0,0 +1,39 @@ +from typing import Literal + +from pydantic import BaseModel + + +class MobileOneVariant(BaseModel): + width_multipliers: tuple[float, float, float, float] + n_conv_branches: int = 1 + use_se: bool = False + + +def get_variant( + variant: Literal["s0", "s1", "s2", "s3", "s4"], +) -> MobileOneVariant: + variants = { + "s0": MobileOneVariant( + width_multipliers=(0.75, 1.0, 1.0, 2.0), + n_conv_branches=4, + ), + "s1": MobileOneVariant( + width_multipliers=(1.5, 1.5, 2.0, 2.5), + ), + "s2": MobileOneVariant( + width_multipliers=(1.5, 2.0, 2.5, 4.0), + ), + "s3": MobileOneVariant( + width_multipliers=(2.0, 2.5, 3.0, 4.0), + ), + "s4": MobileOneVariant( + width_multipliers=(3.0, 3.5, 3.5, 4.0), + use_se=True, + ), + } + if variant not in variants: # pragma: no cover + raise ValueError( + "MobileOne model variant should be in " + f"{list(variants.keys())}, got {variant}." + ) + return variants[variant] diff --git a/luxonis_train/nodes/backbones/repvgg.py b/luxonis_train/nodes/backbones/repvgg.py deleted file mode 100644 index c536c78e..00000000 --- a/luxonis_train/nodes/backbones/repvgg.py +++ /dev/null @@ -1,149 +0,0 @@ -import logging -from typing import Literal - -import torch.utils.checkpoint as checkpoint -from torch import Tensor, nn - -from luxonis_train.nodes.blocks import RepVGGBlock - -from ..base_node import BaseNode - -logger = logging.getLogger(__name__) - - -class RepVGG(BaseNode): - """Implementation of RepVGG backbone. - - Source: U{https://github.com/DingXiaoH/RepVGG} - @license: U{MIT}. - - @todo: technical documentation - """ - - in_channels: int - attach_index: int = -1 - - VARIANTS_SETTINGS = { - "A0": { - "num_blocks": [2, 4, 14, 1], - "width_multiplier": [0.75, 0.75, 0.75, 2.5], - }, - "A1": { - "num_blocks": [2, 4, 14, 1], - "width_multiplier": [1, 1, 1, 2.5], - }, - "A2": { - "num_blocks": [2, 4, 14, 1], - "width_multiplier": [1.5, 1.5, 1.5, 2.75], - }, - } - - def __init__( - self, - variant: Literal["A0", "A1", "A2"] = "A0", - num_blocks: list[int] | None = None, - width_multiplier: list[float] | None = None, - override_groups_map: dict[int, int] | None = None, - use_se: bool = False, - use_checkpoint: bool = False, - **kwargs, - ): - """Constructor for the RepVGG module. - - @type variant: Literal["A0", "A1", "A2"] - @param variant: RepVGG model variant. Defaults to "A0". - @type override_groups_map: dict[int, int] | None - @param override_groups_map: Dictionary mapping layer index to number of groups. - @type use_se: bool - @param use_se: Whether to use Squeeze-and-Excitation blocks. - @type use_checkpoint: bool - @param use_checkpoint: Whether to use checkpointing. 
- @type num_blocks: list[int] | None - @param num_blocks: Number of blocks in each stage. - @type width_multiplier: list[float] | None - @param width_multiplier: Width multiplier for each stage. - """ - super().__init__(**kwargs) - if variant not in self.VARIANTS_SETTINGS.keys(): - raise ValueError( - f"RepVGG model variant should be one of " - f"{list(self.VARIANTS_SETTINGS.keys())}." - ) - - num_blocks = num_blocks or self.VARIANTS_SETTINGS[variant]["num_blocks"] - width_multiplier = ( - width_multiplier or self.VARIANTS_SETTINGS[variant]["width_multiplier"] - ) - self.override_groups_map = override_groups_map or {} - assert 0 not in self.override_groups_map - self.use_se = use_se - self.use_checkpoint = use_checkpoint - - self.in_planes = min(64, int(64 * width_multiplier[0])) - self.stage0 = RepVGGBlock( - in_channels=self.in_channels, - out_channels=self.in_planes, - kernel_size=3, - stride=2, - padding=1, - use_se=self.use_se, - ) - self.cur_layer_idx = 1 - self.stage1 = self._make_stage( - int(64 * width_multiplier[0]), num_blocks[0], stride=2 - ) - self.stage2 = self._make_stage( - int(128 * width_multiplier[1]), num_blocks[1], stride=2 - ) - self.stage3 = self._make_stage( - int(256 * width_multiplier[2]), num_blocks[2], stride=2 - ) - self.stage4 = self._make_stage( - int(512 * width_multiplier[3]), num_blocks[3], stride=2 - ) - self.gap = nn.AdaptiveAvgPool2d(output_size=1) - - def forward(self, inputs: Tensor) -> list[Tensor]: - outputs = [] - out = self.stage0(inputs) - for stage in (self.stage1, self.stage2, self.stage3, self.stage4): - for block in stage: - if self.use_checkpoint: - out = checkpoint.checkpoint(block, out) - else: - out = block(out) - outputs.append(out) - return outputs - - def _make_stage(self, planes: int, num_blocks: int, stride: int): - strides = [stride] + [1] * (num_blocks - 1) - blocks = [] - for stride in strides: - cur_groups = self.override_groups_map.get(self.cur_layer_idx, 1) - blocks.append( - RepVGGBlock( - in_channels=self.in_planes, - out_channels=planes, - kernel_size=3, - stride=stride, - padding=1, - groups=cur_groups, - use_se=self.use_se, - ) - ) - self.in_planes = planes - self.cur_layer_idx += 1 - return nn.ModuleList(blocks) - - def set_export_mode(self, mode: bool = True) -> None: - """Reparametrizes instances of L{RepVGGBlock} in the network. - - @type mode: bool - @param mode: Whether to set the export mode. Defaults to C{True}. 
- """ - super().set_export_mode(mode) - if self.export: - logger.info("Reparametrizing RepVGG.") - for module in self.modules(): - if isinstance(module, RepVGGBlock): - module.reparametrize() diff --git a/luxonis_train/nodes/backbones/repvgg/__init__.py b/luxonis_train/nodes/backbones/repvgg/__init__.py new file mode 100644 index 00000000..61a5a4fc --- /dev/null +++ b/luxonis_train/nodes/backbones/repvgg/__init__.py @@ -0,0 +1,3 @@ +from .repvgg import RepVGG + +__all__ = ["RepVGG"] diff --git a/luxonis_train/nodes/backbones/repvgg/repvgg.py b/luxonis_train/nodes/backbones/repvgg/repvgg.py new file mode 100644 index 00000000..fd8a5e67 --- /dev/null +++ b/luxonis_train/nodes/backbones/repvgg/repvgg.py @@ -0,0 +1,135 @@ +import logging +from collections import defaultdict +from typing import Any, Literal + +import torch.utils.checkpoint as checkpoint +from torch import Tensor, nn + +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.nodes.blocks import RepVGGBlock + +from .variants import get_variant + +logger = logging.getLogger(__name__) + + +class RepVGG(BaseNode[Tensor, list[Tensor]]): + in_channels: int + attach_index: int = -1 + + def __init__( + self, + variant: Literal["A0", "A1", "A2"] = "A0", + n_blocks: tuple[int, int, int, int] | None = None, + width_multiplier: tuple[float, float, float, float] | None = None, + override_groups_map: dict[int, int] | None = None, + use_se: bool = False, + use_checkpoint: bool = False, + **kwargs: Any, + ): + """RepVGG backbone. + + RepVGG is a VGG-style convolutional architecture. + + - Simple feed-forward topology without any branching. + - 3x3 convolutions and ReLU activations. + - No automatic search, manual refinement or compound scaling. + + @license: U{MIT + }. + + @see: U{https://github.com/DingXiaoH/RepVGG} + @see: U{https://paperswithcode.com/method/repvgg} + @see: U{RepVGG: Making VGG-style ConvNets Great Again + } + + + @type variant: Literal["A0", "A1", "A2"] + @param variant: RepVGG model variant. Defaults to "A0". + @type override_groups_map: dict[int, int] | None + @param override_groups_map: Dictionary mapping layer index to number of groups. The layers are indexed starting from 0. + @type use_se: bool + @param use_se: Whether to use Squeeze-and-Excitation blocks. + @type use_checkpoint: bool + @param use_checkpoint: Whether to use checkpointing. + @type n_blocks: tuple[int, int, int, int] | None + @param n_blocks: Number of blocks in each stage. + @type width_multiplier: tuple[float, float, float, float] | None + @param width_multiplier: Width multiplier for each stage. 
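+
+        Example (a minimal usage sketch; the `in_sizes` value and the dummy
+        input are placeholders chosen for illustration)::
+
+            >>> import torch
+            >>> backbone = RepVGG(
+            ...     variant="A0", in_sizes=torch.Size([1, 3, 256, 256])
+            ... )
+            >>> features = backbone.forward(torch.rand(1, 3, 256, 256))
+            >>> len(features)  # one output per block across all four stages
+            21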
+ """ + super().__init__(**kwargs) + var = get_variant(variant) + + n_blocks = n_blocks or var.n_blocks + width_multiplier = width_multiplier or var.width_multiplier + override_groups_map = defaultdict(lambda: 1, override_groups_map or {}) + self.use_se = use_se + self.use_checkpoint = use_checkpoint + + self.in_planes = min(64, int(64 * width_multiplier[0])) + self.stage0 = RepVGGBlock( + in_channels=self.in_channels, + out_channels=self.in_planes, + kernel_size=3, + stride=2, + padding=1, + use_se=self.use_se, + ) + self.blocks = nn.ModuleList( + [ + block + for i in range(4) + for block in self._make_stage( + int(2**i * 64 * width_multiplier[i]), + n_blocks[i], + stride=2, + groups=override_groups_map[i], + ) + ] + ) + self.gap = nn.AdaptiveAvgPool2d(output_size=1) + + def forward(self, inputs: Tensor) -> list[Tensor]: + outputs: list[Tensor] = [] + out = self.stage0(inputs) + for block in self.blocks: + if self.use_checkpoint: + out = checkpoint.checkpoint(block, out) + else: + out = block(out) + outputs.append(out) # type: ignore + return outputs + + def _make_stage( + self, channels: int, n_blocks: int, stride: int, groups: int + ) -> nn.ModuleList: + strides = [stride] + [1] * (n_blocks - 1) + blocks: list[nn.Module] = [] + for stride in strides: + blocks.append( + RepVGGBlock( + in_channels=self.in_planes, + out_channels=channels, + kernel_size=3, + stride=stride, + padding=1, + groups=groups, + use_se=self.use_se, + ) + ) + self.in_planes = channels + return nn.ModuleList(blocks) + + def set_export_mode(self, mode: bool = True) -> None: + """Reparametrizes instances of L{RepVGGBlock} in the network. + + @type mode: bool + @param mode: Whether to set the export mode. Defaults to + C{True}. + """ + super().set_export_mode(mode) + if self.export: + logger.info("Reparametrizing RepVGG.") + for module in self.modules(): + if isinstance(module, RepVGGBlock): + module.reparametrize() diff --git a/luxonis_train/nodes/backbones/repvgg/variants.py b/luxonis_train/nodes/backbones/repvgg/variants.py new file mode 100644 index 00000000..a5c734b5 --- /dev/null +++ b/luxonis_train/nodes/backbones/repvgg/variants.py @@ -0,0 +1,31 @@ +from typing import Literal + +from pydantic import BaseModel + + +class RepVGGVariant(BaseModel): + n_blocks: tuple[int, int, int, int] + width_multiplier: tuple[float, float, float, float] + + +def get_variant(variant: Literal["A0", "A1", "A2"]) -> RepVGGVariant: + variants = { + "A0": RepVGGVariant( + n_blocks=(2, 4, 14, 1), + width_multiplier=(0.75, 0.75, 0.75, 2.5), + ), + "A1": RepVGGVariant( + n_blocks=(2, 4, 14, 1), + width_multiplier=(1, 1, 1, 2.5), + ), + "A2": RepVGGVariant( + n_blocks=(2, 4, 14, 1), + width_multiplier=(1.5, 1.5, 1.5, 2.75), + ), + } + if variant not in variants: # pragma: no cover + raise ValueError( + f"RepVGG variant should be one of " + f"{list(variants.keys())}, got '{variant}'." + ) + return variants[variant] diff --git a/luxonis_train/nodes/backbones/resnet.py b/luxonis_train/nodes/backbones/resnet.py index e4228410..93a13d4a 100644 --- a/luxonis_train/nodes/backbones/resnet.py +++ b/luxonis_train/nodes/backbones/resnet.py @@ -1,55 +1,98 @@ -"""ResNet backbone. 
- -Source: U{https://pytorch.org/vision/main/models/resnet.html} -@license: U{PyTorch} -""" -from typing import Literal +from typing import Any, Literal import torchvision -from torch import Tensor, nn +from torch import Tensor +from torchvision.models import ResNet as TorchResNet -from ..base_node import BaseNode +from luxonis_train.nodes.base_node import BaseNode class ResNet(BaseNode[Tensor, list[Tensor]]): def __init__( self, variant: Literal["18", "34", "50", "101", "152"] = "18", - channels_list: list[int] | None = None, download_weights: bool = False, - **kwargs, + zero_init_residual: bool = False, + groups: int = 1, + width_per_group: int = 64, + replace_stride_with_dilation: tuple[bool, bool, bool] = ( + False, + False, + False, + ), + **kwargs: Any, ): - """Implementation of the ResNetX backbone. + """ResNet backbone. + + Implements the backbone of a ResNet (Residual Network) architecture. + + ResNet is designed to address the vanishing gradient problem in deep neural networks + by introducing skip connections. These connections allow the network to learn + residual functions with reference to the layer inputs, enabling training of much + deeper networks. + + This backbone can be used as a feature extractor for various computer vision tasks + such as image classification, object detection, and semantic segmentation. It + provides a robust set of features that can be fine-tuned for specific applications. - TODO: add more info + The architecture consists of stacked residual blocks, each containing convolutional + layers, batch normalization, and ReLU activations. The skip connections can be + either identity mappings or projections, depending on the block type. + Source: U{https://pytorch.org/vision/main/models/resnet.html} + + @license: U{PyTorch} + + @param variant: ResNet variant, determining the depth and structure of the network. Options are: + - "18": 18 layers, uses basic blocks, smaller model suitable for simpler tasks. + - "34": 34 layers, uses basic blocks, good balance of depth and computation. + - "50": 50 layers, introduces bottleneck blocks, deeper feature extraction. + - "101": 101 layers, uses bottleneck blocks, high capacity for complex tasks. + - "152": 152 layers, deepest variant, highest capacity but most computationally intensive. + The number in each variant represents the total number of weighted layers. + Deeper networks generally offer higher accuracy but require more computation. @type variant: Literal["18", "34", "50", "101", "152"] - @param variant: ResNet variant. Defaults to "18". - @type channels_list: list[int] | None - @param channels_list: List of channels to return. - If unset, defaults to [64, 128, 256, 512]. + @default variant: "18" @type download_weights: bool - @param download_weights: If True download weights from imagenet. + @param download_weights: If True download weights trained on imagenet. Defaults to False. + @type zero_init_residual: bool + @param zero_init_residual: Zero-initialize the last BN in each residual branch, + so that the residual branch starts with zeros, and each residual block behaves like an identity. + This improves the model by 0.2~0.3% according to U{Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour }. Defaults to C{False}. + + @type groups: int + @param groups: Number of groups for each block. + Defaults to 1. Can be set to a different value only + for ResNet-50, ResNet-101, and ResNet-152. 
+ The width of the convolutional blocks is computed as + C{int(in_channels * (width_per_group / 64.0)) * groups} + + @type width_per_group: int + @param width_per_group: Number of channels per group. + Defaults to 64. Can be set to a different value only + for ResNet-50, ResNet-101, and ResNet-152. + The width of the convolutional blocks is computed as + C{int(in_channels * (width_per_group / 64.0)) * groups} + + @type replace_stride_with_dilation: tuple[bool, bool, bool] + @param replace_stride_with_dilation: Tuple of booleans where each + indicates if the 2x2 strides should be replaced with a dilated convolution instead. + Defaults to (False, False, False). Can be set to a different value only for ResNet-50, ResNet-101, and ResNet-152. """ super().__init__(**kwargs) - - if variant not in RESNET_VARIANTS: - raise ValueError( - f"ResNet model variant should be in {list(RESNET_VARIANTS.keys())}" - ) - - self.backbone = RESNET_VARIANTS[variant]( - weights="DEFAULT" if download_weights else None + self.backbone = self._get_backbone( + variant, + weights="DEFAULT" if download_weights else None, + zero_init_residual=zero_init_residual, + groups=groups, + width_per_group=width_per_group, + replace_stride_with_dilation=replace_stride_with_dilation, ) - self.backbone.fc = nn.Identity() - - self.channels_list = channels_list or [64, 128, 256, 512] - def forward(self, inputs: Tensor) -> list[Tensor]: - outs = [] + outs: list[Tensor] = [] x = self.backbone.conv1(inputs) x = self.backbone.bn1(x) x = self.backbone.relu(x) @@ -66,11 +109,20 @@ def forward(self, inputs: Tensor) -> list[Tensor]: return outs - -RESNET_VARIANTS = { - "18": torchvision.models.resnet18, - "34": torchvision.models.resnet34, - "50": torchvision.models.resnet50, - "101": torchvision.models.resnet101, - "152": torchvision.models.resnet152, -} + @staticmethod + def _get_backbone( + variant: Literal["18", "34", "50", "101", "152"], **kwargs: Any + ) -> TorchResNet: + variants = { + "18": torchvision.models.resnet18, + "34": torchvision.models.resnet34, + "50": torchvision.models.resnet50, + "101": torchvision.models.resnet101, + "152": torchvision.models.resnet152, + } + if variant not in variants: + raise ValueError( + "ResNet model variant should be in " + f"{list(variants.keys())}, got {variant}." + ) + return variants[variant](**kwargs) diff --git a/luxonis_train/nodes/backbones/rexnetv1.py b/luxonis_train/nodes/backbones/rexnetv1.py index 6d23857e..6567586a 100644 --- a/luxonis_train/nodes/backbones/rexnetv1.py +++ b/luxonis_train/nodes/backbones/rexnetv1.py @@ -1,15 +1,11 @@ -"""Implementation of the ReXNetV1 backbone. - -Source: U{https://github.com/clovaai/rexnet} -@license: U{MIT} -""" +from typing import Any import torch from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import ConvModule -from luxonis_train.utils.general import make_divisible +from luxonis_train.utils import make_divisible class ReXNetV1_lite(BaseNode[Tensor, list[Tensor]]): @@ -21,10 +17,33 @@ def __init__( final_ch: int = 164, multiplier: float = 1.0, kernel_sizes: int | list[int] = 3, - **kwargs, + out_indices: list[int] | None = None, + **kwargs: Any, ): - """ReXNetV1_lite backbone. + """ReXNetV1 (Rank Expansion Networks) backbone, lite version. 
+ + ReXNet proposes a new approach to designing lightweight CNN architectures by: + + - Studying proper channel dimension expansion at the layer level using rank analysis + - Searching for effective channel configurations across the entire network + - Parameterizing channel dimensions as a linear function of network depth + + Key aspects: + + - Uses inverted bottleneck blocks similar to MobileNetV2 + - Employs a linear parameterization of channel dimensions across blocks + - Replaces ReLU6 with SiLU (Swish-1) activation in certain layers + - Incorporates Squeeze-and-Excitation modules + + ReXNet achieves state-of-the-art performance among lightweight models on ImageNet + classification and transfers well to tasks like object detection and fine-grained classification. + Source: U{https://github.com/clovaai/rexnet} + + @license: U{MIT + } + @copyright: 2021-present NAVER Corp. + @see U{Rethinking Channel Dimensions for Efficient Model Design } @type fix_head_stem: bool @param fix_head_stem: Whether to multiply head stem. Defaults to False. @type divisible_value: int @@ -37,40 +56,44 @@ def __init__( @param multiplier: Channel dimension multiplier. Defaults to 1.0. @type kernel_sizes: int | list[int] @param kernel_sizes: Kernel size for each block. Defaults to 3. + @param out_indices: list[int] | None + @param out_indices: Indices of the output layers. Defaults to [1, 4, 10, 17]. """ super().__init__(**kwargs) - self.out_indices = [1, 4, 10, 17] - self.channels = [16, 48, 112, 184] layers = [1, 2, 2, 3, 3, 5] strides = [1, 2, 2, 2, 1, 2] + self.n_convblocks = sum(layers) + self.out_indices = out_indices or [1, 4, 10, 17] + kernel_sizes = ( - [kernel_sizes] * 6 if isinstance(kernel_sizes, int) else kernel_sizes + [kernel_sizes] * 6 + if isinstance(kernel_sizes, int) + else kernel_sizes ) - strides = sum( - [ - [element] + [1] * (layers[idx] - 1) - for idx, element in enumerate(strides) - ], - [], - ) + strides = [ + s if i == 0 else 1 + for layer, s in zip(layers, strides) + for i in range(layer) + ] ts = [1] * layers[0] + [6] * sum(layers[1:]) - kernel_sizes = sum( - [[element] * layers[idx] for idx, element in enumerate(kernel_sizes)], [] - ) - self.num_convblocks = sum(layers[:]) + kernel_sizes = [ + ks for ks, layer in zip(kernel_sizes, layers) for _ in range(layer) + ] features: list[nn.Module] = [] inplanes = input_ch / multiplier if multiplier < 1.0 else input_ch - first_channel = 32 / multiplier if multiplier < 1.0 or fix_head_stem else 32 + first_channel = ( + 32 / multiplier if multiplier < 1.0 or fix_head_stem else 32 + ) first_channel = make_divisible( int(round(first_channel * multiplier)), divisible_value ) - in_channels_group = [] - channels_group = [] + in_channels_group: list[int] = [] + channels_group: list[int] = [] features.append( ConvModule( @@ -83,7 +106,7 @@ def __init__( ) ) - for i in range(self.num_convblocks): + for i in range(self.n_convblocks): inplanes_divisible = make_divisible( int(round(inplanes * multiplier)), divisible_value ) @@ -92,7 +115,7 @@ def __init__( channels_group.append(inplanes_divisible) else: in_channels_group.append(inplanes_divisible) - inplanes += final_ch / (self.num_convblocks - 1 * 1.0) + inplanes += final_ch / (self.n_convblocks - 1 * 1.0) inplanes_divisible = make_divisible( int(round(inplanes * multiplier)), divisible_value ) @@ -100,7 +123,12 @@ def __init__( assert channels_group for in_c, c, t, k, s in zip( - in_channels_group, channels_group, ts, kernel_sizes, strides, strict=True + in_channels_group, + channels_group, + ts, + 
kernel_sizes, + strides, + strict=True, ): features.append( LinearBottleneck( @@ -109,7 +137,9 @@ def __init__( ) pen_channels = ( - int(1280 * multiplier) if multiplier > 1 and not fix_head_stem else 1280 + int(1280 * multiplier) + if multiplier > 1 and not fix_head_stem + else 1280 ) features.append( ConvModule( @@ -121,12 +151,12 @@ def __init__( ) self.features = nn.Sequential(*features) - def forward(self, x: Tensor) -> list[Tensor]: - outs = [] + def forward(self, inputs: Tensor) -> list[Tensor]: + outs: list[Tensor] = [] for i, module in enumerate(self.features): - x = module(x) + inputs = module(inputs) if i in self.out_indices: - outs.append(x) + outs.append(inputs) return outs @@ -138,14 +168,12 @@ def __init__( t: int, kernel_size: int = 3, stride: int = 1, - **kwargs, ): - super(LinearBottleneck, self).__init__(**kwargs) - self.conv_shortcut = None + super().__init__() self.use_shortcut = stride == 1 and in_channels <= channels self.in_channels = in_channels self.out_channels = channels - out = [] + out: list[nn.Module] = [] if t != 1: dw_channels = in_channels * t out.append( diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py index 9db45316..aad0b2f2 100644 --- a/luxonis_train/nodes/base_node.py +++ b/luxonis_train/nodes/base_node.py @@ -1,25 +1,26 @@ import inspect +import logging from abc import ABC, abstractmethod +from contextlib import suppress from typing import Generic, TypeVar +from luxonis_ml.data import LabelType from luxonis_ml.utils.registry import AutoRegisterMeta -from pydantic import BaseModel, ValidationError from torch import Size, Tensor, nn +from typeguard import TypeCheckError, check_type -from luxonis_train.utils.general import DatasetMetadata, validate_packet -from luxonis_train.utils.registry import NODES -from luxonis_train.utils.types import ( +from luxonis_train.utils import ( AttachIndexType, - FeaturesProtocol, + DatasetMetadata, IncompatibleException, - LabelType, Packet, ) +from luxonis_train.utils.registry import NODES ForwardOutputT = TypeVar("ForwardOutputT") ForwardInputT = TypeVar("ForwardInputT") -__all__ = ["BaseNode"] +logger = logging.getLogger(__name__) class BaseNode( @@ -41,13 +42,10 @@ class BaseNode( of lists of tensors. Each key in the dictionary represents a different output from the previous node. Input to the node is a list of L{Packet}s, output is a single L{Packet}. - Each node can define a list of L{BaseProtocol}s that the inputs must conform to. - L{BaseProtocol} is a pydantic model that defines the structure of the input. - When the node is called, the inputs are validated against the protocols and - then sent to the L{unwrap} method. The C{unwrap} method should return a valid - input to the L{forward} method. Outputs of the C{forward} method are then - send to L{weap} method, which wraps the output into a C{Packet}, which is the - output of the node. + When the node is called, the inputs are sent to the L{unwrap} method. + The C{unwrap} method should return a valid input to the L{forward} method. + Outputs of the C{forward} method are then send to L{wrap} method, + which wraps the output into a C{Packet}. The wrapped C{Packet} is the final output of the node. The L{run} method combines the C{unwrap}, C{forward} and C{wrap} methods together with input validation. @@ -55,13 +53,12 @@ class BaseNode( When subclassing, the following methods should be implemented: - L{forward}: Forward pass of the module. - L{unwrap}: Optional. Unwraps the inputs from the input packet. 
-        The default implementation expects a single input with `features` key.
+          The default implementation expects a single input with `features` key.
     - L{wrap}: Optional. Wraps the output of the forward pass
-        into a `Packet[Tensor]`. The default implementation expects wraps the output
-        of the forward pass into a packet with either "features" or the task name as the key.
+          into a `Packet[Tensor]`. The default implementation wraps the output
+          of the forward pass into a packet with either "features" or the task name as the key.
 
     Additionally, the following class attributes can be defined:
-    - L{input_protocols}: List of input protocols used to validate inputs to the node.
     - L{attach_index}: Index of previous output that this node attaches to.
     - L{tasks}: Dictionary of tasks that the node supports.
 
@@ -94,32 +91,6 @@ def wrap(output: Tensor) -> Packet[Tensor]:
         # by the attached modules.
         return {"classification": [output]}
 
-    @type input_shapes: list[Packet[Size]] | None
-    @param input_shapes: List of input shapes for the module.
-
-    @type original_in_shape: Size | None
-    @param original_in_shape: Original input shape of the model. Some
-        nodes won't function if not provided.
-
-    @type dataset_metadata: L{DatasetMetadata} | None
-    @param dataset_metadata: Metadata of the dataset.
-        Some nodes won't function if not provided.
-
-    @type n_classes: int | None
-    @param n_classes: Number of classes in the dataset. Provide only
-        in case `dataset_metadata` is not provided. Defaults to None.
-
-    @type in_sizes: Size | list[Size] | None
-    @param in_sizes: List of input sizes for the node.
-        Provide only in case the `input_shapes` were not provided.
-
-    @type _tasks: dict[LabelType, str] | None
-    @param _tasks: Dictionary of tasks that the node supports. Overrides the
-        class L{tasks} attribute. Shouldn't be provided by the user in most cases.
-
-    @type input_protocols: list[type[BaseModel]]
-    @ivar input_protocols: List of input protocols used to validate inputs to the node.
-        Defaults to [L{FeaturesProtocol}].
 
     @type attach_index: AttachIndexType
     @ivar attach_index: Index of previous output that this node attaches to.
@@ -135,7 +106,6 @@ class L{tasks} attribute. Shouldn't be provided by the user in most cases.
         Only needs to be defined for head nodes.
     """
 
-    input_protocols: list[type[BaseModel]] = [FeaturesProtocol]
     attach_index: AttachIndexType
     tasks: list[LabelType] | dict[LabelType, str] | None = None
 
@@ -148,10 +118,50 @@ def __init__(
         n_classes: int | None = None,
         n_keypoints: int | None = None,
         in_sizes: Size | list[Size] | None = None,
+        attach_index: AttachIndexType | None = None,
         _tasks: dict[LabelType, str] | None = None,
     ):
+        """Constructor for the BaseNode.
+
+        @type input_shapes: list[Packet[Size]] | None
+        @param input_shapes: List of input shapes for the module.
+
+        @type original_in_shape: Size | None
+        @param original_in_shape: Original input shape of the model. Some
+            nodes won't function if not provided.
+
+        @type dataset_metadata: L{DatasetMetadata} | None
+        @param dataset_metadata: Metadata of the dataset.
+            Some nodes won't function if not provided.
+
+        @type n_classes: int | None
+        @param n_classes: Number of classes in the dataset. Provide only
+            in case `dataset_metadata` is not provided. Defaults to None.
+
+        @type in_sizes: Size | list[Size] | None
+        @param in_sizes: List of input sizes for the node.
+            Provide only in case the `input_shapes` were not provided.
+ + @type attach_index: AttachIndexType + @param attach_index: Index of previous output that this node attaches to. + Can be a single integer to specify a single output, a tuple of + two or three integers to specify a range of outputs or `"all"` to + specify all outputs. Defaults to "all". Python indexing conventions apply. If provided as a constructor argument, overrides the class attribute. + + + @type _tasks: dict[LabelType, str] | None + @param _tasks: Dictionary of tasks that the node supports. Overrides the + class L{tasks} attribute. Shouldn't be provided by the user in most cases. + """ super().__init__() + if attach_index is not None: + logger.warning( + f"Node {self.name} overrides `attach_index` " + f"by setting it to '{attach_index}'. " + "Make sure this is intended." + ) + self.attach_index = attach_index self._tasks = None if _tasks is not None: self._tasks = _tasks @@ -180,15 +190,36 @@ def __init__( self._epoch = 0 self._in_sizes = in_sizes + self._check_type_overrides() + @staticmethod def _process_tasks( tasks: dict[LabelType, str] | list[LabelType], ) -> dict[LabelType, str]: if isinstance(tasks, dict): return tasks - if isinstance(tasks, list): + else: return {task: task.value for task in tasks} + def _check_type_overrides(self) -> None: + properties = [] + for name, value in inspect.getmembers(self.__class__): + if isinstance(value, property): + properties.append(name) + for name, typ in self.__annotations__.items(): + if name in properties: + with suppress(RuntimeError): + value = getattr(self, name) + try: + check_type(value, typ) + except TypeCheckError as e: + raise IncompatibleException( + f"Node '{self.name}' specifies the type of the property `{name}` as `{typ}`, " + f"but received `{type(value)}`. " + f"This may indicate that the '{self.name}' node is " + "not compatible with its predecessor." + ) from e + def get_task_name(self, task: LabelType) -> str: """Gets the name of a task for a particular C{LabelType}. @@ -196,14 +227,15 @@ def get_task_name(self, task: LabelType) -> str: @param task: Task to get the name for. @rtype: str @return: Name of the task. + @raises RuntimeError: If the node does not define any tasks. @raises ValueError: If the task is not supported by the node. """ if not self._tasks: - raise ValueError(f"Node {self.name} does not have any tasks defined.") + raise RuntimeError(f"Node '{self.name}' does not define any task.") if task not in self._tasks: raise ValueError( - f"Node {self.name} does not support the {task.value} task." + f"Node '{self.name}' does not support the '{task.value}' task." ) return self._tasks[task] @@ -213,14 +245,20 @@ def name(self) -> str: @property def task(self) -> str: - """Getter for the task.""" + """Getter for the task. + + @type: str + @raises RuntimeError: If the node doesn't define any task. + @raises ValueError: If the node defines more than one task. In + that case, use the L{get_task_name} method instead. + """ if not self._tasks: - raise ValueError(f"{self.name} does not have any tasks defined.") + raise RuntimeError(f"{self.name} does not define any task.") if len(self._tasks) > 1: raise ValueError( f"Node {self.name} has multiple tasks defined. " - "Use `get_task_name` method instead." + "Use the `get_task_name` method instead." ) return next(iter(self._tasks.values())) @@ -242,22 +280,27 @@ def get_class_names(self, task: LabelType) -> list[str]: @rtype: list[str] @return: Class names for the task. 
""" - return self.dataset_metadata.class_names(self.get_task_name(task)) + return self.dataset_metadata.classes(self.get_task_name(task)) @property def n_keypoints(self) -> int: - """Getter for the number of keypoints.""" + """Getter for the number of keypoints. + + @type: int + @raises ValueError: If the node does not support keypoints. + @raises RuntimeError: If the node doesn't define any task. + """ if self._n_keypoints is not None: return self._n_keypoints if self._tasks: if LabelType.KEYPOINTS not in self._tasks: - raise (ValueError(f"{self.name} does not support keypoints.")) + raise ValueError(f"{self.name} does not support keypoints.") return self.dataset_metadata.n_keypoints( self.get_task_name(LabelType.KEYPOINTS) ) - raise ValueError( + raise RuntimeError( f"{self.name} does not have any tasks defined, " "`BaseNode.n_keypoints` property cannot be used. " "Either override the `tasks` class attribute, " @@ -267,12 +310,19 @@ def n_keypoints(self) -> int: @property def n_classes(self) -> int: - """Getter for the number of classes.""" + """Getter for the number of classes. + + @type: int + @raises RuntimeError: If the node doesn't define any task. + @raises ValueError: If the number of classes is different for + different tasks. In that case, use the L{get_n_classes} + method. + """ if self._n_classes is not None: return self._n_classes if not self._tasks: - raise ValueError( + raise RuntimeError( f"{self.name} does not have any tasks defined, " "`BaseNode.n_classes` property cannot be used. " "Either override the `tasks` class attribute, " @@ -296,9 +346,16 @@ def n_classes(self) -> int: @property def class_names(self) -> list[str]: - """Getter for the class names.""" + """Getter for the class names. + + @type: list[str] + @raises RuntimeError: If the node doesn't define any task. + @raises ValueError: If the class names are different for + different tasks. In that case, use the L{get_class_names} + method. + """ if not self._tasks: - raise ValueError( + raise RuntimeError( f"{self.name} does not have any tasks defined, " "`BaseNode.class_names` property cannot be used. " "Either override the `tasks` class attribute, " @@ -306,10 +363,10 @@ def class_names(self) -> list[str]: "the `BaseNode.dataset_metadata.class_names` method manually." ) elif len(self._tasks) == 1: - return self.dataset_metadata.class_names(self.task) + return self.dataset_metadata.classes(self.task) else: class_names = [ - self.dataset_metadata.class_names(self.get_task_name(task)) + self.dataset_metadata.classes(self.get_task_name(task)) for task in self._tasks ] if all(set(names) == set(class_names[0]) for names in class_names): @@ -322,14 +379,25 @@ def class_names(self) -> list[str]: @property def input_shapes(self) -> list[Packet[Size]]: - """Getter for the input shapes.""" + """Getter for the input shapes. + + @type: list[Packet[Size]] + @raises RuntimeError: If the C{input_shapes} were not set during + initialization. + """ + if self._input_shapes is None: raise self._non_set_error("input_shapes") return self._input_shapes @property def original_in_shape(self) -> Size: - """Getter for the original input shape.""" + """Getter for the original input shape as [N, H, W]. + + @type: Size + @raises RuntimeError: If the C{original_in_shape} were not set + during initialization. + """ if self._original_in_shape is None: raise self._non_set_error("original_in_shape") return self._original_in_shape @@ -339,10 +407,11 @@ def dataset_metadata(self) -> DatasetMetadata: """Getter for the dataset metadata. 
@type: L{DatasetMetadata} - @raises ValueError: If the C{dataset_metadata} is C{None}. + @raises RuntimeError: If the C{dataset_metadata} were not set + during initialization. """ if self._dataset_metadata is None: - raise ValueError( + raise RuntimeError( f"{self._non_set_error('dataset_metadata')}" "Either provide `dataset_metadata` or `n_classes`." ) @@ -358,7 +427,7 @@ def in_sizes(self) -> Size | list[Size]: In case `in_sizes` were provided during initialization, they are returned directly. - Example: + Example:: >>> input_shapes = [{"features": [Size(64, 128, 128), Size(3, 224, 224)]}] >>> attach_index = -1 @@ -369,7 +438,7 @@ def in_sizes(self) -> Size | list[Size]: >>> in_sizes = [Size(64, 128, 128), Size(3, 224, 224)] @type: Size | list[Size] - @raises IncompatibleException: If the C{input_shapes} are too complicated for + @raises RuntimeError: If the C{input_shapes} are too complicated for the default implementation. """ if self._in_sizes is not None: @@ -377,27 +446,25 @@ def in_sizes(self) -> Size | list[Size]: features = self.input_shapes[0].get("features") if features is None: - raise IncompatibleException( + raise RuntimeError( f"Feature field is missing in {self.name}. " "The default implementation of `in_sizes` cannot be used." ) - shapes = self.get_attached(self.input_shapes[0]["features"]) - if isinstance(shapes, list) and len(shapes) == 1: - return shapes[0] - return shapes + return self.get_attached(self.input_shapes[0]["features"]) @property def in_channels(self) -> int | list[int]: """Simplified getter for the number of input channels. - Should work out of the box for most cases where the C{input_shapes} are - sufficiently simple. Otherwise the C{input_shapes} should be used directly. If - C{attach_index} is set to "all" or is a slice, returns a list of input channels, + Should work out of the box for most cases where the + C{input_shapes} are sufficiently simple. Otherwise the + C{input_shapes} should be used directly. If C{attach_index} is + set to "all" or is a slice, returns a list of input channels, otherwise returns a single value. @type: int | list[int] - @raises IncompatibleException: If the C{input_shapes} are too complicated for - the default implementation. + @raises RuntimeError: If the C{input_shapes} are too complicated + for the default implementation of C{in_sizes}. """ return self._get_nth_size(-3) @@ -409,8 +476,8 @@ def in_height(self) -> int | list[int]: sufficiently simple. Otherwise the `input_shapes` should be used directly. @type: int | list[int] - @raises IncompatibleException: If the C{input_shapes} are too complicated for - the default implementation. + @raises RuntimeError: If the C{input_shapes} are too complicated for + the default implementation of C{in_sizes}. """ return self._get_nth_size(-2) @@ -422,8 +489,8 @@ def in_width(self) -> int | list[int]: sufficiently simple. Otherwise the `input_shapes` should be used directly. @type: int | list[int] - @raises IncompatibleException: If the C{input_shapes} are too complicated for - the default implementation. + @raises RuntimeError: If the C{input_shapes} are too complicated for + the default implementation of C{in_sizes}. """ return self._get_nth_size(-1) @@ -443,23 +510,26 @@ def set_export_mode(self, mode: bool = True) -> None: def unwrap(self, inputs: list[Packet[Tensor]]) -> ForwardInputT: """Prepares inputs for the forward pass. - Unwraps the inputs from the C{list[Packet[Tensor]]} input so they can be passed - to the forward call. 
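A quick standalone illustration of how the `in_channels`, `in_height` and `in_width` shortcuts fall out of `in_sizes`: they are simply the -3rd, -2nd and -1st entries of each attached shape (channels-first, batch dimension excluded). Only torch is needed:

```python
from torch import Size

# attach_index = -1: a single shape is attached
in_sizes = Size([3, 224, 224])
print(in_sizes[-3], in_sizes[-2], in_sizes[-1])  # 3 224 224

# attach_index = "all": every shape is attached, so each getter returns a list
in_sizes_all = [Size([64, 128, 128]), Size([3, 224, 224])]
print([s[-3] for s in in_sizes_all])  # [64, 3]
```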
The default implementation expects a single input with - C{features} key and returns the tensor or tensors at the C{attach_index} - position. + Unwraps the inputs from the C{list[Packet[Tensor]]} input so + they can be passed to the forward call. The default + implementation expects a single input with C{features} key and + returns the tensor or tensors at the C{attach_index} position. - For most cases the default implementation should be sufficient. Exceptions are - modules with multiple inputs or producing more complex outputs. This is - typically the case for output nodes. + For most cases the default implementation should be sufficient. + Exceptions are modules with multiple inputs or producing more + complex outputs. This is typically the case for output nodes. @type inputs: list[Packet[Tensor]] @param inputs: Inputs to the node. @rtype: ForwardInputT - @return: Prepared inputs, ready to be passed to the L{forward} method. + @return: Prepared inputs, ready to be passed to the L{forward} + method. + @raises ValueError: If the number of inputs is not equal to 1. + In such cases the method has to be overridden. """ if len(inputs) > 1: - raise IncompatibleException( - f"Node {self.name} expects a single input, but got {len(inputs)} inputs instead." + raise ValueError( + f"Node {self.name} expects a single input, but got {len(inputs)} inputs instead. " "If the node expects multiple inputs, the `unwrap` method should be overridden." ) return self.get_attached(inputs[0]["features"]) # type: ignore @@ -468,9 +538,9 @@ def unwrap(self, inputs: list[Packet[Tensor]]) -> ForwardInputT: def forward(self, inputs: ForwardInputT) -> ForwardOutputT: """Forward pass of the module. - @type inputs: ForwardInputT + @type inputs: L{ForwardInputT} @param inputs: Inputs to the module. - @rtype: ForwardOutputT + @rtype: L{ForwardOutputT} @return: Result of the forward pass. """ ... @@ -502,27 +572,30 @@ def wrap(self, output: ForwardOutputT) -> Packet[Tensor]: @rtype: L{Packet}[Tensor] @return: Wrapped output. + + @raises ValueError: If the C{output} argument is not a tensor or a list of tensors. + In such cases the L{wrap} method should be overridden. """ - match output: - case Tensor() as out: - outputs = [out] - case list(tensors) if all(isinstance(t, Tensor) for t in tensors): - outputs = tensors - case _: - raise IncompatibleException( - "Default `wrap` expects a single tensor or a list of tensors." - ) + if isinstance(output, Tensor): + outputs = [output] + elif isinstance(output, (list, tuple)) and all( + isinstance(t, Tensor) for t in output + ): + outputs = list(output) + else: + raise ValueError( + "Default `wrap` expects a single tensor or a list of tensors." + ) try: task = self.task - except ValueError: + except RuntimeError: task = "features" return {task: outputs} def run(self, inputs: list[Packet[Tensor]]) -> Packet[Tensor]: - """Combines the forward pass with the wrapping and unwrapping of the inputs. - - Additionally validates the inputs against `input_protocols`. + """Combines the forward pass with the wrapping and unwrapping of + the inputs. @type inputs: list[Packet[Tensor]] @param inputs: Inputs to the module. @@ -531,9 +604,9 @@ def run(self, inputs: list[Packet[Tensor]]) -> Packet[Tensor]: @return: Outputs of the module as a dictionary of list of tensors: `{"features": [Tensor, ...], "segmentation": [Tensor]}` - @raises IncompatibleException: If the inputs are not compatible with the node. + @raises RuntimeError: If default L{wrap} or L{unwrap} methods are not sufficient. 
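Putting the pieces together, the default `run` flow is: take the `features` list from the single input packet, select the tensor at `attach_index`, call `forward`, and wrap the result under the task name (or `"features"` when no task is defined). A conceptual stand-in with plain dicts and tensors, not the real classes:

```python
import torch

packet_in = {"features": [torch.rand(1, 64, 32, 32), torch.rand(1, 128, 16, 16)]}

attach_index = -1
unwrapped = packet_in["features"][attach_index]        # default unwrap
output = torch.nn.functional.avg_pool2d(unwrapped, 2)  # stand-in for forward()
packet_out = {"features": [output]}                    # default wrap, no task defined

print(packet_out["features"][0].shape)  # torch.Size([1, 128, 8, 8])
```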
""" - unwrapped = self.unwrap(self.validate(inputs)) + unwrapped = self.unwrap(inputs) outputs = self(unwrapped) wrapped = self.wrap(outputs) str_tasks = [task.value for task in self._tasks] if self._tasks else [] @@ -543,38 +616,21 @@ def run(self, inputs: list[Packet[Tensor]]) -> Packet[Tensor]: wrapped[self.get_task_name(LabelType(key))] = value return wrapped - def validate(self, data: list[Packet[Tensor]]) -> list[Packet[Tensor]]: - """Validates the inputs against `input_protocols`.""" - if len(data) != len(self.input_protocols): - raise IncompatibleException( - f"Node {self.name} expects {len(self.input_protocols)} inputs, " - f"but got {len(data)} inputs instead." - ) - try: - return [ - validate_packet(d, protocol) - for d, protocol in zip(data, self.input_protocols) - ] - except ValidationError as e: - raise IncompatibleException.from_validation_error(e, self.name) from e - T = TypeVar("T", Tensor, Size) def get_attached(self, lst: list[T]) -> list[T] | T: """Gets the attached elements from a list. - This method is used to get the attached elements from a list based on - the `attach_index` attribute. + This method is used to get the attached elements from a list + based on the C{attach_index} attribute. @type lst: list[T] - @param lst: List to get the attached elements from. Can be either - a list of tensors or a list of sizes. - + @param lst: List to get the attached elements from. Can be + either a list of tensors or a list of sizes. @rtype: list[T] | T - @return: Attached elements. If `attach_index` is set to `"all"` or is a slice, - returns a list of attached elements. - - @raises ValueError: If the `attach_index` is invalid. + @return: Attached elements. If C{attach_index} is set to + C{"all"} or is a slice, returns a list of attached elements. + @raises ValueError: If the C{attach_index} is invalid. """ def _normalize_index(index: int) -> int: @@ -608,7 +664,9 @@ def _normalize_slice(i: int, j: int) -> slice: case (int(i), int(j), int(k)): return lst[i:j:k] case _: - raise ValueError(f"Invalid attach index: `{self.attach_index}`") + raise ValueError( + f"Invalid attach index: `{self.attach_index}`" + ) def _get_nth_size(self, idx: int) -> int | list[int]: match self.in_sizes: @@ -617,8 +675,8 @@ def _get_nth_size(self, idx: int) -> int | list[int]: case list(sizes): return [size[idx] for size in sizes] - def _non_set_error(self, name: str) -> ValueError: - return ValueError( - f"{self.name} is trying to access `{name}`, " + def _non_set_error(self, name: str) -> RuntimeError: + return RuntimeError( + f"'{self.name}' node is trying to access `{name}`, " "but it was not set during initialization. " ) diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index 0e0a4ad2..9231ea85 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -1,6 +1,3 @@ -# TODO: cleanup, document -# Check if some blocks could be merged togetner. - import math from typing import TypeVar @@ -13,7 +10,8 @@ class EfficientDecoupledBlock(nn.Module): def __init__(self, n_classes: int, in_channels: int): - """Efficient Decoupled block used for class and regression predictions. + """Efficient Decoupled block used for class and regression + predictions. @type n_classes: int @param n_classes: Number of classes. 
@@ -39,7 +37,9 @@ def __init__(self, n_classes: int, in_channels: int): padding=1, activation=nn.SiLU(), ), - nn.Conv2d(in_channels=in_channels, out_channels=n_classes, kernel_size=1), + nn.Conv2d( + in_channels=in_channels, out_channels=n_classes, kernel_size=1 + ), ) self.regression_branch = nn.Sequential( ConvModule( @@ -152,7 +152,10 @@ def __init__( super().__init__( nn.ConvTranspose2d( - in_channels, out_channels, kernel_size=kernel_size, stride=stride + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, ), ConvModule(out_channels, out_channels, kernel_size=3, padding=1), ) @@ -299,7 +302,9 @@ def forward(self, x: Tensor) -> Tensor: else: id_out = self.rbr_identity(x) - return self.nonlinearity(self.se(self.rbr_dense(x) + self.rbr_1x1(x) + id_out)) + return self.nonlinearity( + self.se(self.rbr_dense(x) + self.rbr_1x1(x) + id_out) + ) def reparametrize(self) -> None: if hasattr(self, "rbr_reparam"): @@ -318,15 +323,16 @@ def reparametrize(self) -> None: ) self.rbr_reparam.weight.data = kernel # type: ignore self.rbr_reparam.bias.data = bias # type: ignore - self.__delattr__("rbr_dense") - self.__delattr__("rbr_1x1") + del self.rbr_dense + del self.rbr_1x1 if hasattr(self, "rbr_identity"): - self.__delattr__("rbr_identity") + del self.rbr_identity if hasattr(self, "id_tensor"): - self.__delattr__("id_tensor") + del self.id_tensor def _get_equivalent_kernel_bias(self) -> tuple[Tensor, Tensor]: - """Derives the equivalent kernel and bias in a DIFFERENTIABLE way.""" + """Derives the equivalent kernel and bias in a DIFFERENTIABLE + way.""" kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense) kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1) kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity) @@ -343,7 +349,9 @@ def _pad_1x1_to_3x3_tensor(self, kernel1x1: Tensor | None) -> Tensor: else: return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1]) - def _fuse_bn_tensor(self, branch: nn.Module | None) -> tuple[Tensor, Tensor]: + def _fuse_bn_tensor( + self, branch: nn.Module | None + ) -> tuple[Tensor, Tensor]: if branch is None: return torch.tensor(0), torch.tensor(0) if isinstance(branch, nn.Sequential): @@ -381,11 +389,11 @@ def __init__( block: type[nn.Module], in_channels: int, out_channels: int, - num_blocks: int = 1, + n_blocks: int = 1, ): - """Module which repeats the block n times. First block accepts in_channels and - outputs out_channels while subsequent blocks accept out_channels and output - out_channels. + """Module which repeats the block n times. First block accepts + in_channels and outputs out_channels while subsequent blocks + accept out_channels and output out_channels. @type block: L{nn.Module} @param block: Block to repeat. @@ -393,14 +401,14 @@ def __init__( @param in_channels: Number of input channels. @type out_channels: int @param out_channels: Number of output channels. - @type num_blocks: int - @param num_blocks: Number of blocks to repeat. Defaults to C{1}. + @type n_blocks: int + @param n_blocks: Number of blocks to repeat. Defaults to C{1}. """ super().__init__() in_channels = in_channels self.blocks = nn.ModuleList() - for _ in range(num_blocks): + for _ in range(n_blocks): self.blocks.append( block(in_channels=in_channels, out_channels=out_channels) ) @@ -413,8 +421,11 @@ def forward(self, x: Tensor) -> Tensor: class SpatialPyramidPoolingBlock(nn.Module): - def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 5): - """Spatial Pyramid Pooling block with ReLU activation on three different scales. 
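The RepVGG reparametrization a few hunks above relies on the standard Conv+BatchNorm folding: the BN scale and shift are absorbed into the convolution kernel and bias. A quick numerical check of that identity, independent of the block itself:

```python
import torch
from torch import nn

conv = nn.Conv2d(8, 16, kernel_size=3, padding=1, bias=False)
bn = nn.BatchNorm2d(16)
bn.eval()
# Give BN non-trivial statistics so the check is meaningful.
bn.running_mean.uniform_(-1, 1)
bn.running_var.uniform_(0.5, 2.0)
bn.weight.data.uniform_(0.5, 1.5)
bn.bias.data.uniform_(-1, 1)

std = (bn.running_var + bn.eps).sqrt()
scale = (bn.weight / std).reshape(-1, 1, 1, 1)
fused_kernel = conv.weight * scale
fused_bias = bn.bias - bn.running_mean * bn.weight / std

fused = nn.Conv2d(8, 16, kernel_size=3, padding=1)
fused.weight.data = fused_kernel
fused.bias.data = fused_bias

x = torch.rand(1, 8, 32, 32)
with torch.no_grad():
    assert torch.allclose(bn(conv(x)), fused(x), atol=1e-5)
```

At inference time this is what lets the dense, 1x1 and identity branches collapse into the single `rbr_reparam` convolution built by `reparametrize`.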
+ def __init__( + self, in_channels: int, out_channels: int, kernel_size: int = 5 + ): + """Spatial Pyramid Pooling block with ReLU activation on three + different scales. @type in_channels: int @param in_channels: Number of input channels. @@ -476,7 +487,9 @@ def forward(self, x: Tensor) -> Tensor: class FeatureFusionBlock(nn.Module): - def __init__(self, in_channels: int, out_channels: int, reduction: int = 1): + def __init__( + self, in_channels: int, out_channels: int, reduction: int = 1 + ): """Feature Fusion block adapted from: U{https://github.com/taveraantonio/BiseNetv1}. @type in_channels: int @@ -600,19 +613,19 @@ def __init__( in_channels: int, in_channels_next: int, out_channels: int, - num_repeats: int, + n_repeats: int, ): """UpBlock used in RepPAN neck. @type in_channels: int @param in_channels: Number of input channels. @type in_channels_next: int - @param in_channels_next: Number of input channels of next input which is used in - concat. + @param in_channels_next: Number of input channels of next input + which is used in concat. @type out_channels: int @param out_channels: Number of output channels. - @type num_repeats: int - @param num_repeats: Number of RepVGGBlock repeats. + @type n_repeats: int + @param n_repeats: Number of RepVGGBlock repeats. """ super().__init__() @@ -634,7 +647,7 @@ def __init__( block=RepVGGBlock, in_channels=in_channels_next + out_channels, out_channels=out_channels, - num_blocks=num_repeats, + n_blocks=n_repeats, ) def forward(self, x0: Tensor, x1: Tensor) -> tuple[Tensor, Tensor]: @@ -652,21 +665,22 @@ def __init__( downsample_out_channels: int, in_channels_next: int, out_channels: int, - num_repeats: int, + n_repeats: int, ): """DownBlock used in RepPAN neck. @type in_channels: int @param in_channels: Number of input channels. @type downsample_out_channels: int - @param downsample_out_channels: Number of output channels after downsample. + @param downsample_out_channels: Number of output channels after + downsample. @type in_channels_next: int - @param in_channels_next: Number of input channels of next input which is used in - concat. + @param in_channels_next: Number of input channels of next input + which is used in concat. @type out_channels: int @param out_channels: Number of output channels. - @type num_repeats: int - @param num_repeats: Number of RepVGGBlock repeats. + @type n_repeats: int + @param n_repeats: Number of RepVGGBlock repeats. """ super().__init__() @@ -681,7 +695,7 @@ def __init__( block=RepVGGBlock, in_channels=downsample_out_channels + in_channels_next, out_channels=out_channels, - num_blocks=num_repeats, + n_blocks=n_repeats, ) def forward(self, x0: Tensor, x1: Tensor) -> Tensor: diff --git a/luxonis_train/nodes/heads/bisenet_head.py b/luxonis_train/nodes/heads/bisenet_head.py index 3fef7584..dd6e6333 100644 --- a/luxonis_train/nodes/heads/bisenet_head.py +++ b/luxonis_train/nodes/heads/bisenet_head.py @@ -1,31 +1,28 @@ -"""BiSeNet segmentation head. - -Adapted from U{https://github.com/taveraantonio/BiseNetv1}. -License: NOT SPECIFIED. 
-""" - +from typing import Any +from luxonis_ml.data import LabelType from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import ConvModule -from luxonis_train.utils.general import infer_upscale_factor -from luxonis_train.utils.types import LabelType, Packet +from luxonis_train.utils import infer_upscale_factor class BiSeNetHead(BaseNode[Tensor, Tensor]): in_height: int + in_width: int in_channels: int tasks: list[LabelType] = [LabelType.SEGMENTATION] - def __init__( - self, - intermediate_channels: int = 64, - **kwargs, - ): + def __init__(self, intermediate_channels: int = 64, **kwargs: Any): """BiSeNet segmentation head. - TODO: Add more documentation. + + Source: U{BiseNetV1} + @license: NOT SPECIFIED. + @see: U{BiseNetv1: Bilateral Segmentation Network for + Real-time Semantic Segmentation + } @type intermediate_channels: int @param intermediate_channels: How many intermediate channels to use. @@ -33,17 +30,28 @@ def __init__( """ super().__init__(**kwargs) - original_height = self.original_in_shape[1] - upscale_factor = 2 ** infer_upscale_factor(self.in_height, original_height) + h, w = self.original_in_shape[1:] + upscale_factor = 2 ** infer_upscale_factor( + (self.in_height, self.in_width), (h, w) + ) out_channels = self.n_classes * upscale_factor * upscale_factor - self.conv_3x3 = ConvModule(self.in_channels, intermediate_channels, 3, 1, 1) - self.conv_1x1 = nn.Conv2d(intermediate_channels, out_channels, 1, 1, 0) + self.conv_3x3 = ConvModule( + self.in_channels, + intermediate_channels, + kernel_size=3, + stride=1, + padding=1, + ) + self.conv_1x1 = nn.Conv2d( + intermediate_channels, + out_channels, + kernel_size=1, + stride=1, + padding=0, + ) self.upscale = nn.PixelShuffle(upscale_factor) - def wrap(self, output: Tensor) -> Packet[Tensor]: - return {"segmentation": [output]} - def forward(self, inputs: Tensor) -> Tensor: x = self.conv_3x3(inputs) x = self.conv_1x1(x) diff --git a/luxonis_train/nodes/heads/classification_head.py b/luxonis_train/nodes/heads/classification_head.py index 07b3d72b..5961c853 100644 --- a/luxonis_train/nodes/heads/classification_head.py +++ b/luxonis_train/nodes/heads/classification_head.py @@ -1,3 +1,5 @@ +from typing import Any + from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode @@ -8,16 +10,15 @@ class ClassificationHead(BaseNode[Tensor, Tensor]): in_channels: int tasks: list[LabelType] = [LabelType.CLASSIFICATION] - def __init__( - self, - dropout_rate: float = 0.2, - **kwargs, - ): + def __init__(self, dropout_rate: float = 0.2, **kwargs: Any): """Simple classification head. + Consists of a global average pooling layer followed by a dropout + layer and a single linear layer. + @type dropout_rate: float - @param dropout_rate: Dropout rate before last layer, range C{[0, 1]}. Defaults - to C{0.2}. + @param dropout_rate: Dropout rate before last layer, range C{[0, + 1]}. Defaults to C{0.2}. """ super().__init__(**kwargs) diff --git a/luxonis_train/nodes/heads/efficient_bbox_head.py b/luxonis_train/nodes/heads/efficient_bbox_head.py index 5607a2a8..6f0e01e7 100644 --- a/luxonis_train/nodes/heads/efficient_bbox_head.py +++ b/luxonis_train/nodes/heads/efficient_bbox_head.py @@ -1,22 +1,20 @@ -"""Head for object detection. - -Adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial -Applications}. 
-""" - -from typing import Literal +import logging +from typing import Any, Literal import torch +from luxonis_ml.data import LabelType from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import EfficientDecoupledBlock -from luxonis_train.utils.boxutils import ( +from luxonis_train.utils import ( + Packet, anchors_for_fpn_features, dist2bbox, non_max_suppression, ) -from luxonis_train.utils.types import LabelType, Packet + +logger = logging.getLogger(__name__) class EfficientBBoxHead( @@ -31,24 +29,24 @@ def __init__( conf_thres: float = 0.25, iou_thres: float = 0.45, max_det: int = 300, - **kwargs, + **kwargs: Any, ): """Head for object detection. - TODO: add more documentation - + Adapted from U{YOLOv6: A Single-Stage Object Detection Framework + for Industrial Applications + }. @type n_heads: Literal[2,3,4] - @param n_heads: Number of output heads. Defaults to 3. - ***Note:*** Should be same also on neck in most cases. - + @param n_heads: Number of output heads. Defaults to 3. B{Note:} + Should be same also on neck in most cases. @type conf_thres: float - @param conf_thres: Threshold for confidence. Defaults to C{0.25}. - + @param conf_thres: Threshold for confidence. Defaults to + C{0.25}. @type iou_thres: float @param iou_thres: Threshold for IoU. Defaults to C{0.45}. - @type max_det: int - @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}. + @param max_det: Maximum number of detections retained after NMS. + Defaults to C{300}. """ super().__init__(**kwargs) @@ -58,11 +56,18 @@ def __init__( self.iou_thres = iou_thres self.max_det = max_det - self.stride = self._fit_stride_to_num_heads() + self.stride = self._fit_stride_to_n_heads() self.grid_cell_offset = 0.5 self.grid_cell_size = 5.0 self.heads = nn.ModuleList() + if len(self.in_channels) < self.n_heads: + logger.warning( + f"Head '{self.name}' was set to use {self.n_heads} heads, " + f"but received only {len(self.in_channels)} inputs. " + f"Changing number of heads to {len(self.in_channels)}." 
+ ) + self.n_heads = len(self.in_channels) for i in range(self.n_heads): curr_head = EfficientDecoupledBlock( n_classes=self.n_classes, @@ -92,18 +97,25 @@ def wrap( features, cls_score_list, reg_distri_list = output if self.export: - outputs = [] - for out_cls, out_reg in zip(cls_score_list, reg_distri_list, strict=True): + outputs: list[Tensor] = [] + for out_cls, out_reg in zip( + cls_score_list, reg_distri_list, strict=True + ): conf, _ = out_cls.max(1, keepdim=True) out = torch.cat([out_reg, conf, out_cls], dim=1) outputs.append(out) return {self.task: outputs} cls_tensor = torch.cat( - [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], dim=2 + [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], + dim=2, ).permute(0, 2, 1) reg_tensor = torch.cat( - [reg_distri_list[i].flatten(2) for i in range(len(reg_distri_list))], dim=2 + [ + reg_distri_list[i].flatten(2) + for i in range(len(reg_distri_list)) + ], + dim=2, ).permute(0, 2, 1) if self.training: @@ -122,8 +134,9 @@ def wrap( "distributions": [reg_tensor], } - def _fit_stride_to_num_heads(self): - """Returns correct stride for number of heads and attach index.""" + def _fit_stride_to_n_heads(self): + """Returns correct stride for number of heads and attach + index.""" stride = torch.tensor( [ self.original_in_shape[1] / x[2] # type: ignore @@ -136,7 +149,8 @@ def _fit_stride_to_num_heads(self): def _process_to_bbox( self, output: tuple[list[Tensor], Tensor, Tensor] ) -> list[Tensor]: - """Performs post-processing of the output and returns bboxs after NMS.""" + """Performs post-processing of the output and returns bboxs + after NMS.""" features, cls_score_list, reg_dist_list = output _, anchor_points, _, stride_tensor = anchors_for_fpn_features( features, @@ -146,7 +160,9 @@ def _process_to_bbox( multiply_with_stride=False, ) - pred_bboxes = dist2bbox(reg_dist_list, anchor_points, out_format="xyxy") + pred_bboxes = dist2bbox( + reg_dist_list, anchor_points, out_format="xyxy" + ) pred_bboxes *= stride_tensor output_merged = torch.cat( diff --git a/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py b/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py index 03d29296..51b8b704 100644 --- a/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py +++ b/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py @@ -1,15 +1,16 @@ -from typing import Literal +from typing import Any, Literal import torch +from luxonis_ml.data import LabelType from torch import Tensor, nn from luxonis_train.nodes.blocks import ConvModule -from luxonis_train.utils.boxutils import ( +from luxonis_train.utils import ( + Packet, anchors_for_fpn_features, dist2bbox, non_max_suppression, ) -from luxonis_train.utils.types import LabelType, Packet from .efficient_bbox_head import EfficientBBoxHead @@ -23,7 +24,7 @@ def __init__( conf_thres: float = 0.25, iou_thres: float = 0.45, max_det: int = 300, - **kwargs, + **kwargs: Any, ): """Head for object and keypoint detection. 
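The stride fitting in `_fit_stride_to_n_heads` above is just the ratio between the original input height and the height of the feature map each head consumes. A sketch with hypothetical shapes:

```python
import torch

original_height = 384
feature_heights = [48, 24, 12]       # three FPN levels feeding three heads
stride = torch.tensor(
    [original_height / h for h in feature_heights], dtype=torch.int
)
print(stride)                        # tensor([ 8, 16, 32], dtype=torch.int32)
```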
@@ -68,7 +69,12 @@ def forward( ) -> tuple[list[Tensor], list[Tensor], list[Tensor], list[Tensor]]: features, cls_score_list, reg_distri_list = super().forward(inputs) - _, self.anchor_points, _, self.stride_tensor = anchors_for_fpn_features( + ( + _, + self.anchor_points, + _, + self.stride_tensor, + ) = anchors_for_fpn_features( features, self.stride, self.grid_cell_size, @@ -84,17 +90,18 @@ def forward( return features, cls_score_list, reg_distri_list, kpt_list def wrap( - self, output: tuple[list[Tensor], list[Tensor], list[Tensor], list[Tensor]] + self, + output: tuple[list[Tensor], list[Tensor], list[Tensor], list[Tensor]], ) -> Packet[Tensor]: features, cls_score_list, reg_distri_list, kpt_list = output bs = features[0].shape[0] if self.export: - outputs = [] + outputs: list[Tensor] = [] for out_cls, out_reg, out_kpts in zip( cls_score_list, reg_distri_list, kpt_list, strict=True ): - chunks = out_kpts.split(3, dim=1) - modified_chunks = [] + chunks = torch.split(out_kpts, 3, dim=1) + modified_chunks: list[Tensor] = [] for chunk in chunks: x = chunk[:, 0:1, :, :] y = chunk[:, 1:2, :, :] @@ -105,11 +112,17 @@ def wrap( out = torch.cat([out_reg, out_cls, out_kpts_modified], dim=1) outputs.append(out) return {"outputs": outputs} + cls_tensor = torch.cat( - [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], dim=2 + [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], + dim=2, ).permute(0, 2, 1) reg_tensor = torch.cat( - [reg_distri_list[i].flatten(2) for i in range(len(reg_distri_list))], dim=2 + [ + reg_distri_list[i].flatten(2) + for i in range(len(reg_distri_list)) + ], + dim=2, ).permute(0, 2, 1) kpt_tensor = torch.cat( [ @@ -143,7 +156,7 @@ def wrap( "keypoints_raw": [kpt_tensor], } - def _dist2kpts(self, kpts): + def _dist2kpts(self, kpts: Tensor) -> Tensor: """Decodes keypoints.""" y = kpts.clone() @@ -154,8 +167,12 @@ def _dist2kpts(self, kpts): anchor_points_x = anchor_points_transposed[0].view(1, -1, 1) anchor_points_y = anchor_points_transposed[1].view(1, -1, 1) - y[:, :, 0::3] = (y[:, :, 0::3] * 2.0 + (anchor_points_x - 0.5)) * stride_tensor - y[:, :, 1::3] = (y[:, :, 1::3] * 2.0 + (anchor_points_y - 0.5)) * stride_tensor + y[:, :, 0::3] = ( + y[:, :, 0::3] * 2.0 + (anchor_points_x - 0.5) + ) * stride_tensor + y[:, :, 1::3] = ( + y[:, :, 1::3] * 2.0 + (anchor_points_y - 0.5) + ) * stride_tensor y[:, :, 2::3] = y[:, :, 2::3].sigmoid() return y @@ -163,10 +180,13 @@ def _dist2kpts(self, kpts): def _process_to_bbox_and_kps( self, output: tuple[list[Tensor], Tensor, Tensor, Tensor] ) -> list[Tensor]: - """Performs post-processing of the output and returns bboxs after NMS.""" + """Performs post-processing of the output and returns bboxs + after NMS.""" features, cls_score_list, reg_dist_list, keypoints = output - pred_bboxes = dist2bbox(reg_dist_list, self.anchor_points, out_format="xyxy") + pred_bboxes = dist2bbox( + reg_dist_list, self.anchor_points, out_format="xyxy" + ) pred_bboxes *= self.stride_tensor output_merged = torch.cat( diff --git a/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py index 0ca995c5..5de88650 100644 --- a/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py +++ b/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py @@ -1,34 +1,38 @@ import logging import math -from typing import cast +from typing import Any, cast import torch +from luxonis_ml.data import LabelType from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode from 
luxonis_train.nodes.blocks import KeypointBlock, LearnableMulAddConv -from luxonis_train.utils.boxutils import ( +from luxonis_train.utils import ( + Packet, non_max_suppression, process_bbox_predictions, process_keypoints_predictions, ) -from luxonis_train.utils.types import LabelType, Packet logger = logging.getLogger(__name__) -class ImplicitKeypointBBoxHead(BaseNode): - tasks: list[LabelType] = [LabelType.KEYPOINTS, LabelType.BOUNDINGBOX] +class ImplicitKeypointBBoxHead( + BaseNode[list[Tensor], tuple[list[Tensor], Tensor]] +): + tasks = [LabelType.KEYPOINTS, LabelType.BOUNDINGBOX] + in_channels: list[int] def __init__( self, - num_heads: int = 3, + n_heads: int = 3, anchors: list[list[float]] | None = None, init_coco_biases: bool = True, conf_thres: float = 0.25, iou_thres: float = 0.45, max_det: int = 300, - **kwargs, + **kwargs: Any, ): """Head for object and keypoint detection. @@ -37,8 +41,8 @@ def __init__( TODO: more technical documentation - @type num_heads: int - @param num_heads: Number of output heads. Defaults to C{3}. + @type n_heads: int + @param n_heads: Number of output heads. Defaults to C{3}. B{Note:} Should be same also on neck in most cases. @type anchors: list[list[float]] | None @param anchors: Anchors used for object detection. @@ -53,16 +57,27 @@ def __init__( """ super().__init__(**kwargs) - if anchors is None: - logger.info("No anchors provided, generating them automatically.") - anchors, recall = self.dataset_metadata.autogenerate_anchors(num_heads) - logger.info(f"Anchors generated. Best possible recall: {recall:.2f}") - self.conf_thres = conf_thres self.iou_thres = iou_thres self.max_det = max_det - self.num_heads = num_heads + self.n_heads = n_heads + if len(self.in_channels) < self.n_heads: + logger.warning( + f"Head '{self.name}' was set to use {self.n_heads} heads, " + f"but received only {len(self.in_channels)} inputs. " + f"Changing number of heads to {len(self.in_channels)}." + ) + self.n_heads = len(self.in_channels) + + if anchors is None: + logger.info("No anchors provided, generating them automatically.") + anchors, recall = self.dataset_metadata.autogenerate_anchors( + self.n_heads + ) + logger.info( + f"Anchors generated. 
Best possible recall: {recall:.2f}" + ) self.box_offset = 5 self.n_det_out = self.n_classes + self.box_offset @@ -71,13 +86,13 @@ def __init__( self.n_anchors = len(anchors[0]) // 2 self.grid: list[Tensor] = [] - self.anchors = torch.tensor(anchors).float().view(self.num_heads, -1, 2) - self.anchor_grid = self.anchors.clone().view(self.num_heads, 1, -1, 1, 1, 2) - - self.channel_list, self.stride = self._fit_to_num_heads( - cast(list[int], self.in_channels) + self.anchors = torch.tensor(anchors).float().view(self.n_heads, -1, 2) + self.anchor_grid = self.anchors.clone().view( + self.n_heads, 1, -1, 1, 1, 2 ) + self.channel_list, self.stride = self._fit_to_n_heads(self.in_channels) + self.learnable_mul_add_conv = nn.ModuleList( LearnableMulAddConv( add_channel=in_channels, @@ -108,7 +123,7 @@ def forward(self, inputs: list[Tensor]) -> tuple[list[Tensor], Tensor]: self.anchor_grid = self.anchor_grid.to(inputs[0].device) - for i in range(self.num_heads): + for i in range(self.n_heads): feat = cast( Tensor, torch.cat( @@ -123,11 +138,17 @@ def forward(self, inputs: list[Tensor]) -> tuple[list[Tensor], Tensor]: batch_size, _, feature_height, feature_width = feat.shape if i >= len(self.grid): self.grid.append( - self._construct_grid(feature_width, feature_height).to(feat.device) + self._construct_grid(feature_width, feature_height).to( + feat.device + ) ) feat = feat.reshape( - batch_size, self.n_anchors, self.n_out, feature_height, feature_width + batch_size, + self.n_anchors, + self.n_out, + feature_height, + feature_width, ).permute(0, 1, 3, 4, 2) features.append(feat) @@ -139,8 +160,8 @@ def forward(self, inputs: list[Tensor]) -> tuple[list[Tensor], Tensor]: return features, torch.cat(predictions, dim=1) - def wrap(self, outputs: tuple[list[Tensor], Tensor]) -> Packet[Tensor]: - features, predictions = outputs + def wrap(self, output: tuple[list[Tensor], Tensor]) -> Packet[Tensor]: + features, predictions = output if self.export: return {"boxes_and_keypoints": [predictions]} @@ -160,7 +181,8 @@ def wrap(self, outputs: tuple[list[Tensor], Tensor]) -> Packet[Tensor]: return { "boundingbox": [detection[:, :6] for detection in nms], "keypoints": [ - detection[:, 6:].reshape(-1, self.n_keypoints, 3) for detection in nms + detection[:, 6:].reshape(-1, self.n_keypoints, 3) + for detection in nms ], "features": features, } @@ -169,10 +191,12 @@ def _build_predictions( self, feat: Tensor, anchor_grid: Tensor, grid: Tensor, stride: Tensor ) -> Tensor: batch_size = feat.shape[0] - x_bbox = feat[..., : self.box_offset + self.n_classes] - x_keypoints = feat[..., self.box_offset + self.n_classes :] + bbox = feat[..., : self.box_offset + self.n_classes] + keypoints = feat[..., self.box_offset + self.n_classes :] - box_cxcy, box_wh, box_tail = process_bbox_predictions(x_bbox, anchor_grid) + box_cxcy, box_wh, box_tail = process_bbox_predictions( + bbox, anchor_grid + ) grid = grid.to(box_cxcy.device) stride = stride.to(box_cxcy.device) box_cxcy = (box_cxcy + grid) * stride @@ -180,7 +204,7 @@ def _build_predictions( grid_x = grid[..., 0:1] grid_y = grid[..., 1:2] - kpt_x, kpt_y, kpt_vis = process_keypoints_predictions(x_keypoints) + kpt_x, kpt_y, kpt_vis = process_keypoints_predictions(keypoints) kpt_x = (kpt_x + grid_x) * stride kpt_y = (kpt_y + grid_y) * stride kpt_vis_sig = kpt_vis.sigmoid() @@ -200,12 +224,14 @@ def _infer_bbox( ) return torch.cat((out_bbox_xy, out_bbox_wh, out_bbox[..., 4:]), dim=-1) - def _fit_to_num_heads(self, channel_list: list): - out_channel_list = channel_list[: self.num_heads] + 
def _fit_to_n_heads( + self, channel_list: list[int] + ) -> tuple[list[int], Tensor]: + out_channel_list = channel_list[: self.n_heads] stride = torch.tensor( [ self.original_in_shape[1] / h - for h in cast(list[int], self.in_height)[: self.num_heads] + for h in cast(list[int], self.in_height)[: self.n_heads] ], dtype=torch.int, ) @@ -214,11 +240,15 @@ def _fit_to_num_heads(self, channel_list: list): def _initialize_weights_and_biases(self, class_freq: Tensor | None = None): for m in self.modules(): if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") + nn.init.kaiming_normal_( + m.weight, mode="fan_out", nonlinearity="relu" + ) elif isinstance(m, nn.BatchNorm2d): m.eps = 1e-3 m.momentum = 0.03 - elif isinstance(m, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6)): + elif isinstance( + m, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6) + ): m.inplace = True for mi, s in zip(self.learnable_mul_add_conv, self.stride): @@ -233,7 +263,8 @@ def _initialize_weights_and_biases(self, class_freq: Tensor | None = None): def _construct_grid(self, feature_width: int, feature_height: int): grid_y, grid_x = torch.meshgrid( - [torch.arange(feature_height), torch.arange(feature_width)], indexing="ij" + [torch.arange(feature_height), torch.arange(feature_width)], + indexing="ij", ) return ( torch.stack((grid_x, grid_y), 2) diff --git a/luxonis_train/nodes/heads/segmentation_head.py b/luxonis_train/nodes/heads/segmentation_head.py index 1b29df7b..240b956c 100644 --- a/luxonis_train/nodes/heads/segmentation_head.py +++ b/luxonis_train/nodes/heads/segmentation_head.py @@ -1,39 +1,33 @@ -"""Implementation of a basic segmentation head. +from typing import Any -Adapted from: U{https://github.com/pytorch/vision/blob/main/torchvision/models/segmentation/fcn.py} -@license: U{BSD-3 } -""" - -import torch.nn as nn -from torch import Tensor +from luxonis_ml.data import LabelType +from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import UpBlock -from luxonis_train.utils.general import infer_upscale_factor -from luxonis_train.utils.types import LabelType +from luxonis_train.utils import infer_upscale_factor class SegmentationHead(BaseNode[Tensor, Tensor]): in_height: int + in_width: int in_channels: int + tasks: list[LabelType] = [LabelType.SEGMENTATION] - def __init__(self, **kwargs): + def __init__(self, **kwargs: Any): """Basic segmentation FCN head. - Note that it doesn't ensure that ouptut is same size as input. - - @type kwargs: Any - @param kwargs: Additional arguments to pass to L{BaseNode}. + Adapted from: U{https://github.com/pytorch/vision/blob/main/torchvision/models/segmentation/fcn.py} + @license: U{BSD-3 } """ super().__init__(**kwargs) + h, w = self.original_in_shape[1:] + n_up = infer_upscale_factor((self.in_height, self.in_width), (h, w)) - original_height = self.original_in_shape[1] - num_up = infer_upscale_factor(self.in_height, original_height, strict=False) - - modules = [] + modules: list[nn.Module] = [] in_channels = self.in_channels - for _ in range(int(num_up)): + for _ in range(int(n_up)): modules.append( UpBlock(in_channels=in_channels, out_channels=in_channels // 2) ) diff --git a/luxonis_train/nodes/necks/reppan_neck.py b/luxonis_train/nodes/necks/reppan_neck.py index bd05f083..107151a6 100644 --- a/luxonis_train/nodes/necks/reppan_neck.py +++ b/luxonis_train/nodes/necks/reppan_neck.py @@ -1,141 +1,147 @@ -"""Implementation of the RepPANNeck module. 
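For the SegmentationHead changes above, the number of `UpBlock`s is the power of two separating the feature map from the original input, with channels halved at each step. Illustrative arithmetic, assuming `infer_upscale_factor` returns the log2 of the size ratio (which is consistent with how it is used here):

```python
import math

in_height, orig_height = 28, 224    # hypothetical feature / input heights
n_up = int(math.log2(orig_height / in_height))     # 3 doublings: 28 -> 56 -> 112 -> 224
channels = [256 // 2**i for i in range(n_up + 1)]  # hypothetical starting channels
print(n_up, channels)               # 3 [256, 128, 64, 32]
```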
- -Adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial -Applications}. -It has the balance of feature fusion ability and hardware efficiency. -""" - - -from typing import Literal, cast +from typing import Any, Literal from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import RepDownBlock, RepUpBlock -from luxonis_train.utils.general import make_divisible +from luxonis_train.utils import make_divisible class RepPANNeck(BaseNode[list[Tensor], list[Tensor]]): + in_channels: list[int] + def __init__( self, - num_heads: Literal[2, 3, 4] = 3, + n_heads: Literal[2, 3, 4] = 3, channels_list: list[int] | None = None, - num_repeats: list[int] | None = None, + n_repeats: list[int] | None = None, depth_mul: float = 0.33, width_mul: float = 0.25, - **kwargs, + **kwargs: Any, ): - """Constructor for the RepPANNeck module. + """Implementation of the RepPANNeck module. + + Adapted from U{YOLOv6: A Single-Stage Object Detection Framework + for Industrial Applications}. + It has the balance of feature fusion ability and hardware efficiency. - @type num_heads: Literal[2,3,4] - @param num_heads: Number of output heads. Defaults to 3. ***Note: Should be same - also on head in most cases.*** + @type n_heads: Literal[2,3,4] + @param n_heads: Number of output heads. Defaults to 3. B{Note: Should be same + also on head in most cases.} @type channels_list: list[int] | None - @param channels_list: List of number of channels for each block. Defaults to - C{[256, 128, 128, 256, 256, 512]}. - @type num_repeats: list[int] | None - @param num_repeats: List of number of repeats of RepVGGBlock. Defaults to C{[12, - 12, 12, 12]}. + @param channels_list: List of number of channels for each block. + Defaults to C{[256, 128, 128, 256, 256, 512]}. + @type n_repeats: list[int] | None + @param n_repeats: List of number of repeats of RepVGGBlock. + Defaults to C{[12, 12, 12, 12]}. @type depth_mul: float - @param depth_mul: Depth multiplier. Defaults to 0.33. + @param depth_mul: Depth multiplier. Defaults to C{0.33}. @type width_mul: float - @param width_mul: Width multiplier. Defaults to 0.25. + @param width_mul: Width multiplier. Defaults to C{0.25}. 
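How the two multipliers reshape the defaults can be shown with plain arithmetic. The `make_divisible` helper below is an assumption (round up to a multiple of the divisor); with these default values the result is the same regardless of the exact rounding rule:

```python
channels_list = [256, 128, 128, 256, 256, 512]
n_repeats = [12, 12, 12, 12]
width_mul, depth_mul = 0.25, 0.33


def make_divisible(x: float, divisor: int) -> int:
    # Assumed behaviour: round up to the nearest multiple of `divisor`.
    return -int(-x // divisor) * divisor


print([make_divisible(ch * width_mul, 8) for ch in channels_list])
# [64, 32, 32, 64, 64, 128]
print([max(round(i * depth_mul), 1) if i > 1 else i for i in n_repeats])
# [4, 4, 4, 4]
```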
""" super().__init__(**kwargs) - num_repeats = num_repeats or [12, 12, 12, 12] - channels_list = channels_list or [256, 128, 128, 256, 256, 512] + self.n_heads = n_heads - self.num_heads = num_heads + n_repeats = n_repeats or [12, 12, 12, 12] + channels_list = channels_list or [256, 128, 128, 256, 256, 512] - channels_list = [make_divisible(ch * width_mul, 8) for ch in channels_list] - num_repeats = [ - (max(round(i * depth_mul), 1) if i > 1 else i) for i in num_repeats + channels_list = [ + make_divisible(ch * width_mul, 8) for ch in channels_list ] - channels_list, num_repeats = self._fit_to_num_heads(channels_list, num_repeats) + n_repeats = [ + (max(round(i * depth_mul), 1) if i > 1 else i) for i in n_repeats + ] + channels_list, n_repeats = self._fit_to_n_heads( + channels_list, n_repeats + ) self.up_blocks = nn.ModuleList() - in_channels = cast(list[int], self.in_channels)[-1] + in_channels = self.in_channels[-1] out_channels = channels_list[0] - in_channels_next = cast(list[int], self.in_channels)[-2] - curr_num_repeats = num_repeats[0] + in_channels_next = self.in_channels[-2] + curr_n_repeats = n_repeats[0] up_out_channel_list = [in_channels] # used in DownBlocks - for i in range(1, num_heads): + for i in range(1, n_heads): curr_up_block = RepUpBlock( in_channels=in_channels, in_channels_next=in_channels_next, out_channels=out_channels, - num_repeats=curr_num_repeats, + n_repeats=curr_n_repeats, ) up_out_channel_list.append(out_channels) self.up_blocks.append(curr_up_block) - if len(self.up_blocks) == (num_heads - 1): + if len(self.up_blocks) == (n_heads - 1): up_out_channel_list.reverse() break in_channels = out_channels out_channels = channels_list[i] - in_channels_next = cast(list[int], self.in_channels)[-1 - (i + 1)] - curr_num_repeats = num_repeats[i] + in_channels_next = self.in_channels[-1 - (i + 1)] + curr_n_repeats = n_repeats[i] self.down_blocks = nn.ModuleList() - channels_list_down_blocks = channels_list[(num_heads - 1) :] - num_repeats_down_blocks = num_repeats[(num_heads - 1) :] + channels_list_down_blocks = channels_list[(n_heads - 1) :] + n_repeats_down_blocks = n_repeats[(n_heads - 1) :] in_channels = out_channels downsample_out_channels = channels_list_down_blocks[0] in_channels_next = up_out_channel_list[0] out_channels = channels_list_down_blocks[1] - curr_num_repeats = num_repeats_down_blocks[0] + curr_n_repeats = n_repeats_down_blocks[0] - for i in range(1, num_heads): + for i in range(1, n_heads): curr_down_block = RepDownBlock( in_channels=in_channels, downsample_out_channels=downsample_out_channels, in_channels_next=in_channels_next, out_channels=out_channels, - num_repeats=curr_num_repeats, + n_repeats=curr_n_repeats, ) self.down_blocks.append(curr_down_block) - if len(self.down_blocks) == (num_heads - 1): + if len(self.down_blocks) == (n_heads - 1): break in_channels = out_channels downsample_out_channels = channels_list_down_blocks[2 * i] in_channels_next = up_out_channel_list[i] out_channels = channels_list_down_blocks[2 * i + 1] - curr_num_repeats = num_repeats_down_blocks[i] + curr_n_repeats = n_repeats_down_blocks[i] def forward(self, inputs: list[Tensor]) -> list[Tensor]: - x0 = inputs[-1] - up_block_outs = [] - for i, up_block in enumerate(self.up_blocks): - conv_out, x0 = up_block(x0, inputs[-1 - (i + 1)]) + x = inputs[-1] + up_block_outs: list[Tensor] = [] + for up_block, input_ in zip( + self.up_blocks, inputs[-2::-1], strict=False + ): + conv_out, x = up_block(x, input_) up_block_outs.append(conv_out) - up_block_outs.reverse() - outs = [x0] - 
for i, down_block in enumerate(self.down_blocks): - x0 = down_block(x0, up_block_outs[i]) - outs.append(x0) + outs = [x] + for down_block, up_out in zip( + self.down_blocks, reversed(up_block_outs) + ): + x = down_block(x, up_out) + outs.append(x) return outs - def _fit_to_num_heads( - self, channels_list: list[int], num_repeats: list[int] + def _fit_to_n_heads( + self, channels_list: list[int], n_repeats: list[int] ) -> tuple[list[int], list[int]]: - """Fits channels_list and num_repeats to num_heads by removing or adding items. + """Fits channels_list and n_repeats to n_heads by removing or + adding items. Also scales the numbers based on offset """ - if self.num_heads == 3: - ... - elif self.num_heads == 2: - channels_list = [channels_list[0], channels_list[4], channels_list[5]] - num_repeats = [num_repeats[0], num_repeats[3]] - elif self.num_heads == 4: + if self.n_heads == 2: + channels_list = [channels_list[i] for i in [0, 4, 5]] + n_repeats = [n_repeats[0], n_repeats[3]] + elif self.n_heads == 3: + return channels_list, n_repeats + elif self.n_heads == 4: channels_list = [ channels_list[0], channels_list[1], @@ -147,17 +153,11 @@ def _fit_to_num_heads( channels_list[4], channels_list[5], ] - num_repeats = [ - num_repeats[0], - num_repeats[1], - num_repeats[1], - num_repeats[2], - num_repeats[2], - num_repeats[3], - ] + n_repeats = [n_repeats[i] for i in [0, 1, 1, 2, 2, 3]] else: raise ValueError( - f"Specified number of heads ({self.num_heads}) not supported." + f"Specified number of heads ({self.n_heads}) not supported." + "The number of heads should be 2, 3 or 4." ) - return channels_list, num_repeats + return channels_list, n_repeats diff --git a/luxonis_train/optimizers/__init__.py b/luxonis_train/optimizers/__init__.py new file mode 100644 index 00000000..acd73792 --- /dev/null +++ b/luxonis_train/optimizers/__init__.py @@ -0,0 +1 @@ +from .optimizers import * diff --git a/luxonis_train/utils/optimizers.py b/luxonis_train/optimizers/optimizers.py similarity index 92% rename from luxonis_train/utils/optimizers.py rename to luxonis_train/optimizers/optimizers.py index 7583cef9..c2a4bf12 100644 --- a/luxonis_train/utils/optimizers.py +++ b/luxonis_train/optimizers/optimizers.py @@ -1,4 +1,4 @@ -from torch import optim +import torch.optim as optim from luxonis_train.utils.registry import OPTIMIZERS diff --git a/luxonis_train/schedulers/__init__.py b/luxonis_train/schedulers/__init__.py new file mode 100644 index 00000000..99bcd9d9 --- /dev/null +++ b/luxonis_train/schedulers/__init__.py @@ -0,0 +1 @@ +from .schedulers import * diff --git a/luxonis_train/utils/schedulers.py b/luxonis_train/schedulers/schedulers.py similarity index 100% rename from luxonis_train/utils/schedulers.py rename to luxonis_train/schedulers/schedulers.py diff --git a/luxonis_train/utils/__init__.py b/luxonis_train/utils/__init__.py index 609304c3..c47d3d33 100644 --- a/luxonis_train/utils/__init__.py +++ b/luxonis_train/utils/__init__.py @@ -1,5 +1,52 @@ -from .assigners import * -from .config import * -from .loaders import * -from .optimizers import * -from .schedulers import * +from .boundingbox import ( + anchors_for_fpn_features, + anchors_from_dataset, + bbox2dist, + bbox_iou, + compute_iou_loss, + dist2bbox, + match_to_anchor, + non_max_suppression, + process_bbox_predictions, +) +from .config import Config +from .dataset_metadata import DatasetMetadata +from .exceptions import IncompatibleException +from .general import ( + get_with_default, + infer_upscale_factor, + make_divisible, + to_shape_packet, 
+) +from .graph import is_acyclic, traverse_graph +from .keypoints import get_sigmas, process_keypoints_predictions +from .tracker import LuxonisTrackerPL +from .types import AttachIndexType, Kwargs, Labels, Packet + +__all__ = [ + "Config", + "AttachIndexType", + "Kwargs", + "Labels", + "Packet", + "IncompatibleException", + "DatasetMetadata", + "make_divisible", + "infer_upscale_factor", + "to_shape_packet", + "get_with_default", + "LuxonisTrackerPL", + "match_to_anchor", + "dist2bbox", + "bbox2dist", + "bbox_iou", + "non_max_suppression", + "anchors_from_dataset", + "anchors_for_fpn_features", + "process_bbox_predictions", + "compute_iou_loss", + "process_keypoints_predictions", + "get_sigmas", + "is_acyclic", + "traverse_graph", +] diff --git a/luxonis_train/utils/boxutils.py b/luxonis_train/utils/boundingbox.py similarity index 87% rename from luxonis_train/utils/boxutils.py rename to luxonis_train/utils/boundingbox.py index 3a206c75..9b97bfe6 100644 --- a/luxonis_train/utils/boxutils.py +++ b/luxonis_train/utils/boundingbox.py @@ -1,12 +1,10 @@ -"""This module contains various utility functions for working with bounding boxes.""" - import math from typing import Literal, TypeAlias import torch +from luxonis_ml.data import LabelType from scipy.cluster.vq import kmeans from torch import Tensor -from torch.utils.data import DataLoader from torchvision.ops import ( batched_nms, box_convert, @@ -15,24 +13,11 @@ generalized_box_iou, ) -from luxonis_train.utils.types import LabelType +from luxonis_train.loaders import BaseLoaderTorch IoUType: TypeAlias = Literal["none", "giou", "diou", "ciou", "siou"] BBoxFormatType: TypeAlias = Literal["xyxy", "xywh", "cxcywh"] -__all__ = [ - "anchors_for_fpn_features", - "anchors_from_dataset", - "bbox2dist", - "bbox_iou", - "compute_iou_loss", - "dist2bbox", - "match_to_anchor", - "non_max_suppression", - "process_bbox_predictions", - "process_keypoints_predictions", -] - def match_to_anchor( targets: Tensor, @@ -178,8 +163,21 @@ def bbox_iou( @param bbox2: Second set of bboxes [M, 4]. @type bbox_format: BBoxFormatType @param bbox_format: Input bbox format. Defaults to "xyxy". - @type iou_type: IoUType + @type iou_type: Literal["none", "giou", "diou", "ciou", "siou"] @param iou_type: IoU type. Defaults to "none". + Possible values are: + - "none": standard IoU + - "giou": Generalized IoU + - "diou": Distance IoU + - "ciou": Complete IoU. Introduced in U{ + Enhancing Geometric Factors in Model Learning and + Inference for Object Detection and Instance + Segmentation}. + Implementation adapted from torchvision C{complete_box_iou} + with improved stability. + - "siou": Soft IoU. Introduced in U{ + SIoU Loss: More Powerful Learning for Bounding Box + Regression}. @type element_wise: bool @param element_wise: If True returns element wise IoUs. Defaults to False. @rtype: Tensor @@ -197,9 +195,6 @@ def bbox_iou( elif iou_type == "diou": iou = distance_box_iou(bbox1, bbox2) elif iou_type == "ciou": - # CIoU from `Enhancing Geometric Factors in Model Learning and Inference for - # Object Detection and Instance Segmentation`, https://arxiv.org/pdf/2005.03572.pdf. 
- # Implementation adapted from torchvision complete_box_iou with added eps for stability eps = 1e-7 iou = bbox_iou(bbox1, bbox2, iou_type="none") @@ -218,9 +213,6 @@ def bbox_iou( iou = diou - alpha * v elif iou_type == "siou": - # SIoU from `SIoU Loss: More Powerful Learning for Bounding Box Regression`, - # https://arxiv.org/pdf/2205.12740.pdf - eps = 1e-7 bbox1_xywh = box_convert(bbox1, in_fmt="xyxy", out_fmt="xywh") w1, h1 = bbox1_xywh[:, 2], bbox1_xywh[:, 3] @@ -247,7 +239,9 @@ def bbox_iou( sin_alpha_1 = torch.abs(s_cw) / sigma sin_alpha_2 = torch.abs(s_ch) / sigma threshold = pow(2, 0.5) / 2 - sin_alpha = torch.where(sin_alpha_1 > threshold, sin_alpha_2, sin_alpha_1) + sin_alpha = torch.where( + sin_alpha_1 > threshold, sin_alpha_2, sin_alpha_1 + ) angle_cost = torch.cos(torch.arcsin(sin_alpha) * 2 - math.pi / 2) # distance cost @@ -287,7 +281,8 @@ def non_max_suppression( max_det: int = 300, predicts_objectness: bool = True, ) -> list[Tensor]: - """Non-maximum suppression on model's predictions to keep only best instances. + """Non-maximum suppression on model's predictions to keep only best + instances. @type preds: Tensor @param preds: Model's prediction tensor of shape [bs, N, M]. @@ -340,7 +335,9 @@ def non_max_suppression( torch.max(preds[..., 5 : 5 + n_classes], dim=-1)[0] > conf_thres, ) - output = [torch.zeros((0, preds.size(-1)), device=preds.device)] * preds.size(0) + output = [ + torch.zeros((0, preds.size(-1)), device=preds.device) + ] * preds.size(0) for i, x in enumerate(preds): curr_out = x[candidate_mask[i]] @@ -363,7 +360,9 @@ def non_max_suppression( if multi_label: box_idx, class_idx = ( - (curr_out[:, 5 : 5 + n_classes] > conf_thres).nonzero(as_tuple=False).T + (curr_out[:, 5 : 5 + n_classes] > conf_thres) + .nonzero(as_tuple=False) + .T ) keep_mask[box_idx] = True curr_out = torch.cat( @@ -375,9 +374,13 @@ def non_max_suppression( 1, ) else: - conf, class_idx = curr_out[:, 5 : 5 + n_classes].max(1, keepdim=True) + conf, class_idx = curr_out[:, 5 : 5 + n_classes].max( + 1, keepdim=True + ) keep_mask[conf.view(-1) > conf_thres] = True - curr_out = torch.cat((bboxes, conf, class_idx.float()), 1)[keep_mask] + curr_out = torch.cat((bboxes, conf, class_idx.float()), 1)[ + keep_mask + ] if has_additional: curr_out = torch.hstack( @@ -409,41 +412,37 @@ def non_max_suppression( def anchors_from_dataset( - loader: DataLoader, + loader: BaseLoaderTorch, n_anchors: int = 9, n_generations: int = 1000, ratio_threshold: float = 4.0, ) -> tuple[Tensor, float]: - """Generates anchors based on bounding box annotations present in provided data - loader. It uses K-Means for initial proposals which are then refined with genetic - algorithm. + """Generates anchors based on bounding box annotations present in + provided data loader. It uses K-Means for initial proposals which + are then refined with genetic algorithm. @type loader: L{torch.utils.data.DataLoader} @param loader: Data loader. @type n_anchors: int - @param n_anchors: Number of anchors, this is normally num_heads * 3 which generates - 3 anchors per layer. Defaults to 9. + @param n_anchors: Number of anchors, this is normally n_heads * 3 + which generates 3 anchors per layer. Defaults to 9. @type n_generations: int - @param n_generations: Number of iterations for anchor improvement with genetic - algorithm. Defaults to 1000. + @param n_generations: Number of iterations for anchor improvement + with genetic algorithm. Defaults to 1000. @type ratio_threshold: float - @param ratio_threshold: Minimum threshold for ratio. 
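For intuition about the IoU variants implemented above, the plain IoU and GIoU values for a pair of xyxy boxes can be checked directly with torchvision (illustrative values only):

```python
import torch
from torchvision.ops import box_iou, generalized_box_iou

a = torch.tensor([[0.0, 0.0, 10.0, 10.0]])
b = torch.tensor([[5.0, 5.0, 15.0, 15.0]])

print(box_iou(a, b))              # tensor([[0.1429]]) -> 25 / (100 + 100 - 25)
print(generalized_box_iou(a, b))  # lower than plain IoU: penalises enclosing-box slack
```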
Defaults to 4.0. + @param ratio_threshold: Minimum threshold for ratio. Defaults to + 4.0. @rtype: tuple[Tensor, float] @return: Proposed anchors and the best possible recall. """ - widths = [] - inputs = None - for inp, labels in loader: + widths: list[Tensor] = [] + for _, labels in loader: for tensor, label_type in labels.values(): if label_type == LabelType.BOUNDINGBOX: curr_wh = tensor[:, 4:] widths.append(curr_wh) - inputs = inp - assert inputs is not None, "No inputs found in data loader" - _, _, h, w = inputs[ - loader.dataset.image_source # type: ignore - ].shape # assuming all images are same size + _, h, w = loader.input_shape img_size = torch.tensor([w, h]) wh = torch.vstack(widths) * img_size @@ -463,7 +462,8 @@ def anchors_from_dataset( except Exception: print("Fallback to random anchor init") proposed_anchors = ( - torch.sort(torch.rand(n_anchors * 2))[0].reshape(n_anchors, 2) * img_size + torch.sort(torch.rand(n_anchors * 2))[0].reshape(n_anchors, 2) + * img_size ) proposed_anchors = proposed_anchors[ @@ -471,7 +471,8 @@ def anchors_from_dataset( ] # sort small to large def calc_best_anchor_ratio(anchors: Tensor, wh: Tensor) -> Tensor: - """Calculate how well most suitable anchor box matches each target bbox.""" + """Calculate how well most suitable anchor box matches each + target bbox.""" symmetric_size_ratios = torch.min( wh[:, None] / anchors[None], anchors[None] / wh[:, None] ) @@ -480,17 +481,20 @@ def calc_best_anchor_ratio(anchors: Tensor, wh: Tensor) -> Tensor: return best_anchor_ratio def calc_best_possible_recall(anchors: Tensor, wh: Tensor) -> Tensor: - """Calculate best possible recall if every bbox is matched to an appropriate - anchor.""" + """Calculate best possible recall if every bbox is matched to an + appropriate anchor.""" best_anchor_ratio = calc_best_anchor_ratio(anchors, wh) - best_possible_recall = (best_anchor_ratio > 1 / ratio_threshold).float().mean() + best_possible_recall = ( + (best_anchor_ratio > 1 / ratio_threshold).float().mean() + ) return best_possible_recall def anchor_fitness(anchors: Tensor, wh: Tensor) -> Tensor: """Fitness function used for anchor evolve.""" best_anchor_ratio = calc_best_anchor_ratio(anchors, wh) return ( - best_anchor_ratio * (best_anchor_ratio > 1 / ratio_threshold).float() + best_anchor_ratio + * (best_anchor_ratio > 1 / ratio_threshold).float() ).mean() # Genetic algorithm @@ -508,7 +512,9 @@ def anchor_fitness(anchors: Tensor, wh: Tensor) -> Tensor: + mutation_noise_mean ).clip(0.3, 3.0) - mutated_anchors = (proposed_anchors.clone() * anchor_mutation).clip(min=2.0) + mutated_anchors = (proposed_anchors.clone() * anchor_mutation).clip( + min=2.0 + ) mutated_fitness = anchor_fitness(mutated_anchors, wh) if mutated_fitness > best_fitness: best_fitness = mutated_fitness @@ -529,20 +535,22 @@ def anchors_for_fpn_features( grid_cell_offset: float = 0.5, multiply_with_stride: bool = False, ) -> tuple[Tensor, Tensor, list[int], Tensor]: - """Generates anchor boxes, points and strides based on FPN feature shapes and - strides. + """Generates anchor boxes, points and strides based on FPN feature + shapes and strides. @type features: list[Tensor] @param features: List of FPN features. @type strides: Tensor @param strides: Strides of FPN features. @type grid_cell_size: float - @param grid_cell_size: Cell size in respect to input image size. Defaults to 5.0. + @param grid_cell_size: Cell size in respect to input image size. + Defaults to 5.0. 
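To make the recall criterion above concrete, here is a small self-contained sketch of the same symmetric-ratio test that `calc_best_anchor_ratio` and `calc_best_possible_recall` apply (toy numbers, not taken from any dataset):

import torch

wh = torch.tensor([[30.0, 60.0], [100.0, 90.0]])       # target box sizes [N, 2]
anchors = torch.tensor([[32.0, 64.0], [96.0, 96.0]])   # candidate anchors [A, 2]

ratios = torch.min(wh[:, None] / anchors[None], anchors[None] / wh[:, None])  # [N, A, 2]
best_anchor_ratio = ratios.min(2)[0].max(1)[0]   # closest symmetric match per target box
best_possible_recall = (best_anchor_ratio > 1 / 4.0).float().mean()  # with ratio_threshold = 4.0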
@type grid_cell_offset: float - @param grid_cell_offset: Percent grid cell center's offset. Defaults to 0.5. + @param grid_cell_offset: Percent grid cell center's offset. Defaults + to 0.5. @type multiply_with_stride: bool - @param multiply_with_stride: Whether to multiply per FPN values with its stride. - Defaults to False. + @param multiply_with_stride: Whether to multiply per FPN values with + its stride. Defaults to False. @rtype: tuple[Tensor, Tensor, list[int], Tensor] @return: BBox anchors, center anchors, number of anchors, strides """ @@ -576,7 +584,9 @@ def anchors_for_fpn_features( anchors.append(anchor) anchor_point = ( - torch.stack([shift_x, shift_y], dim=-1).reshape(-1, 2).to(feature.dtype) + torch.stack([shift_x, shift_y], dim=-1) + .reshape(-1, 2) + .to(feature.dtype) ) anchor_points.append(anchor_point) @@ -595,26 +605,6 @@ def anchors_for_fpn_features( ) -def process_keypoints_predictions(keypoints: Tensor) -> tuple[Tensor, Tensor, Tensor]: - """Extracts x, y and visibility from keypoints predictions. - - @type keypoints: Tensor - @param keypoints: Keypoints predictions. The last dimension must be divisible by 3 - and is expected to be in format [x1, y1, v1, x2, y2, v2, ...]. - - @rtype: tuple[Tensor, Tensor, Tensor] - @return: x, y and visibility tensors. - """ - x = keypoints[..., ::3] * 2.0 - 0.5 - y = keypoints[..., 1::3] * 2.0 - 0.5 - visibility = keypoints[..., 2::3] - return ( - x, - y, - visibility, - ) - - def process_bbox_predictions( bbox: Tensor, anchor: Tensor ) -> tuple[Tensor, Tensor, Tensor]: @@ -625,7 +615,8 @@ def process_bbox_predictions( @type anchor: Tensor @param anchor: Anchor boxes @rtype: tuple[Tensor, Tensor, Tensor] - @return: xy and wh predictions and tail. The tail is anything after xywh. + @return: xy and wh predictions and tail. The tail is anything after + xywh. 
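An illustrative call to `anchors_for_fpn_features` on dummy feature maps (shapes and strides are made up; the return order follows the docstring above, and the exact values depend on `multiply_with_stride`):

import torch

from luxonis_train.utils import anchors_for_fpn_features

features = [torch.zeros(1, 64, 16, 16), torch.zeros(1, 64, 8, 8)]  # two FPN levels
strides = torch.tensor([16, 32])

anchors, anchor_points, n_anchors, stride_tensor = anchors_for_fpn_features(
    features, strides, grid_cell_size=5.0, grid_cell_offset=0.5
)
# anchors: one [x1, y1, x2, y2] box per grid cell, 16*16 + 8*8 rows in total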
""" out_bbox = bbox.sigmoid() out_bbox_xy = out_bbox[..., 0:2] * 2.0 - 0.5 @@ -681,10 +672,12 @@ def compute_iou_loss( else: bbox_mask = torch.ones_like(pred_bboxes, dtype=torch.bool) - pred_bboxes_pos = torch.masked_select(pred_bboxes, bbox_mask).reshape([-1, 4]) - target_bboxes_pos = torch.masked_select(target_bboxes, bbox_mask).reshape( + pred_bboxes_pos = torch.masked_select(pred_bboxes, bbox_mask).reshape( [-1, 4] ) + target_bboxes_pos = torch.masked_select( + target_bboxes, bbox_mask + ).reshape([-1, 4]) iou = bbox_iou( pred_bboxes_pos, diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 31e4fe5b..b94f08a5 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -10,8 +10,13 @@ LuxonisConfig, LuxonisFileSystem, ) -from pydantic import Field, field_validator, model_validator -from pydantic.types import FilePath, NonNegativeFloat, NonNegativeInt, PositiveInt +from pydantic import AliasChoices, Field, field_validator, model_validator +from pydantic.types import ( + FilePath, + NonNegativeFloat, + NonNegativeInt, + PositiveInt, +) from typing_extensions import Self logger = logging.getLogger(__name__) @@ -82,7 +87,9 @@ def check_predefined_model(self) -> Self: from luxonis_train.utils.registry import MODELS if self.predefined_model: - logger.info(f"Using predefined model: `{self.predefined_model.name}`") + logger.info( + f"Using predefined model: `{self.predefined_model.name}`" + ) model = MODELS.get(self.predefined_model.name)( **self.predefined_model.params ) @@ -122,14 +129,16 @@ def check_main_metric(self) -> Self: @model_validator(mode="after") def check_graph(self) -> Self: - from luxonis_train.utils.general import is_acyclic + from luxonis_train.utils import is_acyclic graph = {node.alias or node.name: node.inputs for node in self.nodes} if not is_acyclic(graph): raise ValueError("Model graph is not acyclic.") if not self.outputs: outputs: list[str] = [] # nodes which are not inputs to any nodes - inputs = set(node_name for node in self.nodes for node_name in node.inputs) + inputs = set( + node_name for node in self.nodes for node_name in node.inputs + ) for node in self.nodes: name = node.alias or node.name if name not in inputs: @@ -147,7 +156,7 @@ def check_unique_names(self) -> Self: ("metrics", self.metrics), ("visualizers", self.visualizers), ]: - names = set() + names: set[str] = set() for obj in objects: obj: AttachedModuleConfig name = obj.alias or obj.name @@ -232,7 +241,9 @@ class PreprocessingConfig(BaseModelExtraForbid): def check_normalize(self) -> Self: if self.normalize.active: self.augmentations.append( - AugmentationConfig(name="Normalize", params=self.normalize.params) + AugmentationConfig( + name="Normalize", params=self.normalize.params + ) ) return self @@ -268,20 +279,34 @@ class TrainerConfig(BaseModelExtraForbid): accelerator: Literal["auto", "cpu", "gpu", "tpu"] = "auto" devices: int | list[int] | str = "auto" strategy: Literal["auto", "ddp"] = "auto" - num_sanity_val_steps: int = 2 + n_sanity_val_steps: Annotated[ + int, + Field( + validation_alias=AliasChoices( + "n_sanity_val_steps", "num_sanity_val_steps" + ) + ), + ] = 2 profiler: Literal["simple", "advanced"] | None = None matmul_precision: Literal["medium", "high", "highest"] | None = None verbose: bool = True seed: int | None = None + deterministic: bool | Literal["warn"] | None = None batch_size: PositiveInt = 32 accumulate_grad_batches: PositiveInt = 1 use_weighted_sampler: bool = False epochs: PositiveInt = 100 - num_workers: NonNegativeInt 
= 4 + n_workers: Annotated[ + NonNegativeInt, + Field(validation_alias=AliasChoices("n_workers", "num_workers")), + ] = 4 train_metrics_interval: Literal[-1] | PositiveInt = -1 validation_interval: Literal[-1] | PositiveInt = 5 - num_log_images: NonNegativeInt = 4 + n_log_images: Annotated[ + NonNegativeInt, + Field(validation_alias=AliasChoices("n_log_images", "num_log_images")), + ] = 4 skip_last_batch: bool = True pin_memory: bool = True log_sub_losses: bool = True @@ -293,13 +318,24 @@ class TrainerConfig(BaseModelExtraForbid): scheduler: SchedulerConfig = SchedulerConfig() @model_validator(mode="after") - def check_num_workes_platform(self) -> Self: + def validate_deterministic(self) -> Self: + if self.seed is not None and self.deterministic is None: + logger.warning( + "Setting `trainer.deterministic` to True because `trainer.seed` is set." + "This can cause certain layers to fail. " + "In such cases, set `trainer.deterministic` to `'warn'`." + ) + self.deterministic = True + return self + + @model_validator(mode="after") + def check_n_workes_platform(self) -> Self: if ( sys.platform == "win32" or sys.platform == "darwin" - ) and self.num_workers != 0: - self.num_workers = 0 + ) and self.n_workers != 0: + self.n_workers = 0 logger.warning( - "Setting `num_workers` to 0 because of platform compatibility." + "Setting `n_workers` to 0 because of platform compatibility." ) return self @@ -321,7 +357,9 @@ class OnnxExportConfig(BaseModelExtraForbid): class BlobconverterExportConfig(BaseModelExtraForbid): active: bool = False shaves: int = 6 - version: Literal["2021.2", "2021.3", "2021.4", "2022.1", "2022.3_RVC3"] = "2022.1" + version: Literal["2021.2", "2021.3", "2021.4", "2022.1", "2022.3_RVC3"] = ( + "2022.1" + ) class ArchiveConfig(BaseModelExtraForbid): @@ -403,7 +441,9 @@ def get_config( return instance fs = LuxonisFileSystem(cfg) if fs.is_mlflow: - logger.info("Setting `project_id` and `run_id` to config's MLFlow run") + logger.info( + "Setting `project_id` and `run_id` to config's MLFlow run" + ) instance.tracker.project_id = fs.experiment_id instance.tracker.run_id = fs.run_id return instance diff --git a/luxonis_train/utils/dataset_metadata.py b/luxonis_train/utils/dataset_metadata.py new file mode 100644 index 00000000..35ebbef8 --- /dev/null +++ b/luxonis_train/utils/dataset_metadata.py @@ -0,0 +1,154 @@ +from luxonis_train.loaders import BaseLoaderTorch +from luxonis_train.utils import anchors_from_dataset + + +class DatasetMetadata: + """Metadata about the dataset.""" + + def __init__( + self, + *, + classes: dict[str, list[str]] | None = None, + n_keypoints: dict[str, int] | None = None, + loader: BaseLoaderTorch | None = None, + ): + """An object containing metadata about the dataset. Used to + infer the number of classes, number of keypoints, I{etc.} + instead of passing them as arguments to the model. + + @type classes: dict[str, list[str]] | None + @param classes: Dictionary mapping tasks to lists of class + names. + @type n_keypoints: dict[str, int] | None + @param n_keypoints: Dictionary mapping tasks to the number of + keypoints. + @type loader: DataLoader | None + @param loader: Dataset loader. + """ + self._classes = classes or {} + self._n_keypoints = n_keypoints or {} + self._loader = loader + + def n_classes(self, task: str | None = None) -> int: + """Gets the number of classes for the specified task. + + @type task: str | None + @param task: Task to get the number of classes for. + @rtype: int + @return: Number of classes for the specified label type. 
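The renamed trainer fields (`n_workers`, `n_log_images`, `n_sanity_val_steps`) stay backward compatible with the old `num_*` keys through validation aliases. A standalone pydantic sketch of the same pattern (an illustrative model, not the real `TrainerConfig`):

from typing import Annotated

from pydantic import AliasChoices, BaseModel, Field

class ExampleConfig(BaseModel):
    n_workers: Annotated[
        int, Field(validation_alias=AliasChoices("n_workers", "num_workers"))
    ] = 4

assert ExampleConfig(num_workers=2).n_workers == 2  # old key still accepted
assert ExampleConfig(n_workers=8).n_workers == 8    # new key
assert ExampleConfig().n_workers == 4               # default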
+ @raises ValueError: If the C{task} is not present in the + dataset. + @raises RuntimeError: If the C{task} was not provided and the + dataset contains different number of classes for different + label types. + """ + if task is not None: + if task not in self._classes: + raise ValueError( + f"Task '{task}' is not present in the dataset." + ) + return len(self._classes[task]) + n_classes = len(list(self._classes.values())[0]) + for classes in self._classes.values(): + if len(classes) != n_classes: + raise RuntimeError( + "The dataset contains different number of classes for different tasks." + "Please specify the 'task' argument to get the number of classes." + ) + return n_classes + + def n_keypoints(self, task: str | None = None) -> int: + """Gets the number of keypoints for the specified task. + + @type task: str | None + @param task: Task to get the number of keypoints for. + @rtype: int + @return: Number of keypoints for the specified label type. + @raises ValueError: If the C{task} is not present in the + dataset. + @raises RuntimeError: If the C{task} was not provided and the + dataset contains different number of keypoints for different + label types. + """ + if task is not None: + if task not in self._n_keypoints: + raise ValueError( + f"Task '{task}' is not present in the dataset." + ) + return self._n_keypoints[task] + n_keypoints = next(iter(self._n_keypoints.values())) + for n in self._n_keypoints.values(): + if n != n_keypoints: + raise RuntimeError( + "The dataset contains different number of keypoints for different tasks." + "Please specify the 'task' argument to get the number of keypoints." + ) + return n_keypoints + + def classes(self, task: str | None = None) -> list[str]: + """Gets the class names for the specified task. + + @type task: str | None + @param task: Task to get the class names for. + @rtype: list[str] + @return: List of class names for the specified label type. + @raises ValueError: If the C{task} is not present in the + dataset. + @raises RuntimeError: If the C{task} was not provided and the + dataset contains different class names for different label + types. + """ + if task is not None: + if task not in self._classes: + raise ValueError( + f"Task type {task} is not present in the dataset." + ) + return self._classes[task] + class_names = list(self._classes.values())[0] + for classes in self._classes.values(): + if classes != class_names: + raise RuntimeError( + "The dataset contains different class names for different tasks." + ) + return class_names + + def autogenerate_anchors( + self, n_heads: int + ) -> tuple[list[list[float]], float]: + """Automatically generates anchors for the provided dataset. + + @type n_heads: int + @param n_heads: Number of heads to generate anchors for. + @rtype: tuple[list[list[float]], float] + @return: List of anchors in [-1,6] format and recall of the + anchors. + @raises RuntimeError: If the dataset loader was not provided + during initialization. + """ + if self._loader is None: + raise RuntimeError( + "Cannot generate anchors without a dataset loader. " + "Please provide a dataset loader to the constructor " + "or call `set_loader` method." + ) + + proposed_anchors, recall = anchors_from_dataset( + self._loader, n_anchors=n_heads * 3 + ) + return proposed_anchors.reshape(-1, 6).tolist(), recall + + @classmethod + def from_loader(cls, loader: BaseLoaderTorch) -> "DatasetMetadata": + """Creates a L{DatasetMetadata} object from a L{LuxonisDataset}. 
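A hypothetical usage sketch of the new `DatasetMetadata` helper (the task names and class lists are made up for illustration):

from luxonis_train.utils import DatasetMetadata

metadata = DatasetMetadata(
    classes={"boundingbox": ["car", "motorbike"], "segmentation": ["car", "motorbike"]},
    n_keypoints={"keypoints": 6},
)

metadata.n_classes("boundingbox")  # -> 2
metadata.n_classes()               # -> 2 (allowed because all tasks agree)
metadata.n_keypoints("keypoints")  # -> 6
metadata.classes("segmentation")   # -> ["car", "motorbike"]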
+ + @type dataset: LuxonisDataset + @param dataset: Dataset to create the metadata from. + @rtype: DatasetMetadata + @return: Instance of L{DatasetMetadata} created from the + provided dataset. + """ + classes = loader.get_classes() + n_keypoints = loader.get_n_keypoints() + + instance = cls(classes=classes, n_keypoints=n_keypoints, loader=loader) + return instance diff --git a/luxonis_train/utils/exceptions.py b/luxonis_train/utils/exceptions.py new file mode 100644 index 00000000..bab8c1aa --- /dev/null +++ b/luxonis_train/utils/exceptions.py @@ -0,0 +1,12 @@ +class IncompatibleException(Exception): + """Raised when two parts of the model are incompatible with each + other.""" + + @classmethod + def from_missing_task( + cls, task: str, present_tasks: list[str], class_name: str + ): + return cls( + f"{class_name} requires '{task}' label, but it was not found in " + f"the label dictionary. Available labels: {present_tasks}." + ) diff --git a/luxonis_train/utils/general.py b/luxonis_train/utils/general.py index 5ae3b43f..45013807 100644 --- a/luxonis_train/utils/general.py +++ b/luxonis_train/utils/general.py @@ -1,272 +1,141 @@ import logging import math -from copy import deepcopy -from typing import Generator, TypeVar +from typing import TypeVar -from pydantic import BaseModel from torch import Size, Tensor -from torch.utils.data import DataLoader -from luxonis_train.utils.boxutils import anchors_from_dataset -from luxonis_train.utils.loaders import BaseLoaderTorch from luxonis_train.utils.types import Packet +logger = logging.getLogger(__name__) -class DatasetMetadata: - """Metadata about the dataset.""" - def __init__( - self, - *, - classes: dict[str, list[str]] | None = None, - n_keypoints: dict[str, int] | None = None, - loader: DataLoader | None = None, - ): - """An object containing metadata about the dataset. Used to infer the number of - classes, number of keypoints, I{etc.} instead of passing them as arguments to - the model. - - @type classes: dict[str, list[str]] | None - @param classes: Dictionary mapping tasks to lists of class names. - @type n_keypoints: dict[str, int] | None - @param n_keypoints: Dictionary mapping tasks to the number of keypoints. - @type loader: DataLoader | None - @param loader: Dataset loader. - """ - self._classes = classes or {} - self._n_keypoints = n_keypoints or {} - self._loader = loader +def make_divisible(x: int | float, divisor: int) -> int: + """Upward revision the value x to make it evenly divisible by the + divisor. - @property - def classes(self) -> dict[str, list[str]]: - """Dictionary mapping label types to lists of class names. + Equivalent to M{ceil(x / divisor) * divisor}. - @type: dict[str, list[str]] - @raises ValueError: If classes were not provided during initialization. - """ - if self._classes is None: - raise ValueError( - "Trying to access `classes`, byt they were not" - "provided during initialization." - ) - return self._classes + @type x: int | float + @param x: Value to be revised. + @type divisor: int + @param divisor: Divisor. + @rtype: int + @return: Revised value. + """ + return math.ceil(x / divisor) * divisor - def n_classes(self, task: str | None) -> int: - """Gets the number of classes for the specified task. - @type task: str | None - @param task: Task to get the number of classes for. - @rtype: int - @return: Number of classes for the specified label type. - @raises ValueError: If the dataset loader was not provided during - initialization. 
- @raises ValueError: If the dataset contains different number of classes for - different label types. - """ - if task is not None: - if task not in self.classes: - raise ValueError(f"Task '{task}' is not present in the dataset.") - return len(self.classes[task]) - n_classes = len(list(self.classes.values())[0]) - for classes in self.classes.values(): - if len(classes) != n_classes: - raise ValueError( - "The dataset contains different number of classes for different tasks." - ) - return n_classes +def infer_upscale_factor( + in_size: tuple[int, int] | int, orig_size: tuple[int, int] | int +) -> int: + """Infer the upscale factor from the input shape and the original + shape. + + @type in_size: tuple[int, int] | int + @param in_size: Input shape as a tuple of (height, width) or just + one of them. + @type orig_size: tuple[int, int] | int + @param orig_size: Original shape as a tuple of (height, width) or + just one of them. + @rtype: int + @return: Upscale factor. + @raise ValueError: If the C{in_size} cannot be upscaled to the + C{orig_size}. This can happen if the upscale factors are not + integers or are different. + """ - def n_keypoints(self, task: str | None) -> int: - if task is not None: - if task not in self._n_keypoints: - raise ValueError(f"Task '{task}' is not present in the dataset.") - return self._n_keypoints[task] - if len(self._n_keypoints) > 1: + def _infer_upscale_factor(in_size: int, orig_size: int) -> int | float: + factor = math.log2(orig_size) - math.log2(in_size) + if abs(round(factor) - factor) < 1e-6: + return int(round(factor)) + return factor + + if isinstance(in_size, int): + in_size = (in_size, in_size) + if isinstance(orig_size, int): + orig_size = (orig_size, orig_size) + in_height, in_width = in_size + orig_height, orig_width = orig_size + + width_factor = _infer_upscale_factor(in_width, orig_width) + height_factor = _infer_upscale_factor(in_height, orig_height) + + match (width_factor, height_factor): + case (int(wf), int(hf)) if wf == hf: + return wf + case (int(wf), int(hf)): raise ValueError( - "The dataset specifies multiple keypoint tasks, " - "please specify the 'task' argument to get the number of keypoints." + f"Width and height upscale factors are different. " + f"Width: {wf}, height: {hf}." ) - return next(iter(self._n_keypoints.values())) - - def class_names(self, task: str | None) -> list[str]: - """Gets the class names for the specified task. - - @type task: str | None - @param task: Task to get the class names for. - @rtype: list[str] - @return: List of class names for the specified label type. - @raises ValueError: If the dataset loader was not provided during - initialization. - @raises ValueError: If the dataset contains different class names for different - label types. - """ - if task is not None: - if task not in self.classes: - raise ValueError(f"Task type {task} is not present in the dataset.") - return self.classes[task] - class_names = list(self.classes.values())[0] - for classes in self.classes.values(): - if classes != class_names: - raise ValueError( - "The dataset contains different class names for different tasks." - ) - return class_names - - def autogenerate_anchors(self, n_heads: int) -> tuple[list[list[float]], float]: - """Automatically generates anchors for the provided dataset. - - @type n_heads: int - @param n_heads: Number of heads to generate anchors for. - @rtype: tuple[list[list[float]], float] - @return: List of anchors in [-1,6] format and recall of the anchors. 
- @raises ValueError: If the dataset loader was not provided during - initialization. - """ - if self.loader is None: + case (int(wf), float(hf)): raise ValueError( - "Cannot generate anchors without a dataset loader. " - "Please provide a dataset loader to the constructor " - "or call `set_loader` method." + f"Width upscale factor is an integer, but height upscale factor is not. " + f"Width: {wf}, height: {hf}." ) - - proposed_anchors, recall = anchors_from_dataset( - self.loader, n_anchors=n_heads * 3 - ) - return proposed_anchors.reshape(-1, 6).tolist(), recall - - def set_loader(self, loader: DataLoader) -> None: - """Sets the dataset loader. - - @type loader: DataLoader - @param loader: Dataset loader. - """ - self.loader = loader - - @classmethod - def from_loader(cls, loader: BaseLoaderTorch) -> "DatasetMetadata": - """Creates a L{DatasetMetadata} object from a L{LuxonisDataset}. - - @type dataset: LuxonisDataset - @param dataset: Dataset to create the metadata from. - @rtype: DatasetMetadata - @return: Instance of L{DatasetMetadata} created from the provided dataset. - """ - classes = loader.get_classes() - n_keypoints = loader.get_n_keypoints() - - return cls(classes=classes, n_keypoints=n_keypoints) - - -def make_divisible(x: int | float, divisor: int) -> int: - """Upward revision the value x to make it evenly divisible by the divisor.""" - return math.ceil(x / divisor) * divisor - - -def infer_upscale_factor( - in_height: int, orig_height: int, strict: bool = True, warn: bool = True -) -> int: - """Infer the upscale factor from the input height and original height.""" - num_up = math.log2(orig_height) - math.log2(in_height) - if abs(round(num_up) - num_up) < 1e-6: - return int(round(num_up)) - elif not strict: - if warn: - logging.getLogger(__name__).warning( - f"Upscale factor is not an integer: {num_up}. " - "Output shape will not be the same as input shape." + case (float(wf), int(hf)): + raise ValueError( + f"Height upscale factor is an integer, but width upscale factor is not. " + f"Width: {wf}, height: {hf}." + ) + case (float(wf), float(hf)): + raise ValueError( + "Width and height upscale factors are not integers. " + f"Width: {wf}, height: {hf}." ) - return round(num_up) - else: - raise ValueError( - f"Upscale factor is not an integer: {num_up}. " - "Output shape will not be the same as input shape." - ) + + raise NotImplementedError( + f"Unexpected case: {width_factor}, {height_factor}" + ) def to_shape_packet(packet: Packet[Tensor]) -> Packet[Size]: + """Converts a packet of tensors to a packet of shapes. Used for + debugging purposes. + + @type packet: Packet[Tensor] + @param packet: Packet of tensors. + @rtype: Packet[Size] + @return: Packet of shapes. + """ shape_packet: Packet[Size] = {} for name, value in packet.items(): shape_packet[name] = [x.shape for x in value] return shape_packet -def is_acyclic(graph: dict[str, list[str]]) -> bool: - """Tests if graph is acyclic. - - @type graph: dict[str, list[str]] - @param graph: Graph in a format of a dictionary of predecessors. Keys are node - names, values are inputs to the node (list of node names). - @rtype: bool - @return: True if graph is acyclic, False otherwise. 
- """ - graph = graph.copy() - - def dfs(node: str, visited: set[str], recursion_stack: set[str]): - visited.add(node) - recursion_stack.add(node) - - for predecessor in graph.get(node, []): - if predecessor in recursion_stack: - return True - if predecessor not in visited: - if dfs(predecessor, visited, recursion_stack): - return True - - recursion_stack.remove(node) - return False - - visited: set[str] = set() - recursion_stack: set[str] = set() - - for node in graph.keys(): - if node not in visited: - if dfs(node, visited, recursion_stack): - return False - - return True - - -def validate_packet(data: Packet[Tensor], protocol: type[BaseModel]) -> Packet[Tensor]: - return protocol(**data).model_dump() - - T = TypeVar("T") -# TEST: -def traverse_graph( - graph: dict[str, list[str]], nodes: dict[str, T] -) -> Generator[tuple[str, T, list[str], list[str]], None, None]: - """Traverses the graph in topological order. - - @type graph: dict[str, list[str]] - @param graph: Graph in a format of a dictionary of predecessors. Keys are node - names, values are inputs to the node (list of node names). - @type nodes: dict[str, T] - @param nodes: Dictionary mapping node names to node objects. - @rtype: Generator[tuple[str, T, list[str], list[str]], None, None] - @return: Generator of tuples containing node name, node object, node dependencies - and unprocessed nodes. - @raises RuntimeError: If the graph is malformed. +def get_with_default( + value: T | None, + action_name: str, + caller_name: str | None = None, + *, + default: T, +) -> T: + """Returns value if it is not C{None}, otherwise returns the default + value and log an info. + + @type value: T | None + @param value: Value to return. + @type action_name: str + @param action_name: Name of the action for which the default value + is being used. Used for logging. + @type caller_name: str | None + @param caller_name: Name of the caller function. Used for logging. + @type default: T + @param default: Default value to return if C{value} is C{None}. + @rtype: T + @return: C{value} if it is not C{None}, otherwise C{default}. """ - unprocessed_nodes = sorted( - set(nodes.keys()) - ) # sort the set to allow reproducibility - processed: set[str] = set() + if value is not None: + return value - graph = deepcopy(graph) - while unprocessed_nodes: - unprocessed_nodes_copy = unprocessed_nodes.copy() - for node_name in unprocessed_nodes_copy: - node_dependencies = graph[node_name] - if not node_dependencies or all( - dependency in processed for dependency in node_dependencies - ): - yield node_name, nodes[node_name], node_dependencies, unprocessed_nodes - processed.add(node_name) - unprocessed_nodes.remove(node_name) + msg = f"Default value of {value} is being used for {action_name}." - if unprocessed_nodes_copy == unprocessed_nodes: - raise RuntimeError( - "Malformed graph. " - "Please check that all nodes are connected in a directed acyclic graph." - ) + if caller_name: + msg = f"[{caller_name}] {msg}" + + logger.info(msg, stacklevel=2) + return default diff --git a/luxonis_train/utils/graph.py b/luxonis_train/utils/graph.py new file mode 100644 index 00000000..a2b72832 --- /dev/null +++ b/luxonis_train/utils/graph.py @@ -0,0 +1,92 @@ +from copy import deepcopy +from typing import Iterator, TypeAlias, TypeVar + +Graph: TypeAlias = dict[str, list[str]] +"""Graph in a format of a dictionary of predecessors. + +Keys are node names, values are inputs to the node (list of node names). +""" + + +def is_acyclic(graph: Graph) -> bool: + """Tests if graph is acyclic. 
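A tiny usage sketch for the graph helpers introduced here (node names are illustrative; the values in `nodes` stand in for real node objects):

from luxonis_train.utils import is_acyclic, traverse_graph

graph = {"backbone": [], "neck": ["backbone"], "head": ["neck"]}
nodes = {name: object() for name in graph}

assert is_acyclic(graph)

for name, node, dependencies, remaining in traverse_graph(graph, nodes):
    print(name, dependencies)  # visits "backbone", then "neck", then "head"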
+ + @type graph: dict[str, list[str]] + @param graph: Graph in a format of a dictionary of predecessors. + Keys are node names, values are inputs to the node (list of node + names). + @rtype: bool + @return: True if graph is acyclic, False otherwise. + """ + graph = graph.copy() + + def dfs(node: str, visited: set[str], recursion_stack: set[str]): + visited.add(node) + recursion_stack.add(node) + + for predecessor in graph.get(node, []): + if predecessor in recursion_stack: + return True + if predecessor not in visited: + if dfs(predecessor, visited, recursion_stack): + return True + + recursion_stack.remove(node) + return False + + visited: set[str] = set() + recursion_stack: set[str] = set() + + for node in graph.keys(): + if node not in visited: + if dfs(node, visited, recursion_stack): + return False + + return True + + +T = TypeVar("T") + + +def traverse_graph( + graph: Graph, nodes: dict[str, T] +) -> Iterator[tuple[str, T, list[str], list[str]]]: + """Traverses the graph in topological order. + + @type graph: dict[str, list[str]] + @param graph: Graph in a format of a dictionary of predecessors. + Keys are node names, values are inputs to the node (list of node + names). + @type nodes: dict[str, T] + @param nodes: Dictionary mapping node names to node objects. + @rtype: Iterator[tuple[str, T, list[str], list[str]]] + @return: Iterator of tuples containing node name, node object, node + dependencies and unprocessed nodes. + @raises RuntimeError: If the graph is malformed. + """ + # sort the set to allow reproducibility + unprocessed_nodes = sorted(set(nodes.keys())) + processed: set[str] = set() + + graph = deepcopy(graph) + while unprocessed_nodes: + unprocessed_nodes_copy = unprocessed_nodes.copy() + for node_name in unprocessed_nodes_copy: + node_dependencies = graph[node_name] + if not node_dependencies or all( + dependency in processed for dependency in node_dependencies + ): + unprocessed_nodes.remove(node_name) + yield ( + node_name, + nodes[node_name], + node_dependencies, + unprocessed_nodes.copy(), + ) + processed.add(node_name) + + if unprocessed_nodes_copy == unprocessed_nodes: + raise RuntimeError( + "Malformed graph. " + "Please check that all nodes are connected in a directed acyclic graph." + ) diff --git a/luxonis_train/utils/keypoints.py b/luxonis_train/utils/keypoints.py new file mode 100644 index 00000000..9fbc741d --- /dev/null +++ b/luxonis_train/utils/keypoints.py @@ -0,0 +1,85 @@ +import logging + +import torch +from torch import Tensor + +logger = logging.getLogger(__name__) + + +def process_keypoints_predictions( + keypoints: Tensor, +) -> tuple[Tensor, Tensor, Tensor]: + """Extracts x, y and visibility from keypoints predictions. + + @type keypoints: Tensor + @param keypoints: Keypoints predictions. The last dimension must be divisible by 3 + and is expected to be in format [x1, y1, v1, x2, y2, v2, ...]. + + @rtype: tuple[Tensor, Tensor, Tensor] + @return: x, y and visibility tensors. + """ + x = keypoints[..., ::3] + y = keypoints[..., 1::3] + visibility = keypoints[..., 2::3] + return x, y, visibility + + +def get_sigmas( + sigmas: list[float] | None, + n_keypoints: int, + caller_name: str | None = None, +) -> Tensor: + """Validate or create sigma values for each keypoint. + + @type sigmas: list[float] | None + @param sigmas: List of sigmas for each keypoint. If C{None}, then + default sigmas are used. + @type n_keypoints: int + @param n_keypoints: Number of keypoints. + @type caller_name: str | None + @param caller_name: Name of the caller function. 
Used for logging. + @rtype: Tensor + @return: Tensor of sigmas. + """ + if sigmas is not None: + if len(sigmas) == n_keypoints: + return torch.tensor(sigmas, dtype=torch.float32) + else: + error_msg = "The length of the sigmas list must be the same as the number of keypoints." + if caller_name: + error_msg = f"[{caller_name}] {error_msg}" + raise ValueError(error_msg) + else: + if n_keypoints == 17: + msg = "Default COCO sigmas are being used." + if caller_name: + msg = f"[{caller_name}] {msg}" + logger.warning(msg) + return torch.tensor( + [ + 0.026, + 0.025, + 0.025, + 0.035, + 0.035, + 0.079, + 0.079, + 0.072, + 0.072, + 0.062, + 0.062, + 0.107, + 0.107, + 0.087, + 0.087, + 0.089, + 0.089, + ], + dtype=torch.float32, + ) + else: + msg = "Default sigma of 0.04 is being used for each keypoint." + if caller_name: + msg = f"[{caller_name}] {msg}" + logger.info(msg) + return torch.tensor([0.04] * n_keypoints, dtype=torch.float32) diff --git a/luxonis_train/utils/registry.py b/luxonis_train/utils/registry.py index 2222ecbd..02532d32 100644 --- a/luxonis_train/utils/registry.py +++ b/luxonis_train/utils/registry.py @@ -1,46 +1,46 @@ -"""This module implements a metaclass for automatic registration of classes.""" +"""This module implements a metaclass for automatic registration of +classes.""" import lightning.pytorch as pl -import torch from luxonis_ml.utils.registry import Registry +from torch.optim.lr_scheduler import _LRScheduler +from torch.optim.optimizer import Optimizer -import luxonis_train +import luxonis_train as lt CALLBACKS: Registry[type[pl.Callback]] = Registry(name="callbacks") """Registry for all callbacks.""" -LOADERS: Registry[type["luxonis_train.utils.loaders.BaseLoaderTorch"]] = Registry( +LOADERS: Registry[type["lt.loaders.BaseLoaderTorch"]] = Registry( name="loaders" ) """Registry for all loaders.""" -LOSSES: Registry[type["luxonis_train.attached_modules.BaseLoss"]] = Registry( +LOSSES: Registry[type["lt.attached_modules.BaseLoss"]] = Registry( name="losses" ) """Registry for all losses.""" -METRICS: Registry[type["luxonis_train.attached_modules.BaseMetric"]] = Registry( +METRICS: Registry[type["lt.attached_modules.BaseMetric"]] = Registry( name="metrics" ) """Registry for all metrics.""" -MODELS: Registry[type["luxonis_train.models.BasePredefinedModel"]] = Registry( +MODELS: Registry[type["lt.models.BasePredefinedModel"]] = Registry( name="models" ) """Registry for all models.""" -NODES: Registry[type["luxonis_train.nodes.BaseNode"]] = Registry(name="nodes") +NODES: Registry[type["lt.nodes.BaseNode"]] = Registry(name="nodes") """Registry for all nodes.""" -OPTIMIZERS: Registry[type[torch.optim.Optimizer]] = Registry(name="optimizers") +OPTIMIZERS: Registry[type[Optimizer]] = Registry(name="optimizers") """Registry for all optimizers.""" -SCHEDULERS: Registry[type[torch.optim.lr_scheduler._LRScheduler]] = Registry( - name="schedulers" -) +SCHEDULERS: Registry[type[_LRScheduler]] = Registry(name="schedulers") """Registry for all schedulers.""" -VISUALIZERS: Registry[type["luxonis_train.visualizers.BaseVisualizer"]] = Registry( +VISUALIZERS: Registry[type["lt.visualizers.BaseVisualizer"]] = Registry( "visualizers" ) """Registry for all visualizers.""" diff --git a/luxonis_train/utils/tracker.py b/luxonis_train/utils/tracker.py index 4df76edd..35d7af70 100644 --- a/luxonis_train/utils/tracker.py +++ b/luxonis_train/utils/tracker.py @@ -1,12 +1,15 @@ +from typing import Any + from lightning.pytorch.loggers.logger import Logger from lightning.pytorch.utilities import 
rank_zero_only # type: ignore from luxonis_ml.tracker import LuxonisTracker class LuxonisTrackerPL(LuxonisTracker, Logger): - """Implementation of LuxonisTracker that is compatible with PytorchLightning.""" + """Implementation of LuxonisTracker that is compatible with + PytorchLightning.""" - def __init__(self, *, _auto_finalize: bool = True, **kwargs): + def __init__(self, *, _auto_finalize: bool = True, **kwargs: Any): """ @type _auto_finalize: bool @param _auto_finalize: If True, the run will be finalized automatically when the training ends. @@ -21,7 +24,7 @@ def __init__(self, *, _auto_finalize: bool = True, **kwargs): self.finalize = self._finalize @rank_zero_only - def _finalize(self, status: str = "success") -> None: + def _finalize(self, status: str = "success") -> None: # pragma: no cover """Finalizes current run.""" if self.is_tensorboard: self.experiment["tensorboard"].flush() diff --git a/luxonis_train/utils/types.py b/luxonis_train/utils/types.py index 84b8e019..3a7ca7f4 100644 --- a/luxonis_train/utils/types.py +++ b/luxonis_train/utils/types.py @@ -1,19 +1,21 @@ -from typing import Annotated, Any, Literal, TypeVar +from typing import Any, Literal, TypeVar from luxonis_ml.data import LabelType -from pydantic import BaseModel, Field, ValidationError from torch import Size, Tensor Kwargs = dict[str, Any] -OutputTypes = Literal["boundingbox", "class", "keypoints", "segmentation", "features"] +"""Kwargs is a dictionary containing keyword arguments.""" + Labels = dict[str, tuple[Tensor, LabelType]] +"""Labels is a dictionary containing a tuple of tensors and their +corresponding label type.""" AttachIndexType = Literal["all"] | int | tuple[int, int] | tuple[int, int, int] -"""AttachIndexType is used to specify to which output of the prevoius node does the -current node attach to. +"""AttachIndexType is used to specify to which output of the prevoius +node does the current node attach to. -It can be either "all" (all outputs), an index of the output or a tuple of indices of -the output (specifying a range of outputs). +It can be either "all" (all outputs), an index of the output or a tuple +of indices of the output (specifying a range of outputs). """ T = TypeVar("T", Tensor, Size) @@ -22,31 +24,3 @@ It is used to pass data between different nodes of the network graph. """ - - -class IncompatibleException(Exception): - """Raised when two parts of the model are incompatible with each other.""" - - @classmethod - def from_validation_error(cls, val_error: ValidationError, class_name: str): - return cls( - f"{class_name} received an input not conforming to the protocol. " - f"Validation error: {val_error.errors(include_input=False, include_url=False)}." - ) - - @classmethod - def from_missing_task(cls, task: str, present_tasks: list[str], class_name: str): - return cls( - f"{class_name} requires '{task}' label, but it was not found in " - f"the label dictionary. Available labels: {present_tasks}." 
- ) - - -class BaseProtocol(BaseModel): - class Config: - arbitrary_types_allowed = True - extra = "forbid" - - -class FeaturesProtocol(BaseProtocol): - features: Annotated[list[Tensor], Field(min_length=1)] diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 8e21255a..34387324 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -9,13 +9,13 @@ - + coverage coverage - 84% - 84% + 97% + 97% diff --git a/pyproject.toml b/pyproject.toml index 2093e25b..d65978d4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,5 @@ [project] name = "luxonis-train" -version = "0.0.1" description = "Luxonis training framework for seamless training of various neural networks." readme = "README.md" requires-python = ">=3.10" @@ -8,7 +7,7 @@ license = { file = "LICENSE" } authors = [{ name = "Luxonis", email = "support@luxonis.com" }] maintainers = [{ name = "Luxonis", email = "support@luxonis.com" }] keywords = ["ml", "training", "luxonis", "oak"] -dynamic = ["dependencies", "optional-dependencies"] +dynamic = ["dependencies", "optional-dependencies", "version"] classifiers = [ "License :: OSI Approved :: Apache Software License", "Development Status :: 3 - Alpha", @@ -35,10 +34,11 @@ where = ["."] [tool.setuptools.dynamic] dependencies = { file = ["requirements.txt"] } optional-dependencies = { dev = { file = ["requirements-dev.txt"] } } +version = {attr = "luxonis_train.__version__"} [tool.ruff] target-version = "py310" -line-length = 88 +line-length = 79 indent-width = 4 [tool.ruff.lint] @@ -47,10 +47,44 @@ select = ["E4", "E7", "E9", "F", "W", "B", "I"] [tool.docformatter] black = true - -[tool.mypy] -python_version = "3.10" -ignore_missing_imports = true +wrap-summaries = 72 +wrap-descriptions = 72 [tool.pyright] typeCheckingMode = "basic" +reportMissingTypeStubs = "none" +reportPrivateImportUsage = "none" +reportPrivateUsage = "none" +reportIncompatibleVariableOverride = "none" +reportIncompatibleMethodOverride = "none" +reportUnnecessaryIsInstance = "none" + +[tool.pytest.ini_options] +testpaths = ["tests"] +addopts = "--disable-warnings" +markers = [ + "unit: mark a test as a unit test", + "integration: mark a test as an integration test", +] + +[tool.coverage.run] +omit = [ + "**/__main__.py", + "**/gpu_stats_monitor.py" +] + +[tool.coverage.report] +exclude_also = [ + "def __repr__", + "def __rich_repr__", + "def __str__", + "assert", + "raise NotImplementedError", + "except ImportError", + "@abstractmethod", + "@overload", + "exit\\(\\)", + "cv2\\.imshow", + "cv2\\.waitKey", + "logger\\.", +] diff --git a/requirements-dev.txt b/requirements-dev.txt index 7f915575..e4dbd194 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -4,3 +4,5 @@ pre-commit>=3.2.1 opencv-stubs>=0.0.8 pytest-cov>=4.1.0 pytest-subtests>=0.12.1 +pytest-md>=0.2.0 +pytest-order>=1.3.0 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/configs/archive_config.yaml b/tests/configs/archive_config.yaml new file mode 100644 index 00000000..71589f4d --- /dev/null +++ b/tests/configs/archive_config.yaml @@ -0,0 +1,43 @@ + +model: + name: archive_test + nodes: + - name: EfficientRep + + - name: EfficientBBoxHead + inputs: + - EfficientRep + + - name: EfficientKeypointBBoxHead + inputs: + - EfficientRep + + - name: ImplicitKeypointBBoxHead + inputs: + - EfficientRep + + - name: SegmentationHead + inputs: + - EfficientRep + + - name: BiSeNetHead + inputs: + - EfficientRep + + - name: ClassificationHead + inputs: + - 
EfficientRep + +exporter: + output_names: + - seg0 + - class0 + - bbox0 + - bbox1 + - bbox2 + - effkpt0 + - effkpt1 + - effkpt2 + - impl + - seg1 + diff --git a/tests/configs/parking_lot_config.yaml b/tests/configs/parking_lot_config.yaml index ae9f8069..bb15ac37 100644 --- a/tests/configs/parking_lot_config.yaml +++ b/tests/configs/parking_lot_config.yaml @@ -3,54 +3,26 @@ model: name: parking_lot_model nodes: - - name: ReXNetV1_lite - alias: rexnet-detection-backbone - - name: EfficientRep - alias: efficient-detection-backbone - params: - channels_list: [64, 128, 256, 512, 1024] - num_repeats: [1, 6, 12, 18, 6] - depth_mul: 0.33 - width_mul: 0.33 + alias: backbone - name: RepPANNeck - alias: efficient-detection-neck + alias: neck inputs: - - efficient-detection-backbone - params: - channels_list: [256, 128, 128, 256, 256, 512] - num_repeats: [12, 12, 12, 12] - depth_mul: 0.33 - width_mul: 0.33 - - - name: MicroNet - alias: color-segmentation-backbone - - - name: MobileOne - alias: brand-segmentation-backbone - - - name: MobileNetV2 - alias: vehicle-type-segmentation-backbone - - - name: ContextSpatial - alias: context-brand-segmentation-backbone + - backbone - name: EfficientBBoxHead alias: bbox-head inputs: - - efficient-detection-neck + - neck - name: ImplicitKeypointBBoxHead alias: car-detection-head inputs: - - rexnet-detection-backbone + - neck task: keypoints: car-keypoints boundingbox: car-boundingbox - params: - conf_thres: 0.25 - iou_thres: 0.45 - name: EfficientKeypointBBoxHead alias: motorbike-detection-head @@ -58,40 +30,31 @@ model: keypoints: motorbike-keypoints boundingbox: motorbike-boundingbox inputs: - - efficient-detection-neck - params: - conf_thres: 0.25 - iou_thres: 0.45 - - - name: BiSeNetHead - alias: context-brand-segmentation-head - task: brand_segmentation - inputs: - - context-brand-segmentation-backbone + - neck - name: SegmentationHead alias: color-segmentation-head - task: color_segmentation + task: color-segmentation inputs: - - color-segmentation-backbone + - neck - name: SegmentationHead alias: any-vehicle-segmentation-head - task: vehicle_segmentation + task: vehicle-segmentation inputs: - - vehicle-type-segmentation-backbone + - neck - name: BiSeNetHead alias: brand-segmentation-head - task: brand_segmentation + task: brand-segmentation inputs: - - brand-segmentation-backbone + - neck - name: BiSeNetHead alias: vehicle-type-segmentation-head - task: vehicle_type_segmentation + task: vehicle_type-segmentation inputs: - - vehicle-type-segmentation-backbone + - neck losses: - name: AdaptiveDetectionLoss @@ -100,12 +63,8 @@ model: attached_to: any-vehicle-segmentation-head - name: CrossEntropyLoss attached_to: vehicle-type-segmentation-head - - name: CrossEntropyLoss - attached_to: context-brand-segmentation-head - name: CrossEntropyLoss attached_to: color-segmentation-head - - name: SoftmaxFocalLoss - attached_to: brand-segmentation-head - name: ImplicitKeypointBBoxLoss attached_to: car-detection-head - name: EfficientKeypointBBoxLoss @@ -127,8 +86,6 @@ model: attached_to: vehicle-type-segmentation-head - name: Precision attached_to: brand-segmentation-head - - name: Recall - attached_to: context-brand-segmentation-head visualizers: - name: MultiVisualizer @@ -160,9 +117,6 @@ model: - name: SegmentationVisualizer alias: vehicle-segmentation-visualizer attached_to: any-vehicle-segmentation-head - - name: SegmentationVisualizer - alias: context-brand-segmentation-visualizer - attached_to: context-brand-segmentation-head - name: SegmentationVisualizer alias: 
brand-segmentation-visualizer attached_to: brand-segmentation-head @@ -184,16 +138,16 @@ trainer: devices: auto strategy: auto - num_sanity_val_steps: 1 + n_sanity_val_steps: 1 profiler: null verbose: True batch_size: 2 accumulate_grad_batches: 1 epochs: 200 - num_workers: 8 + n_workers: 8 train_metrics_interval: -1 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 skip_last_batch: True log_sub_losses: True save_top_k: 3 @@ -214,6 +168,5 @@ trainer: callbacks: - name: ExportOnTrainEnd - - name: TestOnTrainEnd - name: ArchiveOnTrainEnd diff --git a/tests/configs/segmentation_parse_loader.yaml b/tests/configs/segmentation_parse_loader.yaml index 60f7a30d..14814571 100644 --- a/tests/configs/segmentation_parse_loader.yaml +++ b/tests/configs/segmentation_parse_loader.yaml @@ -22,6 +22,6 @@ trainer: batch_size: 4 epochs: &epochs 1 - num_workers: 4 + n_workers: 4 validation_interval: 1 - num_log_images: 8 + n_log_images: 8 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..4a8a492c --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,18 @@ +import pytest + + +def pytest_collection_modifyitems(items): + for item in items: + if "/unittests/" in str(item.fspath): + item.add_marker(pytest.mark.unit) + # ensure unittests run before integration tests + item.add_marker(pytest.mark.order(0)) + elif "/integration/" in str(item.fspath): + item.add_marker(pytest.mark.integration) + + +def pytest_configure(config): + config.addinivalue_line("markers", "unit: mark test as a unit test") + config.addinivalue_line( + "markers", "integration: mark test as an integration test" + ) diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 9b24271b..ef5a2142 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,9 +1,14 @@ import json +import multiprocessing as mp +import os +import shutil from collections import defaultdict from pathlib import Path +from typing import Any import cv2 import gdown +import numpy as np import pytest import torchvision from luxonis_ml.data import LuxonisDataset @@ -12,15 +17,34 @@ from luxonis_ml.utils import LuxonisFileSystem, environ WORK_DIR = Path("tests", "data") -WORK_DIR.mkdir(parents=True, exist_ok=True) -environ.LUXONISML_BASE_PATH = WORK_DIR / "luxonisml" + +@pytest.fixture(scope="session") +def test_output_dir() -> Path: + return Path("tests/integration/save-directory") + + +@pytest.fixture(scope="session", autouse=True) +def setup(test_output_dir: Path): + WORK_DIR.mkdir(parents=True, exist_ok=True) + shutil.rmtree(WORK_DIR / "luxonisml", ignore_errors=True) + shutil.rmtree(test_output_dir, ignore_errors=True) + environ.LUXONISML_BASE_PATH = WORK_DIR / "luxonisml" + test_output_dir.mkdir(exist_ok=True) @pytest.fixture +def train_overfit() -> bool: + return bool(os.getenv("LUXONIS_TRAIN_OVERFIT")) + + +@pytest.fixture(scope="session") def parking_lot_dataset() -> LuxonisDataset: url = "gs://luxonis-test-bucket/luxonis-ml-test-data/D1_ParkingSlotTest" - base_path = LuxonisFileSystem.download(url, WORK_DIR) + base_path = WORK_DIR / "D1_ParkingSlotTest" + if not base_path.exists(): + base_path = LuxonisFileSystem.download(url, WORK_DIR) + mask_brand_path = base_path / "mask_brand" mask_color_path = base_path / "mask_color" kpt_mask_path = base_path / "keypoints_mask_vehicle" @@ -28,7 +52,7 @@ def parking_lot_dataset() -> LuxonisDataset: def generator(): 
filenames: dict[int, Path] = {} for base_path in [kpt_mask_path, mask_brand_path, mask_color_path]: - for sequence_path in list(sorted(base_path.glob("sequence.*"))): + for sequence_path in sorted(list(base_path.glob("sequence.*"))): frame_data = sequence_path / "step0.frame_data.json" with open(frame_data) as f: data = json.load(f)["captures"][0] @@ -52,7 +76,9 @@ def generator(): for bbox_annotation in annotations.get( "BoundingBox2DAnnotation", defaultdict(list) )["values"]: - class_ = bbox_annotation["labelName"].split("-")[-1].lower() + class_ = ( + bbox_annotation["labelName"].split("-")[-1].lower() + ) if class_ == "motorbiek": class_ = "motorbike" x, y = bbox_annotation["origin"] @@ -113,7 +139,10 @@ def generator(): ] mask = cv2.cvtColor( cv2.imread( - str(sequence_path / vehicle_type_segmentation["filename"]) + str( + sequence_path + / vehicle_type_segmentation["filename"] + ) ), cv2.COLOR_BGR2RGB, ) @@ -122,11 +151,11 @@ def generator(): for inst in vehicle_type_segmentation["instances"] } if base_path == kpt_mask_path: - task = "vehicle_type_segmentation" + task = "vehicle_type-segmentation" elif base_path == mask_brand_path: - task = "brand_segmentation" + task = "brand-segmentation" else: - task = "color_segmentation" + task = "color-segmentation" for class_, mask_ in rgb_to_bool_masks( mask, classes, add_background_class=True ): @@ -145,36 +174,40 @@ def generator(): "annotation": { "type": "mask", "class": "vehicle", - "task": "vehicle_segmentation", + "task": "vehicle-segmentation", "mask": mask.astype(bool)[..., 0] | mask.astype(bool)[..., 1] | mask.astype(bool)[..., 2], }, } - dataset = LuxonisDataset("__D1ParkingSLot-test", delete_existing=True) + dataset = LuxonisDataset("_ParkingLot", delete_existing=True) dataset.add(generator()) + np.random.seed(42) dataset.make_splits() return dataset -@pytest.fixture(scope="session", autouse=True) -def create_coco_dataset(): +@pytest.fixture(scope="session") +def coco_dataset() -> LuxonisDataset: dataset_name = "coco_test" url = "https://drive.google.com/uc?id=1XlvFK7aRmt8op6-hHkWVKIJQeDtOwoRT" output_zip = WORK_DIR / "COCO_people_subset.zip" - if not output_zip.exists() and not (WORK_DIR / "COCO_people_subset").exists(): + if ( + not output_zip.exists() + and not (WORK_DIR / "COCO_people_subset").exists() + ): gdown.download(url, str(output_zip), quiet=False) parser = LuxonisParser( str(output_zip), dataset_name=dataset_name, delete_existing=True ) - parser.parse(random_split=True) + return parser.parse(random_split=True) -@pytest.fixture(scope="session", autouse=True) -def create_cifar10_dataset(): +@pytest.fixture(scope="session") +def cifar10_dataset() -> LuxonisDataset: dataset = LuxonisDataset("cifar10_test", delete_existing=True) output_folder = WORK_DIR / "cifar10" output_folder.mkdir(parents=True, exist_ok=True) @@ -210,3 +243,40 @@ def CIFAR10_subset_generator(): dataset.add(CIFAR10_subset_generator()) dataset.make_splits() + return dataset + + +@pytest.fixture +def config(train_overfit: bool) -> dict[str, Any]: + if train_overfit: + epochs = 100 + else: + epochs = 1 + + return { + "tracker": { + "save_directory": "tests/integration/save-directory", + }, + "loader": { + "train_view": "val", + "params": { + "dataset_name": "_ParkingLot", + }, + }, + "trainer": { + "batch_size": 4, + "epochs": epochs, + "n_workers": mp.cpu_count(), + "validation_interval": epochs, + "save_top_k": 0, + "preprocessing": { + "train_image_size": [256, 320], + "keep_aspect_ratio": False, + "normalize": {"active": True}, + }, + "callbacks": [ + 
{"name": "ExportOnTrainEnd"}, + ], + "matmul_precision": "medium", + }, + } diff --git a/tests/integration/multi_input_modules.py b/tests/integration/multi_input_modules.py index dbc5a449..e6fd0476 100644 --- a/tests/integration/multi_input_modules.py +++ b/tests/integration/multi_input_modules.py @@ -1,9 +1,10 @@ import torch +from luxonis_ml.data import LabelType from torch import Tensor, nn +from luxonis_train.loaders import BaseLoaderTorch from luxonis_train.nodes import BaseNode -from luxonis_train.utils.loaders import BaseLoaderTorch -from luxonis_train.utils.types import FeaturesProtocol, LabelType, Packet +from luxonis_train.utils import Packet class CustomMultiInputLoader(BaseLoaderTorch): @@ -60,29 +61,23 @@ def unwrap(self, inputs: list[dict[str, list[Tensor]]]): return [item for inp in inputs for key in inp for item in inp[key]] -class FullBackbone(MultiInputTestBaseNode): - input_protocols = [FeaturesProtocol] * 4 +class FullBackbone(MultiInputTestBaseNode): ... -class RGBDBackbone(MultiInputTestBaseNode): - input_protocols = [FeaturesProtocol] * 3 +class RGBDBackbone(MultiInputTestBaseNode): ... -class PointcloudBackbone(MultiInputTestBaseNode): - input_protocols = [FeaturesProtocol] +class PointcloudBackbone(MultiInputTestBaseNode): ... -class FusionNeck(MultiInputTestBaseNode): - input_protocols = [FeaturesProtocol] * 3 +class FusionNeck(MultiInputTestBaseNode): ... -class FusionNeck2(MultiInputTestBaseNode): - input_protocols = [FeaturesProtocol] * 3 +class FusionNeck2(MultiInputTestBaseNode): ... class CustomSegHead1(MultiInputTestBaseNode): tasks = {LabelType.SEGMENTATION: "segmentation"} - input_protocols = [FeaturesProtocol] def __init__(self, **kwargs): super().__init__(**kwargs) @@ -98,7 +93,6 @@ def forward(self, inputs: Tensor): class CustomSegHead2(MultiInputTestBaseNode): tasks = {LabelType.SEGMENTATION: "segmentation"} - input_protocols = [FeaturesProtocol] * 3 def __init__(self, **kwargs): super().__init__(**kwargs) diff --git a/tests/integration/parking_lot.json b/tests/integration/parking_lot.json index d9599642..0059241e 100644 --- a/tests/integration/parking_lot.json +++ b/tests/integration/parking_lot.json @@ -36,7 +36,7 @@ ], "outputs": [ { - "name": "any-vehicle-segmentation-head/vehicle_segmentation/0", + "name": "any-vehicle-segmentation-head/vehicle-segmentation/0", "dtype": "float32", "shape": [ 1, @@ -80,7 +80,7 @@ "layout": "NCHW" }, { - "name": "brand-segmentation-head/brand_segmentation/0", + "name": "brand-segmentation-head/brand-segmentation/0", "dtype": "float32", "shape": [ 1, @@ -95,13 +95,13 @@ "dtype": "float32", "shape": [ 1, - 66240, + 5040, 24 ], "layout": "NCD" }, { - "name": "color-segmentation-head/color_segmentation/0", + "name": "color-segmentation-head/color-segmentation/0", "dtype": "float32", "shape": [ 1, @@ -111,17 +111,6 @@ ], "layout": "NCHW" }, - { - "name": "context-brand-segmentation-head/brand_segmentation/0", - "dtype": "float32", - "shape": [ - 1, - 23, - 256, - 320 - ], - "layout": "NCHW" - }, { "name": "motorbike-detection-head/outputs/0", "dtype": "float32", @@ -156,7 +145,7 @@ "layout": "NCDE" }, { - "name": "vehicle-type-segmentation-head/vehicle_type_segmentation/0", + "name": "vehicle-type-segmentation-head/vehicle_type-segmentation/0", "dtype": "float32", "shape": [ 1, @@ -227,42 +216,6 @@ "motorbike-detection-head/outputs/2" ] }, - { - "parser": "SegmentationParser", - "metadata": { - "postprocessor_path": null, - "classes": [ - "background", - "chrysler", - "bmw", - "ducati", - "dodge", - "ferrari", - 
"infiniti", - "land-rover", - "roll-royce", - "saab", - "Kawasaki", - "moto", - "truimph", - "alfa-romeo", - "harley", - "honda", - "jeep", - "aprilia", - "piaggio", - "yamaha", - "buick", - "pontiac", - "isuzu" - ], - "n_classes": 23, - "is_softmax": false - }, - "outputs": [ - "context-brand-segmentation-head/brand_segmentation/0" - ] - }, { "parser": "SegmentationParser", "metadata": { @@ -277,7 +230,7 @@ "is_softmax": false }, "outputs": [ - "color-segmentation-head/color_segmentation/0" + "color-segmentation-head/color-segmentation/0" ] }, { @@ -291,7 +244,7 @@ "is_softmax": false }, "outputs": [ - "any-vehicle-segmentation-head/vehicle_segmentation/0" + "any-vehicle-segmentation-head/vehicle-segmentation/0" ] }, { @@ -327,7 +280,7 @@ "is_softmax": false }, "outputs": [ - "brand-segmentation-head/brand_segmentation/0" + "brand-segmentation-head/brand-segmentation/0" ] }, { @@ -343,7 +296,7 @@ "is_softmax": false }, "outputs": [ - "vehicle-type-segmentation-head/vehicle_type_segmentation/0" + "vehicle-type-segmentation-head/vehicle_type-segmentation/0" ] } ] diff --git a/tests/integration/test_detection.py b/tests/integration/test_detection.py new file mode 100644 index 00000000..fb184b6f --- /dev/null +++ b/tests/integration/test_detection.py @@ -0,0 +1,95 @@ +from typing import Any + +import pytest +from luxonis_ml.data import LuxonisDataset + +from luxonis_train.core import LuxonisModel +from luxonis_train.nodes.backbones import __all__ as BACKBONES + + +def get_opts(backbone: str) -> dict[str, Any]: + return { + "model": { + "nodes": [ + { + "name": backbone, + }, + { + "name": "EfficientBBoxHead", + "inputs": [backbone], + }, + { + "name": "EfficientKeypointBBoxHead", + "task": { + "keypoints": "car-keypoints", + "boundingbox": "car-boundingbox", + }, + "inputs": [backbone], + }, + { + "name": "ImplicitKeypointBBoxHead", + "task": { + "keypoints": "car-keypoints", + "boundingbox": "car-boundingbox", + }, + "inputs": [backbone], + }, + ], + "losses": [ + { + "name": "AdaptiveDetectionLoss", + "attached_to": "EfficientBBoxHead", + }, + { + "name": "EfficientKeypointBBoxLoss", + "attached_to": "EfficientKeypointBBoxHead", + "params": {"area_factor": 0.5}, + }, + { + "name": "ImplicitKeypointBBoxLoss", + "attached_to": "ImplicitKeypointBBoxHead", + }, + ], + "metrics": [ + { + "name": "MeanAveragePrecision", + "attached_to": "EfficientBBoxHead", + }, + { + "name": "MeanAveragePrecisionKeypoints", + "alias": "EfficientKeypointBBoxHead-MaP", + "attached_to": "EfficientKeypointBBoxHead", + }, + { + "name": "MeanAveragePrecisionKeypoints", + "alias": "ImplicitKeypointBBoxHead-MaP", + "attached_to": "ImplicitKeypointBBoxHead", + }, + ], + } + } + + +def train_and_test( + config: dict[str, Any], + opts: dict[str, Any], + train_overfit: bool = False, +): + model = LuxonisModel(config, opts) + model.train() + results = model.test(view="val") + if train_overfit: + for name, value in results.items(): + if "/map_50" in name or "/kpt_map_medium" in name: + assert value > 0.8, f"{name} = {value} (expected > 0.8)" + + +@pytest.mark.parametrize("backbone", BACKBONES) +def test_backbones( + backbone: str, + config: dict[str, Any], + parking_lot_dataset: LuxonisDataset, +): + opts = get_opts(backbone) + opts["loader.params.dataset_name"] = parking_lot_dataset.identifier + train_and_test(config, opts) diff --git a/tests/integration/test_sanity.py b/tests/integration/test_sanity.py deleted file mode 100644 index 5afa385b..00000000 --- a/tests/integration/test_sanity.py +++ /dev/null @@ -1,136 +0,0 @@ 
-import json -import shutil -import sys -import tarfile -from copy import deepcopy -from pathlib import Path - -import pytest -from luxonis_ml.data import LuxonisDataset -from multi_input_modules import * - -from luxonis_train.core import LuxonisModel - -TEST_OUTPUT = Path("tests/integration/_test-output") -INFER_PATH = Path("tests/integration/_infer_save_dir") -ONNX_PATH = Path("tests/integration/_model.onnx") -STUDY_PATH = Path("study_local.db") - -OPTS = { - "trainer.epochs": 1, - "trainer.batch_size": 1, - "trainer.validation_interval": 1, - "trainer.callbacks": "[]", - "tracker.save_directory": str(TEST_OUTPUT), - "tuner.n_trials": 4, -} - - -@pytest.fixture(scope="session", autouse=True) -def manage_out_dir(): - shutil.rmtree(TEST_OUTPUT, ignore_errors=True) - TEST_OUTPUT.mkdir(exist_ok=True) - - -@pytest.fixture(scope="function", autouse=True) -def clear_files(): - yield - STUDY_PATH.unlink(missing_ok=True) - ONNX_PATH.unlink(missing_ok=True) - shutil.rmtree(INFER_PATH, ignore_errors=True) - - -@pytest.mark.parametrize( - "config_file", - [ - "classification_model", - "segmentation_model", - "detection_model", - "keypoint_bbox_model", - "resnet_model", - "coco_model", - "efficient_coco_model", - ], -) -def test_simple_models(config_file: str): - config_file = f"configs/{config_file}.yaml" - model = LuxonisModel(config_file, opts=OPTS) - model.train() - model.test() - model.export() - assert ( - Path(model.run_save_dir, "export", model.cfg.model.name) - .with_suffix(".onnx") - .exists() - ) - model.archive() - assert ( - Path( - model.run_save_dir, - "archive", - model.cfg.archiver.name or model.cfg.model.name, - ) - .with_suffix(".onnx.tar.xz") - .exists() - ) - del model - - -def test_multi_input(): - config_file = "configs/example_multi_input.yaml" - model = LuxonisModel(config_file, opts=OPTS) - model.train() - model.test(view="val") - - assert not ONNX_PATH.exists() - model.export(str(ONNX_PATH)) - assert ONNX_PATH.exists() - - assert not INFER_PATH.exists() - model.infer(view="val", save_dir=INFER_PATH) - assert INFER_PATH.exists() - del model - - -def test_custom_tasks(parking_lot_dataset: LuxonisDataset, subtests): - config_file = "tests/configs/parking_lot_config.yaml" - opts = deepcopy(OPTS) | { - "loader.params.dataset_name": parking_lot_dataset.dataset_name, - "trainer.batch_size": 2, - } - del opts["trainer.callbacks"] - model = LuxonisModel(config_file, opts=opts) - model.train() - archive_path = Path( - model.run_save_dir, "archive", model.cfg.model.name - ).with_suffix(".onnx.tar.xz") - correct_archive_config = json.loads( - Path("tests/integration/parking_lot.json").read_text() - ) - - with subtests.test("test_archive"): - assert archive_path.exists() - with tarfile.open(archive_path) as tar: - extracted_cfg = tar.extractfile("config.json") - - assert extracted_cfg is not None, "Config JSON not found in the archive." 
- generated_config = json.loads(extracted_cfg.read().decode()) - - del generated_config["model"]["heads"][1]["metadata"]["anchors"] - assert generated_config == correct_archive_config - - del model - - -def test_parsing_loader(): - model = LuxonisModel("tests/configs/segmentation_parse_loader.yaml") - model.train() - del model - - -@pytest.mark.skipif(sys.platform == "win32", reason="Tuning not supported on Windows") -def test_tuner(): - model = LuxonisModel("configs/example_tuning.yaml", opts=OPTS) - model.tune() - assert STUDY_PATH.exists() - del model diff --git a/tests/integration/test_segmentation.py b/tests/integration/test_segmentation.py new file mode 100644 index 00000000..c24e6fb9 --- /dev/null +++ b/tests/integration/test_segmentation.py @@ -0,0 +1,134 @@ +from typing import Any + +import pytest +from luxonis_ml.data import LuxonisDataset + +from luxonis_train.core import LuxonisModel +from luxonis_train.nodes.backbones import __all__ as BACKBONES + + +def get_opts(backbone: str) -> dict[str, Any]: + opts = { + "model": { + "nodes": [ + { + "name": backbone, + }, + { + "name": "SegmentationHead", + "alias": "seg-color-segmentation", + "task": "color-segmentation", + "inputs": [backbone], + }, + { + "name": "BiSeNetHead", + "alias": "bi-color-segmentation", + "task": "color-segmentation", + "inputs": [backbone], + }, + { + "name": "SegmentationHead", + "alias": "seg-vehicle-segmentation", + "task": "vehicle-segmentation", + "inputs": [backbone], + }, + { + "name": "BiSeNetHead", + "alias": "bi-vehicle-segmentation", + "task": "vehicle-segmentation", + "inputs": [backbone], + }, + { + "name": "SegmentationHead", + "alias": "seg-vehicle-segmentation-2", + "task": "vehicle-segmentation", + "inputs": [backbone], + }, + { + "name": "SegmentationHead", + "alias": "seg-vehicle-segmentation-3", + "task": "vehicle-segmentation", + "inputs": [backbone], + }, + ], + "losses": [ + { + "name": "CrossEntropyLoss", + "attached_to": "seg-color-segmentation", + }, + { + "name": "CrossEntropyLoss", + "attached_to": "bi-color-segmentation", + }, + { + "name": "BCEWithLogitsLoss", + "attached_to": "seg-vehicle-segmentation", + }, + { + "name": "SigmoidFocalLoss", + "attached_to": "bi-vehicle-segmentation", + "params": {"alpha": 0.5, "gamma": 1.0}, + }, + { + "name": "SoftmaxFocalLoss", + "attached_to": "seg-vehicle-segmentation-2", + "params": {"alpha": 0.5, "gamma": 1.0}, + }, + { + "name": "SmoothBCEWithLogitsLoss", + "attached_to": "seg-vehicle-segmentation-3", + "params": {"label_smoothing": 0.1}, + }, + ], + "metrics": [], + "visualizers": [], + } + } + aliases = [head["alias"] for head in opts["model"]["nodes"][1:]] + for alias in aliases: + opts["model"]["metrics"].extend( + [ + { + "name": "JaccardIndex", + "alias": f"JaccardIndex_{alias}", + "attached_to": alias, + }, + { + "name": "F1Score", + "alias": f"F1Score_{alias}", + "attached_to": alias, + }, + ] + ) + opts["model"]["visualizers"].append( + { + "name": "SegmentationVisualizer", + "attached_to": alias, + } + ) + return opts + + +def train_and_test( + config: dict[str, Any], + opts: dict[str, Any], + train_overfit: bool = False, +): + model = LuxonisModel(config, opts) + model.train() + results = model.test(view="val") + if train_overfit: + for name, value in results.items(): + if "metric" in name: + assert value > 0.8, f"{name} = {value} (expected > 0.8)" + + +@pytest.mark.parametrize("backbone", BACKBONES) +def test_backbones( + backbone: str, + config: dict[str, Any], + parking_lot_dataset: LuxonisDataset, +): + opts = 
get_opts(backbone) + opts["loader.params.dataset_name"] = parking_lot_dataset.identifier + train_and_test(config, opts) diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py new file mode 100644 index 00000000..784db01a --- /dev/null +++ b/tests/integration/test_simple.py @@ -0,0 +1,215 @@ +import json +import shutil +import sys +import tarfile +from copy import deepcopy +from pathlib import Path +from typing import Any + +import pytest +from luxonis_ml.data import LuxonisDataset +from luxonis_ml.utils import environ + +from luxonis_train.core import LuxonisModel + +from .multi_input_modules import * + +INFER_PATH = Path("tests/integration/infer-save-directory") +ONNX_PATH = Path("tests/integration/_model.onnx") +STUDY_PATH = Path("study_local.db") + + +@pytest.fixture +def opts(test_output_dir: Path) -> dict[str, Any]: + return { + "trainer.epochs": 1, + "trainer.batch_size": 1, + "trainer.validation_interval": 1, + "trainer.callbacks": "[]", + "tracker.save_directory": str(test_output_dir), + "tuner.n_trials": 4, + } + + +@pytest.fixture(scope="function", autouse=True) +def clear_files(): + # todo + yield + STUDY_PATH.unlink(missing_ok=True) + ONNX_PATH.unlink(missing_ok=True) + shutil.rmtree(INFER_PATH, ignore_errors=True) + + +@pytest.mark.parametrize( + "config_file", + [ + "classification_model", + "segmentation_model", + "detection_model", + "keypoint_bbox_model", + ], +) +def test_predefined_models( + opts: dict[str, Any], + config_file: str, + coco_dataset: LuxonisDataset, + cifar10_dataset: LuxonisDataset, +): + config_file = f"configs/{config_file}.yaml" + opts |= { + "loader.params.dataset_name": cifar10_dataset.dataset_name + if "classification_model" in config_file + else coco_dataset.dataset_name, + } + model = LuxonisModel(config_file, opts) + model.train() + model.test() + + +def test_multi_input(opts: dict[str, Any]): + config_file = "configs/example_multi_input.yaml" + model = LuxonisModel(config_file, opts) + model.train() + model.test(view="val") + + assert not ONNX_PATH.exists() + model.export(str(ONNX_PATH)) + assert ONNX_PATH.exists() + + assert not INFER_PATH.exists() + model.infer(view="val", save_dir=INFER_PATH) + assert INFER_PATH.exists() + + +def test_custom_tasks( + opts: dict[str, Any], parking_lot_dataset: LuxonisDataset, subtests +): + config_file = "tests/configs/parking_lot_config.yaml" + opts |= { + "loader.params.dataset_name": parking_lot_dataset.dataset_name, + "trainer.batch_size": 2, + } + del opts["trainer.callbacks"] + model = LuxonisModel(config_file, opts) + model.train() + archive_path = Path( + model.run_save_dir, "archive", model.cfg.model.name + ).with_suffix(".onnx.tar.xz") + correct_archive_config = json.loads( + Path("tests/integration/parking_lot.json").read_text() + ) + + with subtests.test("test_archive"): + assert archive_path.exists() + with tarfile.open(archive_path) as tar: + extracted_cfg = tar.extractfile("config.json") + + assert ( + extracted_cfg is not None + ), "Config JSON not found in the archive." 
+ generated_config = json.loads(extracted_cfg.read().decode()) + + del generated_config["model"]["heads"][1]["metadata"]["anchors"] + assert generated_config == correct_archive_config + + +@pytest.mark.skipif( + environ.GOOGLE_APPLICATION_CREDENTIALS is None, + reason="GCP credentials not set", +) +def test_parsing_loader(): + model = LuxonisModel("tests/configs/segmentation_parse_loader.yaml") + model.train() + + +@pytest.mark.skipif( + sys.platform == "win32", + reason="Tuning not supported on Windows", +) +def test_tune(opts: dict[str, Any], coco_dataset: LuxonisDataset): + opts["tuner.params"] = { + "trainer.optimizer.name_categorical": ["Adam", "SGD"], + "trainer.optimizer.params.lr_float": [0.0001, 0.001], + "trainer.batch_size_int": [4, 16, 4], + "trainer.preprocessing.augmentations_subset": [ + ["Defocus", "Sharpen", "Flip", "Normalize", "invalid"], + 2, + ], + "model.losses.0.weight_uniform": [0.1, 0.9], + "model.nodes.0.freezing.unfreeze_after_loguniform": [0.1, 0.9], + } + opts["loader.params.dataset_name"] = coco_dataset.identifier + model = LuxonisModel("configs/example_tuning.yaml", opts) + model.tune() + assert STUDY_PATH.exists() + + +def test_archive(test_output_dir: Path, coco_dataset: LuxonisDataset): + opts = { + "tracker.save_directory": str(test_output_dir), + "loader.params.dataset_name": coco_dataset.identifier, + } + model = LuxonisModel("tests/configs/archive_config.yaml", opts) + model.archive() + assert ( + Path( + model.run_save_dir, + "archive", + model.cfg.archiver.name or model.cfg.model.name, + ) + .with_suffix(".onnx.tar.xz") + .exists() + ) + + +def test_callbacks(opts: dict[str, Any], parking_lot_dataset: LuxonisDataset): + config_file = "tests/configs/parking_lot_config.yaml" + opts = deepcopy(opts) + del opts["trainer.callbacks"] + opts |= { + "trainer.use_rich_progress_bar": False, + "trainer.seed": 42, + "trainer.deterministic": "warn", + "trainer.callbacks": [ + { + "name": "MetadataLogger", + "params": { + "hyperparams": ["trainer.epochs", "trainer.batch_size"], + }, + }, + {"name": "TestOnTrainEnd"}, + {"name": "UploadCheckpoint"}, + { + "name": "ExportOnTrainEnd", + }, + { + "name": "ArchiveOnTrainEnd", + "params": {"preferred_checkpoint": "loss"}, + }, + ], + "exporter.scale_values": [0.5, 0.5, 0.5], + "exporter.mean_values": [0.5, 0.5, 0.5], + "exporter.blobconverter.active": True, + } + opts["loader.params.dataset_name"] = parking_lot_dataset.identifier + model = LuxonisModel(config_file, opts) + model.train() + + +def test_freezing(opts: dict[str, Any], coco_dataset: LuxonisDataset): + config_file = "configs/segmentation_model.yaml" + opts = deepcopy(opts) + opts |= { + "model.predefined_model.params": { + "head_params": { + "freezing": { + "active": True, + "unfreeze_after": 2, + }, + } + } + } + opts["trainer.epochs"] = 3 + opts["loader.params.dataset_name"] = coco_dataset.identifier + model = LuxonisModel(config_file, opts) + model.train() diff --git a/tests/unittests/__init__.py b/tests/unittests/__init__.py index f9269fdf..e69de29b 100644 --- a/tests/unittests/__init__.py +++ b/tests/unittests/__init__.py @@ -1,2 +0,0 @@ -# import warnings -# warnings.filterwarnings("module", category=DeprecationWarning) diff --git a/tests/unittests/test_assigners/__init__.py b/tests/unittests/test_assigners/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unittests/test_utils/test_assigners/test_atts_assigner.py b/tests/unittests/test_assigners/test_atts_assigner.py similarity index 88% rename from 
tests/unittests/test_utils/test_assigners/test_atts_assigner.py rename to tests/unittests/test_assigners/test_atts_assigner.py index a3801ebb..4ab6f939 100644 --- a/tests/unittests/test_utils/test_assigners/test_atts_assigner.py +++ b/tests/unittests/test_assigners/test_atts_assigner.py @@ -1,6 +1,6 @@ import torch -from luxonis_train.utils.assigners.atts_assigner import ATSSAssigner +from luxonis_train.assigners import ATSSAssigner def test_init(): @@ -25,7 +25,12 @@ def test_forward(): pred_bboxes = torch.rand(bs, n_anchors, 4) labels, bboxes, scores, mask, assigned_gt_idx = assigner.forward( - anchor_bboxes, n_level_bboxes, gt_labels, gt_bboxes, mask_gt, pred_bboxes + anchor_bboxes, + n_level_bboxes, + gt_labels, + gt_bboxes, + mask_gt, + pred_bboxes, ) assert labels.shape == (bs, n_anchors) @@ -59,7 +64,11 @@ def test_select_topk_candidates(): ) assert is_in_topk.shape == (batch_size, n_max_boxes, n_anchors) - assert topk_idxs.shape == (batch_size, n_max_boxes, topk * len(n_level_bboxes)) + assert topk_idxs.shape == ( + batch_size, + n_max_boxes, + topk * len(n_level_bboxes), + ) def test_get_positive_samples(): @@ -97,7 +106,11 @@ def test_get_final_assignments(): assigned_gt_idx = torch.randint(0, n_max_boxes, (batch_size, n_anchors)) mask_pos_sum = torch.randint(0, 2, (batch_size, n_anchors)) - assigned_labels, assigned_bboxes, assigned_scores = assigner._get_final_assignments( + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + ) = assigner._get_final_assignments( gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum ) diff --git a/tests/unittests/test_assigners/test_tal_assigner.py b/tests/unittests/test_assigners/test_tal_assigner.py new file mode 100644 index 00000000..cb94b62d --- /dev/null +++ b/tests/unittests/test_assigners/test_tal_assigner.py @@ -0,0 +1,135 @@ +import torch + +from luxonis_train.assigners import TaskAlignedAssigner + + +def test_init(): + assigner = TaskAlignedAssigner( + n_classes=80, topk=13, alpha=1.0, beta=6.0, eps=1e-9 + ) + assert assigner.n_classes == 80 + assert assigner.topk == 13 + assert assigner.alpha == 1.0 + assert assigner.beta == 6.0 + assert assigner.eps == 1e-9 + + +def test_forward(): + batch_size = 10 + n_anchors = 100 + n_max_boxes = 5 + n_classes = 80 + + assigner = TaskAlignedAssigner(n_classes=n_classes, topk=13) + + # Create mock inputs + pred_scores = torch.rand(batch_size, n_anchors, 1) + pred_bboxes = torch.rand(batch_size, n_anchors, 4) + anchor_points = torch.rand(n_anchors, 2) + gt_labels = torch.rand(batch_size, n_max_boxes, 1) + gt_bboxes = torch.zeros(batch_size, n_max_boxes, 4) # no gt bboxes + mask_gt = torch.rand(batch_size, n_max_boxes, 1) + + labels, bboxes, scores, mask, assigned_gt_idx = assigner.forward( + pred_scores, pred_bboxes, anchor_points, gt_labels, gt_bboxes, mask_gt + ) + + assert labels.shape == (batch_size, n_anchors) + assert bboxes.shape == (batch_size, n_anchors, 4) + assert scores.shape == ( + batch_size, + n_anchors, + n_classes, + ) + assert mask.shape == (batch_size, n_anchors) + assert assigned_gt_idx.shape == (batch_size, n_anchors) + + # Labels should be `n_classes` as there are no GT boxes + assert labels.unique().tolist() == [n_classes] + + # All results should be zero as there are no GT boxes + assert torch.equal(bboxes, torch.zeros_like(bboxes)) + assert torch.equal(scores, torch.zeros_like(scores)) + assert torch.equal(mask, torch.zeros_like(mask)) + assert torch.equal(assigned_gt_idx, torch.zeros_like(assigned_gt_idx)) + + +def test_get_alignment_metric(): + batch_size = 2 + 
n_anchors = 5 + n_max_boxes = 3 + n_classes = 80 + + pred_scores = torch.rand(batch_size, n_anchors, n_classes) + pred_bboxes = torch.rand(batch_size, n_anchors, 4) + gt_labels = torch.randint(0, n_classes, (batch_size, n_max_boxes, 1)) + gt_bboxes = torch.rand(batch_size, n_max_boxes, 4) + + assigner = TaskAlignedAssigner( + n_classes=n_classes, topk=13, alpha=1.0, beta=6.0, eps=1e-9 + ) + assigner.bs = pred_scores.size(0) + assigner.n_max_boxes = gt_bboxes.size(1) + + align_metric, overlaps = assigner._get_alignment_metric( + pred_scores, pred_bboxes, gt_labels, gt_bboxes + ) + + assert align_metric.shape == (batch_size, n_max_boxes, n_anchors) + assert overlaps.shape == (batch_size, n_max_boxes, n_anchors) + assert align_metric.dtype == torch.float32 + assert overlaps.dtype == torch.float32 + assert align_metric.min() >= 0 and align_metric.max() <= 1 + assert overlaps.min() >= 0 and overlaps.max() <= 1 + + +def test_select_topk_candidates(): + batch_size = 2 + n_max_boxes = 3 + n_anchors = 5 + topk = 2 + + metrics = torch.rand(batch_size, n_max_boxes, n_anchors) + mask_gt = torch.rand(batch_size, n_max_boxes, 1) + + assigner = TaskAlignedAssigner(n_classes=80, topk=topk) + + is_in_topk = assigner._select_topk_candidates(metrics) + topk_mask = mask_gt.repeat([1, 1, topk]).bool() + assert torch.equal( + assigner._select_topk_candidates(metrics), + assigner._select_topk_candidates(metrics, topk_mask=topk_mask), + ) + assert is_in_topk.shape == (batch_size, n_max_boxes, n_anchors) + assert is_in_topk.dtype == torch.float32 + + assert is_in_topk.sum(dim=-1).max() <= topk + + +def test_get_final_assignments(): + batch_size = 2 + n_max_boxes = 3 + n_anchors = 5 + n_classes = 80 + + gt_labels = torch.randint(0, n_classes, (batch_size, n_max_boxes, 1)) + gt_bboxes = torch.rand(batch_size, n_max_boxes, 4) + assigned_gt_idx = torch.randint(0, n_max_boxes, (batch_size, n_anchors)) + mask_pos_sum = torch.randint(0, 2, (batch_size, n_anchors)) + + assigner = TaskAlignedAssigner(n_classes=n_classes, topk=13) + assigner.bs = batch_size # Set batch size + assigner.n_max_boxes = gt_bboxes.size(1) + + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + ) = assigner._get_final_assignments( + gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum + ) + + assert assigned_labels.shape == (batch_size, n_anchors) + assert assigned_bboxes.shape == (batch_size, n_anchors, 4) + assert assigned_scores.shape == (batch_size, n_anchors, n_classes) + assert assigned_labels.min() >= 0 and assigned_labels.max() <= n_classes diff --git a/tests/unittests/test_utils/test_assigners/test_utils.py b/tests/unittests/test_assigners/test_utils.py similarity index 96% rename from tests/unittests/test_utils/test_assigners/test_utils.py rename to tests/unittests/test_assigners/test_utils.py index bf849e25..d10e1d47 100644 --- a/tests/unittests/test_utils/test_assigners/test_utils.py +++ b/tests/unittests/test_assigners/test_utils.py @@ -1,6 +1,6 @@ import torch -from luxonis_train.utils.assigners.utils import ( +from luxonis_train.assigners.utils import ( batch_iou, candidates_in_gt, fix_collisions, diff --git a/tests/unittests/test_base_attached_module.py b/tests/unittests/test_base_attached_module.py new file mode 100644 index 00000000..c6ffdd48 --- /dev/null +++ b/tests/unittests/test_base_attached_module.py @@ -0,0 +1,153 @@ +import pytest +from luxonis_ml.data import LabelType + +from luxonis_train import BaseLoss, BaseNode +from luxonis_train.utils.exceptions import IncompatibleException + + +class 
DummyBackbone(BaseNode): + def forward(self, _): ... + + +class DummySegmentationHead(BaseNode): + tasks = [LabelType.SEGMENTATION] + + def forward(self, _): ... + + +class DummyBBoxHead(BaseNode): + tasks = [LabelType.BOUNDINGBOX] + + def forward(self, _): ... + + +class DummyDetectionHead(BaseNode): + tasks = [LabelType.BOUNDINGBOX, LabelType.KEYPOINTS] + + def forward(self, _): ... + + +class DummyLoss(BaseLoss): + supported_labels = [ + LabelType.SEGMENTATION, + (LabelType.KEYPOINTS, LabelType.BOUNDINGBOX), + ] + + def forward(self, _): ... + + +class NoLabelLoss(BaseLoss): + def forward(self, _): ... + + +@pytest.fixture +def labels(): + return { + "segmentation": ("segmentation", LabelType.SEGMENTATION), + "keypoints": ("keypoints", LabelType.KEYPOINTS), + "boundingbox": ("boundingbox", LabelType.BOUNDINGBOX), + "classification": ("classification", LabelType.CLASSIFICATION), + } + + +@pytest.fixture +def inputs(): + return { + "features": ["features"], + "segmentation": ["segmentation"], + } + + +def test_valid_properties(): + head = DummySegmentationHead() + loss = DummyLoss(node=head) + no_labels_loss = NoLabelLoss(node=head) + assert loss.node == head + assert loss.node_tasks == {LabelType.SEGMENTATION: "segmentation"} + assert loss.required_labels == [LabelType.SEGMENTATION] + assert no_labels_loss.node == head + assert no_labels_loss.node_tasks == { + LabelType.SEGMENTATION: "segmentation" + } + assert no_labels_loss.required_labels == [] + + +def test_invalid_properties(): + backbone = DummyBackbone() + with pytest.raises(IncompatibleException): + DummyLoss(node=backbone) + with pytest.raises(IncompatibleException): + DummyLoss(node=DummyBBoxHead()) + with pytest.raises(RuntimeError): + _ = DummyLoss().node + with pytest.raises(RuntimeError): + _ = NoLabelLoss(node=backbone).node_tasks + + +def test_get_label(labels): + seg_head = DummySegmentationHead() + det_head = DummyDetectionHead() + seg_loss = DummyLoss(node=seg_head) + assert seg_loss.get_label(labels) == "segmentation" + assert seg_loss.get_label(labels, LabelType.SEGMENTATION) == "segmentation" + + del labels["segmentation"] + labels["segmentation-task"] = ("segmentation", LabelType.SEGMENTATION) + + with pytest.raises(IncompatibleException): + seg_loss.get_label(labels) + + det_loss = DummyLoss(node=det_head) + assert det_loss.get_label(labels, LabelType.KEYPOINTS) == "keypoints" + assert det_loss.get_label(labels, LabelType.BOUNDINGBOX) == "boundingbox" + + with pytest.raises(ValueError): + det_loss.get_label(labels) + + with pytest.raises(ValueError): + det_loss.get_label(labels, LabelType.SEGMENTATION) + + +def test_input_tensors(inputs): + seg_head = DummySegmentationHead() + seg_loss = DummyLoss(node=seg_head) + assert seg_loss.get_input_tensors(inputs) == ["segmentation"] + assert seg_loss.get_input_tensors(inputs, "segmentation") == [ + "segmentation" + ] + assert seg_loss.get_input_tensors(inputs, LabelType.SEGMENTATION) == [ + "segmentation" + ] + + with pytest.raises(IncompatibleException): + seg_loss.get_input_tensors(inputs, LabelType.KEYPOINTS) + with pytest.raises(IncompatibleException): + seg_loss.get_input_tensors(inputs, "keypoints") + + det_head = DummyDetectionHead() + det_loss = DummyLoss(node=det_head) + with pytest.raises(ValueError): + det_loss.get_input_tensors(inputs) + + +def test_prepare(inputs, labels): + backbone = DummyBackbone() + seg_head = DummySegmentationHead() + seg_loss = DummyLoss(node=seg_head) + det_head = DummyDetectionHead() + + assert seg_loss.prepare(inputs, labels) == 
("segmentation", "segmentation") + inputs["segmentation"].append("segmentation2") + assert seg_loss.prepare(inputs, labels) == ( + "segmentation2", + "segmentation", + ) + + with pytest.raises(RuntimeError): + NoLabelLoss(node=backbone).prepare(inputs, labels) + + with pytest.raises(RuntimeError): + NoLabelLoss(node=seg_head).prepare(inputs, labels) + + with pytest.raises(RuntimeError): + DummyLoss(node=det_head).prepare(inputs, labels) diff --git a/tests/unittests/test_base_node.py b/tests/unittests/test_base_node.py new file mode 100644 index 00000000..68386f73 --- /dev/null +++ b/tests/unittests/test_base_node.py @@ -0,0 +1,160 @@ +import pytest +import torch +from luxonis_ml.data import LabelType +from torch import Size, Tensor + +from luxonis_train.nodes import AttachIndexType, BaseNode +from luxonis_train.utils import DatasetMetadata, Packet +from luxonis_train.utils.exceptions import IncompatibleException + + +class DummyNode(BaseNode, register=False): + def forward(self, _): ... + + +@pytest.fixture +def packet() -> Packet[Tensor]: + return { + "features": [torch.rand(3, 224, 224)], + } + + +@pytest.mark.parametrize( + ("attach_index", "expected"), + [ + (-1, 5), + (0, 1), + ("all", [1, 2, 3, 4, 5]), + ((0, 2), [1, 2]), + ((0, 4, 2), [1, 3]), + ((-1, -3, -1), [5, 4]), + ((4, 2), [5, 4]), + ((-1, -3), [5, 4]), + ((-4, 4), [2, 3, 4]), + ((1, -1), [2, 3, 4]), + ], +) +def test_attach_index( + attach_index: AttachIndexType, expected: list[int] | int +): + lst = [1, 2, 3, 4, 5] + + class DummyBaseNode: + attach_index: AttachIndexType + + DummyBaseNode.attach_index = attach_index + + assert BaseNode.get_attached(DummyBaseNode, lst) == expected # type: ignore + + +def test_attach_index_error(): + lst = [1, 2, 3, 4, 5] + + class DummyNode(BaseNode, register=False): + attach_index: AttachIndexType + + with pytest.raises(ValueError): + DummyNode.attach_index = 10 + BaseNode.get_attached(DummyNode, lst) # type: ignore + + with pytest.raises(ValueError): + DummyNode.attach_index = "none" # type: ignore + BaseNode.get_attached(DummyNode, lst) # type: ignore + + +def test_invalid(packet: Packet[Tensor]): + node = DummyNode() + with pytest.raises(RuntimeError): + _ = node.input_shapes + with pytest.raises(RuntimeError): + _ = node.original_in_shape + with pytest.raises(RuntimeError): + _ = node.dataset_metadata + with pytest.raises(ValueError): + node.unwrap([packet, packet]) + with pytest.raises(ValueError): + node.wrap({"inp": torch.rand(3, 224, 224)}) + + +def tets_in_sizes(): + node = DummyNode( + input_shapes=[{"features": [Size((3, 224, 224)) for _ in range(3)]}] + ) + assert node.in_sizes == [Size((3, 224, 224)) for _ in range(3)] + node = DummyNode(in_sizes=Size((3, 224, 224))) + assert node.in_sizes == Size((3, 224, 224)) + with pytest.raises(RuntimeError): + node = DummyNode(input_shapes=[{"feats": [Size((3, 224, 224))]}]) + _ = node.in_sizes + + +def test_check_type_override(): + class DummyNode(BaseNode, register=False): + in_channels: int + + def forward(self, _): ... 
+ + with pytest.raises(IncompatibleException): + DummyNode( + input_shapes=[ + {"features": [Size((3, 224, 224)) for _ in range(3)]} + ] + ) + + +def test_tasks(): + class DummyHead(DummyNode): + tasks = [LabelType.CLASSIFICATION] + + class DummyMultiHead(DummyNode): + tasks = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + + dummy_head = DummyHead() + dummy_node = DummyNode() + dummy_multi_head = DummyMultiHead(n_keypoints=4) + assert ( + dummy_head.get_task_name(LabelType.CLASSIFICATION) == "classification" + ) + assert dummy_head.task == "classification" + with pytest.raises(ValueError): + dummy_head.get_task_name(LabelType.SEGMENTATION) + + with pytest.raises(RuntimeError): + dummy_node.get_task_name(LabelType.SEGMENTATION) + + with pytest.raises(RuntimeError): + _ = dummy_node.task + + with pytest.raises(ValueError): + _ = dummy_multi_head.task + + metadata = DatasetMetadata( + classes={ + "segmentation": ["car", "person", "dog"], + "classification": ["car-class", "person-class"], + }, + n_keypoints={"color-segmentation": 0, "detection": 0}, + ) + + dummy_multi_head._dataset_metadata = metadata + assert dummy_multi_head.get_class_names(LabelType.SEGMENTATION) == [ + "car", + "person", + "dog", + ] + assert dummy_multi_head.get_class_names(LabelType.CLASSIFICATION) == [ + "car-class", + "person-class", + ] + assert dummy_multi_head.get_n_classes(LabelType.SEGMENTATION) == 3 + assert dummy_multi_head.get_n_classes(LabelType.CLASSIFICATION) == 2 + assert dummy_multi_head.n_keypoints == 4 + with pytest.raises(ValueError): + _ = dummy_head.n_keypoints + with pytest.raises(RuntimeError): + _ = dummy_node.n_keypoints + + dummy_head = DummyHead(n_classes=5) + assert dummy_head.n_classes == 5 + with pytest.raises(ValueError): + _ = dummy_multi_head.n_classes diff --git a/tests/unittests/test_blocks.py b/tests/unittests/test_blocks.py new file mode 100644 index 00000000..8b6110d4 --- /dev/null +++ b/tests/unittests/test_blocks.py @@ -0,0 +1,15 @@ +import torch + +from luxonis_train.nodes.blocks import SqueezeExciteBlock, autopad + + +def test_autopad(): + assert autopad(1, 2) == 2 + assert autopad(2) == 1 + assert autopad((2, 4)) == (1, 2) + + +def test_squeeze_excite_block(): + se_block = SqueezeExciteBlock(64, 32) + x = torch.rand(1, 64, 224, 224) + assert se_block(x).shape == (1, 64, 224, 224) diff --git a/tests/unittests/test_callbacks/__init__.py b/tests/unittests/test_callbacks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unittests/test_callbacks/test_needs_checkpoint.py b/tests/unittests/test_callbacks/test_needs_checkpoint.py new file mode 100644 index 00000000..bd296dea --- /dev/null +++ b/tests/unittests/test_callbacks/test_needs_checkpoint.py @@ -0,0 +1,6 @@ +from luxonis_train.callbacks.needs_checkpoint import NeedsCheckpoint + + +def test_other_type(): + assert NeedsCheckpoint._get_other_type("loss") == "metric" + assert NeedsCheckpoint._get_other_type("metric") == "loss" diff --git a/tests/unittests/test_loaders/__init__.py b/tests/unittests/test_loaders/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unittests/test_loaders/test_base_loader.py b/tests/unittests/test_loaders/test_base_loader.py new file mode 100644 index 00000000..dee1ecef --- /dev/null +++ b/tests/unittests/test_loaders/test_base_loader.py @@ -0,0 +1,94 @@ +import pytest +import torch +from luxonis_ml.data import LabelType +from torch import Size + +from luxonis_train.loaders import collate_fn + + +@pytest.mark.parametrize( + 
"input_names_and_shapes", + [ + [("features", Size([3, 224, 224]))], + [ + ("features", Size([3, 224, 224])), + ("segmentation", Size([1, 224, 224])), + ], + [ + ("features", Size([3, 224, 224])), + ("segmentation", Size([1, 224, 224])), + ("disparity", Size([1, 224, 224])), + ], + [ + ("features", Size([3, 224, 224])), + ("pointcloud", Size([1000, 3])), + ], + [ + ("features", Size([3, 224, 224])), + ("pointcloud", Size([1000, 3])), + ("foobar", Size([2, 3, 4, 5, 6])), + ], + ], +) +@pytest.mark.parametrize("batch_size", [1, 2]) +def test_collate_fn( + input_names_and_shapes: list[tuple[str, Size]], batch_size: int, subtests +): + def build_batch_element(): + inputs = {} + for name, shape in input_names_and_shapes: + inputs[name] = torch.rand(shape, dtype=torch.float32) + + labels = { + "classification": ( + torch.randint(0, 2, (2,), dtype=torch.int64), + LabelType.CLASSIFICATION, + ), + "segmentation": ( + torch.randint(0, 2, (1, 224, 224), dtype=torch.int64), + LabelType.SEGMENTATION, + ), + "keypoints": ( + torch.rand(1, 52, dtype=torch.float32), + LabelType.KEYPOINTS, + ), + "boundingbox": ( + torch.rand(1, 5, dtype=torch.float32), + LabelType.BOUNDINGBOX, + ), + } + + return inputs, labels + + batch = [build_batch_element() for _ in range(batch_size)] + + inputs, annotations = collate_fn(batch) # type: ignore + + with subtests.test("inputs"): + assert inputs["features"].shape == (batch_size, 3, 224, 224) + assert inputs["features"].dtype == torch.float32 + + with subtests.test("classification"): + assert "classification" in annotations + assert annotations["classification"][0].shape == (batch_size, 2) + assert annotations["classification"][0].dtype == torch.int64 + + with subtests.test("segmentation"): + assert "segmentation" in annotations + assert annotations["segmentation"][0].shape == ( + batch_size, + 1, + 224, + 224, + ) + assert annotations["segmentation"][0].dtype == torch.int64 + + with subtests.test("keypoints"): + assert "keypoints" in annotations + assert annotations["keypoints"][0].shape == (batch_size, 53) + assert annotations["keypoints"][0].dtype == torch.float32 + + with subtests.test("boundingbox"): + assert "boundingbox" in annotations + assert annotations["boundingbox"][0].shape == (batch_size, 6) + assert annotations["boundingbox"][0].dtype == torch.float32 diff --git a/tests/unittests/test_losses/test_bce_with_logits_loss.py b/tests/unittests/test_losses/test_bce_with_logits_loss.py index 27871019..f94b5cb1 100644 --- a/tests/unittests/test_losses/test_bce_with_logits_loss.py +++ b/tests/unittests/test_losses/test_bce_with_logits_loss.py @@ -16,7 +16,9 @@ def test_forward_pass(): predictions = torch.full([bs, n_cl], 1.5) # logit loss_fn = BCEWithLogitsLoss() - loss = loss_fn.forward(predictions, targets) # -log(sigmoid(1.5)) = 0.2014 + loss = loss_fn.forward( + predictions, targets + ) # -log(sigmoid(1.5)) = 0.2014 assert isinstance(loss, torch.Tensor) assert loss.shape == torch.Size([]) @@ -57,5 +59,7 @@ def test_weights(): assert loss_weight != loss_no_weight -if __name__ == "__main__": - pytest.main() +def test_invalid(): + loss_fn = BCEWithLogitsLoss() + with pytest.raises(RuntimeError): + loss_fn.forward(torch.rand(10, 10), torch.rand(15, 15)) diff --git a/tests/unittests/test_metrics/test_torchmetrics.py b/tests/unittests/test_metrics/test_torchmetrics.py new file mode 100644 index 00000000..141a3785 --- /dev/null +++ b/tests/unittests/test_metrics/test_torchmetrics.py @@ -0,0 +1,52 @@ +import pytest +import torchmetrics +from luxonis_ml.data import 
LabelType + +from luxonis_train.attached_modules.metrics.torchmetrics import ( + TorchMetricWrapper, +) +from luxonis_train.nodes import BaseNode + + +def test_torchmetrics(): + class DummyNode(BaseNode): + tasks = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + + def forward(self, _): ... + + class DummyMetric(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + Metric = torchmetrics.Accuracy + + node_1_class = DummyNode(n_classes=1) + node_2_classes = DummyNode(n_classes=2) + node = DummyNode() + assert DummyMetric(node=node_1_class)._task == "binary" + assert DummyMetric(node=node_2_classes)._task == "multiclass" + assert DummyMetric(node=node_2_classes, task="multilabel") + assert DummyMetric(num_classes=1)._task == "binary" + assert DummyMetric(num_classes=2)._task == "multiclass" + assert DummyMetric(num_labels=2)._task == "multilabel" + + assert DummyMetric(task="binary") + + with pytest.raises(ValueError): + DummyMetric() + + with pytest.raises(ValueError): + DummyMetric(task="multiclass") + + with pytest.raises(ValueError): + DummyMetric(task="invalid") + + with pytest.raises(ValueError): + DummyMetric(task="binary", node=node_2_classes) + + with pytest.raises(ValueError): + DummyMetric(task="multiclass", node=node_1_class) + + with pytest.raises(ValueError): + DummyMetric(task="multiclass", node=node) + + with pytest.raises(ValueError): + DummyMetric(task="multilabel", node=node) diff --git a/tests/unittests/test_utils/test_assigners/test_tal_assigner.py b/tests/unittests/test_utils/test_assigners/test_tal_assigner.py deleted file mode 100644 index 8f291615..00000000 --- a/tests/unittests/test_utils/test_assigners/test_tal_assigner.py +++ /dev/null @@ -1,165 +0,0 @@ -import torch - -from luxonis_train.utils.assigners.tal_assigner import TaskAlignedAssigner - - -def test_init(): - assigner = TaskAlignedAssigner(n_classes=80, topk=13, alpha=1.0, beta=6.0, eps=1e-9) - assert assigner.n_classes == 80 - assert assigner.topk == 13 - assert assigner.alpha == 1.0 - assert assigner.beta == 6.0 - assert assigner.eps == 1e-9 - - -def test_forward(): - # Constants for clarity - batch_size = 10 - num_anchors = 100 - num_max_boxes = 5 - num_classes = 80 - - # Initialize the TaskAlignedAssigner - assigner = TaskAlignedAssigner(n_classes=num_classes, topk=13) - - # Create mock inputs - pred_scores = torch.rand(batch_size, num_anchors, 1) - pred_bboxes = torch.rand(batch_size, num_anchors, 4) - anchor_points = torch.rand(num_anchors, 2) - gt_labels = torch.rand(batch_size, num_max_boxes, 1) - gt_bboxes = torch.zeros(batch_size, num_max_boxes, 4) # no gt bboxes - mask_gt = torch.rand(batch_size, num_max_boxes, 1) - - # Call the forward method - labels, bboxes, scores, mask, assigned_gt_idx = assigner.forward( - pred_scores, pred_bboxes, anchor_points, gt_labels, gt_bboxes, mask_gt - ) - - # Assert the expected outcomes - assert labels.shape == (batch_size, num_anchors) - assert labels.unique().tolist() == [ - num_classes - ] # All labels should be num_classes as there are no GT boxes - assert bboxes.shape == (batch_size, num_anchors, 4) - assert torch.equal( - bboxes, torch.zeros_like(bboxes) - ) # All bboxes should be zero as there are no GT boxes - assert ( - scores.shape - == ( - batch_size, - num_anchors, - num_classes, - ) - ) # TODO: We have this in doc string: Returns: ... 
assigned scores of shape [bs, n_anchors, 1], - # it returns tensor of shape [bs, n_anchors, n_classes] instead - assert torch.equal( - scores, torch.zeros_like(scores) - ) # All scores should be zero as there are no GT boxes - assert mask.shape == (batch_size, num_anchors) - assert torch.equal( - mask, torch.zeros_like(mask) - ) # All mask values should be zero as there are no GT boxes - assert assigned_gt_idx.shape == (batch_size, num_anchors) - assert torch.equal( - assigned_gt_idx, torch.zeros_like(assigned_gt_idx) - ) # All assigned_gt_idx values should be zero as there are no GT boxes - - -def test_get_alignment_metric(): - # Create mock inputs - bs = 2 # batch size - n_anchors = 5 - n_max_boxes = 3 - n_classes = 80 - - pred_scores = torch.rand( - bs, n_anchors, n_classes - ) # TODO: Same issue: works with n_classes instead of 1, change it in the doc string in the method itself!!! - pred_bboxes = torch.rand(bs, n_anchors, 4) - gt_labels = torch.randint(0, n_classes, (bs, n_max_boxes, 1)) - gt_bboxes = torch.rand(bs, n_max_boxes, 4) - - # Initialize the TaskAlignedAssigner - assigner = TaskAlignedAssigner( - n_classes=n_classes, topk=13, alpha=1.0, beta=6.0, eps=1e-9 - ) - assigner.bs = pred_scores.size(0) - assigner.n_max_boxes = gt_bboxes.size(1) - - # Call the method - align_metric, overlaps = assigner._get_alignment_metric( - pred_scores, pred_bboxes, gt_labels, gt_bboxes - ) - - # Assert the expected outcomes - assert align_metric.shape == (bs, n_max_boxes, n_anchors) - assert overlaps.shape == (bs, n_max_boxes, n_anchors) - assert align_metric.dtype == torch.float32 - assert overlaps.dtype == torch.float32 - assert (align_metric >= 0).all() and ( - align_metric <= 1 - ).all() # Alignment metric should be in the range [0, 1] - assert (overlaps >= 0).all() and ( - overlaps <= 1 - ).all() # IoU should be in the range [0, 1] - - -def test_select_topk_candidates(): - # Constants for the test - batch_size = 2 - num_max_boxes = 3 - num_anchors = 5 - topk = 2 - - metrics = torch.rand(batch_size, num_max_boxes, num_anchors) - mask_gt = torch.rand(batch_size, num_max_boxes, 1) - - # Initialize the TaskAlignedAssigner - assigner = TaskAlignedAssigner(n_classes=80, topk=topk) - - # Call the method - is_in_topk = assigner._select_topk_candidates( - metrics, - ) - topk_mask = mask_gt.repeat([1, 1, topk]).bool() - assert torch.equal( - assigner._select_topk_candidates(metrics), - assigner._select_topk_candidates(metrics, topk_mask=topk_mask), - ) - # Assert the expected outcomes - assert is_in_topk.shape == (batch_size, num_max_boxes, num_anchors) - assert is_in_topk.dtype == torch.float32 - - # Check that each ground truth has at most 'topk' anchors selected - assert (is_in_topk.sum(dim=-1) <= topk).all() - - -def test_get_final_assignments(): - # Constants for the test - batch_size = 2 - num_max_boxes = 3 - num_anchors = 5 - num_classes = 80 - - # Mock inputs - gt_labels = torch.randint(0, num_classes, (batch_size, num_max_boxes, 1)) - gt_bboxes = torch.rand(batch_size, num_max_boxes, 4) - assigned_gt_idx = torch.randint(0, num_max_boxes, (batch_size, num_anchors)) - mask_pos_sum = torch.randint(0, 2, (batch_size, num_anchors)) - - # Initialize the TaskAlignedAssigner - assigner = TaskAlignedAssigner(n_classes=num_classes, topk=13) - assigner.bs = batch_size # Set batch size - assigner.n_max_boxes = gt_bboxes.size(1) - - # Call the method - assigned_labels, assigned_bboxes, assigned_scores = assigner._get_final_assignments( - gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum - ) - - # 
Assert the expected outcomes - assert assigned_labels.shape == (batch_size, num_anchors) - assert assigned_bboxes.shape == (batch_size, num_anchors, 4) - assert assigned_scores.shape == (batch_size, num_anchors, num_classes) - assert (assigned_labels >= 0).all() and (assigned_labels <= num_classes).all() diff --git a/tests/unittests/test_utils/test_boxutils.py b/tests/unittests/test_utils/test_boxutils.py index 2cb3df24..2b05a428 100644 --- a/tests/unittests/test_utils/test_boxutils.py +++ b/tests/unittests/test_utils/test_boxutils.py @@ -1,39 +1,42 @@ +import pytest import torch -from luxonis_train.utils.boxutils import ( +from luxonis_train.utils.boundingbox import ( + IoUType, anchors_for_fpn_features, bbox2dist, bbox_iou, compute_iou_loss, dist2bbox, process_bbox_predictions, - process_keypoints_predictions, ) -def generate_random_bboxes(num_bboxes, max_width, max_height, format="xyxy"): - # Generate top-left corners (x1, y1) - x1y1 = torch.rand(num_bboxes, 2) * torch.tensor([max_width - 1, max_height - 1]) +def generate_random_bboxes( + n_bboxes: int, max_width: int, max_height: int, format: str = "xyxy" +): + x1y1 = torch.rand(n_bboxes, 2) * torch.tensor( + [max_width - 1, max_height - 1] + ) - # Generate widths and heights ensuring x2 > x1 and y2 > y1 wh = ( - torch.rand(num_bboxes, 2) * (torch.tensor([max_width, max_height]) - 1 - x1y1) + torch.rand(n_bboxes, 2) + * (torch.tensor([max_width, max_height]) - 1 - x1y1) + 1 ) if format == "xyxy": - # Calculate bottom-right corners (x2, y2) for xyxy format x2y2 = x1y1 + wh bboxes = torch.cat((x1y1, x2y2), dim=1) elif format == "xywh": - # Use x1y1 as top-left corner and wh as width and height for xywh format bboxes = torch.cat((x1y1, wh), dim=1) elif format == "cxcywh": - # Calculate center coordinates and use wh as width and height for cxcywh format cxcy = x1y1 + wh / 2 bboxes = torch.cat((cxcy, wh), dim=1) else: - raise ValueError("Unsupported format. Choose from 'xyxy', 'xywh', 'cxcywh'.") + raise ValueError( + "Unsupported format. Choose from 'xyxy', 'xywh', 'cxcywh'." 
+ ) return bboxes @@ -44,6 +47,8 @@ def test_dist2bbox(): bbox = dist2bbox(distance, anchor_points) assert bbox.shape == distance.shape + with pytest.raises(ValueError): + dist2bbox(distance, anchor_points, out_format="invalid") # type: ignore def test_bbox2dist(): @@ -56,22 +61,41 @@ def test_bbox2dist(): assert distance.shape == bbox.shape -def test_bbox_iou(): +@pytest.mark.parametrize("iou_type", ["none", "giou", "diou", "ciou", "siou"]) +def test_bbox_iou(iou_type: IoUType): for format in ["xyxy", "cxcywh", "xywh"]: bbox1 = generate_random_bboxes(5, 640, 640, format) - bbox2 = generate_random_bboxes(8, 640, 640, format) - - iou = bbox_iou(bbox1, bbox2) - - assert iou.shape == (5, 8) - assert iou.min() >= 0 and iou.max() <= 1 + if iou_type == "siou": + bbox2 = generate_random_bboxes(5, 640, 640, format) + else: + bbox2 = generate_random_bboxes(8, 640, 640, format) + + iou = bbox_iou( + bbox1, + bbox2, + bbox_format=format, # type: ignore + iou_type=iou_type, + ) + + assert iou.shape == (bbox1.shape[0], bbox2.shape[0]) + if iou_type == "none": + min = 0 + else: + min = -1.5 + assert iou.min() >= min and iou.max() <= 1 + + if iou_type == "none": + with pytest.raises(ValueError): + bbox_iou(bbox1, bbox2, iou_type="invalid") # type: ignore def test_compute_iou_loss(): pred_bboxes = generate_random_bboxes(8, 640, 640, "xyxy") target_bboxes = generate_random_bboxes(8, 640, 640, "xyxy") - loss_iou, iou = compute_iou_loss(pred_bboxes, target_bboxes, iou_type="giou") + loss_iou, iou = compute_iou_loss( + pred_bboxes, target_bboxes, iou_type="giou" + ) assert isinstance(loss_iou, torch.Tensor) assert isinstance(iou, torch.Tensor) @@ -93,21 +117,16 @@ def test_process_bbox_predictions(): assert out_bbox_tail.shape == (10, 4) -def test_process_keypoints_predictions(): - keypoints = torch.rand(10, 15) # 5 keypoints * 3 (x, y, visibility) - - x, y, visibility = process_keypoints_predictions(keypoints) - - assert x.shape == y.shape == visibility.shape == (10, 5) - - def test_anchors_for_fpn_features(): features = [torch.rand(1, 256, 14, 14), torch.rand(1, 256, 28, 28)] strides = torch.tensor([8, 16]) - anchors, anchor_points, n_anchors_list, stride_tensor = anchors_for_fpn_features( - features, strides - ) + ( + anchors, + anchor_points, + n_anchors_list, + stride_tensor, + ) = anchors_for_fpn_features(features, strides) assert isinstance(anchors, torch.Tensor) assert isinstance(anchor_points, torch.Tensor) diff --git a/tests/unittests/test_utils/test_dataset_metadata.py b/tests/unittests/test_utils/test_dataset_metadata.py new file mode 100644 index 00000000..8dba11a8 --- /dev/null +++ b/tests/unittests/test_utils/test_dataset_metadata.py @@ -0,0 +1,53 @@ +import pytest + +from luxonis_train.utils import DatasetMetadata + + +@pytest.fixture +def metadata(): + return DatasetMetadata( + classes={ + "color-segmentation": ["car", "person"], + "detection": ["car", "person"], + }, + n_keypoints={"color-segmentation": 0, "detection": 0}, + ) + + +def test_n_classes(metadata): + assert metadata.n_classes("color-segmentation") == 2 + assert metadata.n_classes("detection") == 2 + assert metadata.n_classes() == 2 + with pytest.raises(ValueError): + metadata.n_classes("segmentation") + metadata._classes["segmentation"] = ["car", "person", "tree"] + with pytest.raises(RuntimeError): + metadata.n_classes() + + +def test_n_keypoints(metadata): + assert metadata.n_keypoints("color-segmentation") == 0 + assert metadata.n_keypoints("detection") == 0 + assert metadata.n_keypoints() == 0 + with 
pytest.raises(ValueError): + metadata.n_keypoints("segmentation") + metadata._n_keypoints["segmentation"] = 1 + with pytest.raises(RuntimeError): + metadata.n_keypoints() + + +def test_class_names(metadata): + assert metadata.classes("color-segmentation") == ["car", "person"] + assert metadata.classes("detection") == ["car", "person"] + assert metadata.classes() == ["car", "person"] + with pytest.raises(ValueError): + metadata.classes("segmentation") + metadata._classes["segmentation"] = ["car", "person", "tree"] + with pytest.raises(RuntimeError): + metadata.classes() + + +def test_no_loader(): + metadata = DatasetMetadata() + with pytest.raises(RuntimeError): + metadata.autogenerate_anchors(3) diff --git a/tests/unittests/test_utils/test_general.py b/tests/unittests/test_utils/test_general.py new file mode 100644 index 00000000..7f13f796 --- /dev/null +++ b/tests/unittests/test_utils/test_general.py @@ -0,0 +1,44 @@ +import pytest + +from luxonis_train.utils.general import infer_upscale_factor + + +@pytest.mark.parametrize( + ("in_size", "orig_size", "expected"), + [ + ((1, 1), (1, 1), 0), + ((1, 1), (2, 2), 1), + ((2, 2), (1, 1), -1), + ((2, 2), (4, 4), 1), + ((4, 4), (2, 2), -1), + ((4, 4), (8, 8), 1), + ((8, 8), (4, 4), -1), + ((2, 2), (16, 16), 3), + ((16, 16), (4, 4), -2), + (4, 8, 1), + ], +) +def test_infer_upscale_factor( + in_size: tuple[int, int] | int, + orig_size: tuple[int, int] | int, + expected: int, +): + assert infer_upscale_factor(in_size, orig_size) == expected + + +@pytest.mark.parametrize( + ("in_size", "orig_size"), + [ + ((1, 1), (2, 1)), + ((1, 1), (1, 2)), + ((2, 3), (16, 16)), + ((3, 2), (16, 16)), + ((3, 3), (16, 16)), + ], +) +def test_infer_upscale_factor_fail( + in_size: tuple[int, int] | int, + orig_size: tuple[int, int] | int, +): + with pytest.raises(ValueError): + infer_upscale_factor(in_size, orig_size) diff --git a/tests/unittests/test_utils/test_graph.py b/tests/unittests/test_utils/test_graph.py new file mode 100644 index 00000000..c63e4b72 --- /dev/null +++ b/tests/unittests/test_utils/test_graph.py @@ -0,0 +1,79 @@ +import pytest + +from luxonis_train.utils.graph import Graph, is_acyclic, traverse_graph + + +@pytest.mark.parametrize( + ("graph", "acyclic"), + [ + ({}, True), + ({"a": []}, True), + ({"a": ["b"], "b": ["a"]}, False), + ({"a": ["b"], "b": []}, True), + ({"a": ["b"], "b": ["c"], "c": ["a"]}, False), + ({"a": ["b"], "b": ["c"], "c": []}, True), + ({"a": ["b", "c"], "b": ["d"], "c": ["d"], "d": []}, True), + ({"a": ["b", "c"], "b": ["d"], "c": ["d"], "d": ["a"]}, False), + ], +) +def test_acyclic(graph: Graph, acyclic: bool): + assert is_acyclic(graph) == acyclic + + +@pytest.mark.parametrize( + ("graph", "nodes", "expected"), + [ + ({}, {}, []), + ( + {"a": []}, + {"a": 1}, + [("a", 1, [], [])], + ), + ( + {"a": ["b"], "b": []}, + {"a": 1, "b": 2}, + [("b", 2, [], ["a"]), ("a", 1, ["b"], [])], + ), + ( + {"a": ["b"], "b": ["c"], "c": []}, + {"a": 1, "b": 2, "c": 3}, + [ + ("c", 3, [], ["a", "b"]), + ("b", 2, ["c"], ["a"]), + ("a", 1, ["b"], []), + ], + ), + ( + {"a": ["b", "c"], "b": ["d"], "c": ["d"], "d": []}, + {"a": 1, "b": 2, "c": 3, "d": 4}, + [ + ("d", 4, [], ["a", "b", "c"]), + ("b", 2, ["d"], ["a", "c"]), + ("c", 3, ["d"], ["a"]), + ("a", 1, ["b", "c"], []), + ], + ), + ], +) +def test_traverse( + graph: Graph, + nodes: dict[str, int], + expected: list[tuple[str, int, list[str], list[str]]], +): + result = list(traverse_graph(graph, nodes)) + assert result == expected + + +@pytest.mark.parametrize( + ("graph", "nodes"), + [ + 
({"a": ["b"], "b": ["a"]}, {"a": 1, "b": 2}), + ( + {"a": ["b", "c"], "b": ["d"], "c": ["d"], "d": ["a"]}, + {"a": 1, "b": 2, "c": 3, "d": 4}, + ), + ], +) +def test_traverse_fail(graph: Graph, nodes: dict[str, int]): + with pytest.raises(RuntimeError): + list(traverse_graph(graph, nodes)) diff --git a/tests/unittests/test_utils/test_keypoints.py b/tests/unittests/test_utils/test_keypoints.py new file mode 100644 index 00000000..3d20dae6 --- /dev/null +++ b/tests/unittests/test_utils/test_keypoints.py @@ -0,0 +1,24 @@ +import pytest +import torch + +from luxonis_train.utils.keypoints import ( + get_sigmas, + process_keypoints_predictions, +) + + +def test_get_sigmas(): + sigmas = [0.1, 0.2, 0.3] + pytest.approx(get_sigmas(sigmas, 3).tolist(), sigmas) + with pytest.raises(ValueError): + get_sigmas(sigmas, 2) + assert len(get_sigmas(None, 17)) == 17 + assert len(get_sigmas(None, 5)) == 5 + + +def test_process_keypoints_predictions(): + keypoints = torch.tensor([[0.1, 0.2, 1.0, 0.4, 0.5, 0.0]]) + x, y, visibility = process_keypoints_predictions(keypoints) + pytest.approx(x[0].tolist(), [0.1, 0.4]) + pytest.approx(y[0].tolist(), [0.2, 0.5]) + pytest.approx(visibility[0].tolist(), [1.0, 0.0]) diff --git a/tests/unittests/test_utils/test_loaders/test_base_loader.py b/tests/unittests/test_utils/test_loaders/test_base_loader.py deleted file mode 100644 index 0209c192..00000000 --- a/tests/unittests/test_utils/test_loaders/test_base_loader.py +++ /dev/null @@ -1,69 +0,0 @@ -import pytest -import torch - -from luxonis_train.utils.loaders import collate_fn -from luxonis_train.utils.types import LabelType - - -@pytest.mark.parametrize( - "input_names_and_shapes", - [ - [("features", torch.Size([3, 224, 224]))], - [ - ("features", torch.Size([3, 224, 224])), - ("segmentation", torch.Size([1, 224, 224])), - ], - [ - ("features", torch.Size([3, 224, 224])), - ("segmentation", torch.Size([1, 224, 224])), - ("disparity", torch.Size([1, 224, 224])), - ], - [ - ("features", torch.Size([3, 224, 224])), - ("pointcloud", torch.Size([1000, 3])), - ], - [ - ("features", torch.Size([3, 224, 224])), - ("pointcloud", torch.Size([1000, 3])), - ("foobar", torch.Size([2, 3, 4, 5, 6])), - ], - ], -) -@pytest.mark.parametrize("batch_size", [1, 2]) -def test_collate_fn(input_names_and_shapes, batch_size): - # Mock batch data - - def build_batch_element(): - inputs = {} - for name, shape in input_names_and_shapes: - inputs[name] = torch.rand(shape, dtype=torch.float32) - - labels = { - "classification": ( - torch.randint(0, 2, (2,), dtype=torch.int64), - LabelType.CLASSIFICATION, - ) - } - - return inputs, labels - - batch = [build_batch_element() for _ in range(batch_size)] - - # Call collate_fn - inputs, annotations = collate_fn(batch) # type: ignore - - # Check images tensor - assert inputs["features"].shape == (batch_size, 3, 224, 224) - assert inputs["features"].dtype == torch.float32 - - # Check annotations - assert "classification" in annotations - assert annotations["classification"][0].shape == (batch_size, 2) - assert annotations["classification"][0].dtype == torch.int64 - - -# TODO: test also segmentation, boundingbox and keypoint - - -if __name__ == "__main__": - pytest.main() From 1297a044b0031bc0be62d2c5f75c7dd463bf24f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= Date: Thu, 19 Sep 2024 01:21:16 -0400 Subject: [PATCH 70/75] Update CI Badge (#75) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 873fe2c9..8b645c06 100644 --- 
a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) ![PyBadge](https://github.com/luxonis/luxonis-train/blob/main/media/pybadge.svg) [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) -![UnitTests](https://github.com/luxonis/luxonis-train/actions/workflows/tests.yaml/badge.svg) +![CI](https://github.com/luxonis/luxonis-train/actions/workflows/ci.yaml/badge.svg) ![Docs](https://github.com/luxonis/luxonis-train/actions/workflows/docs.yaml/badge.svg) [![Coverage](media/coverage_badge.svg)](https://github.com/luxonis/luxonis-train/actions) From af776268d3c8948ff505b1d38ac3e3c7b7c573ea Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Thu, 19 Sep 2024 07:43:01 +0200 Subject: [PATCH 71/75] formatting --- .../losses/obb_detection_loss.py | 57 ++++++++----- .../metrics/mean_average_precision_obb.py | 79 ++++++++++++------- .../visualizers/obbox_visualizer.py | 49 +++++++----- .../predefined_models/detection_model_obb.py | 4 +- luxonis_train/nodes/blocks/blocks.py | 14 ++-- .../nodes/heads/efficient_obbox_head.py | 15 +++- luxonis_train/utils/__init__.py | 6 ++ luxonis_train/utils/boundingbox.py | 60 +++++++++----- 8 files changed, 189 insertions(+), 95 deletions(-) diff --git a/luxonis_train/attached_modules/losses/obb_detection_loss.py b/luxonis_train/attached_modules/losses/obb_detection_loss.py index baa80d3d..c2b08b88 100644 --- a/luxonis_train/attached_modules/losses/obb_detection_loss.py +++ b/luxonis_train/attached_modules/losses/obb_detection_loss.py @@ -15,12 +15,19 @@ xywh2xyxy, xyxyxyxy2xywhr, ) -from luxonis_train.utils.types import IncompatibleException, Labels, LabelType, Packet +from luxonis_train.utils.types import ( + IncompatibleException, + Labels, + LabelType, + Packet, +) from .base_loss import BaseLoss -class OBBDetectionLoss(BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]): +class OBBDetectionLoss( + BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor] +): node: EfficientOBBoxHead supported_labels = [LabelType.OBOUNDINGBOX] @@ -127,7 +134,9 @@ def prepare( self.grid_cell_offset, multiply_with_stride=True, ) - self.anchor_points_strided = self.anchor_points / self.stride_tensor + self.anchor_points_strided = ( + self.anchor_points / self.stride_tensor + ) target = self._preprocess_target( target, batch_size @@ -144,7 +153,9 @@ def prepare( ) pred_bboxes = torch.cat( ( - dist2rbbox(pred_distri_tensor, pred_angles, self.anchor_points_strided), + dist2rbbox( + pred_distri_tensor, pred_angles, self.anchor_points_strided + ), pred_angles, ), dim=-1, @@ -198,10 +209,14 @@ def forward( assigned_scores: Tensor, mask_positive: Tensor, ): - one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[..., :-1] + one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[ + ..., :-1 + ] # CLS loss - loss_cls = self.varifocal_loss(pred_scores, assigned_scores, one_hot_label) + loss_cls = self.varifocal_loss( + pred_scores, assigned_scores, one_hot_label + ) # loss_cls = self.bce(pred_scores, assigned_scores) if assigned_scores.sum() > 1: loss_cls /= assigned_scores.sum() @@ -234,8 +249,8 @@ def forward( return loss, sub_losses def _preprocess_target(self, target: Tensor, batch_size: int): - """Preprocess target in shape [batch_size, N, 6] where N is maximum number of - instances in one image.""" + """Preprocess target in shape [batch_size, N, 
6] where N is + maximum number of instances in one image.""" idx_cls = target[:, :2] xyxyxyxy = target[:, 2:] cxcywhr = xyxyxyxy2xywhr(xyxyxyxy) @@ -244,7 +259,8 @@ def _preprocess_target(self, target: Tensor, batch_size: int): else: target = torch.cat([idx_cls, torch.tensor(cxcywhr)], dim=-1) sample_ids, counts = cast( - tuple[Tensor, Tensor], torch.unique(target[:, 0].int(), return_counts=True) + tuple[Tensor, Tensor], + torch.unique(target[:, 0].int(), return_counts=True), ) c_max = int(counts.max()) if counts.numel() > 0 else 0 out_target = torch.zeros(batch_size, c_max, 6, device=target.device) @@ -283,7 +299,8 @@ def forward( self, pred_score: Tensor, target_score: Tensor, label: Tensor ) -> Tensor: weight = ( - self.alpha * pred_score.pow(self.gamma) * (1 - label) + target_score * label + self.alpha * pred_score.pow(self.gamma) * (1 - label) + + target_score * label ) ce_loss = F.binary_cross_entropy( pred_score.float(), target_score.float(), reduction="none" @@ -296,8 +313,8 @@ class DFLoss(nn.Module): """Criterion class for computing DFL losses during training. @type reg_max: int - @param reg_max: Number of bins for predicting the distributions of bounding box - coordinates. + @param reg_max: Number of bins for predicting the distributions of + bounding box coordinates. """ def __init__(self, reg_max=16) -> None: @@ -318,9 +335,13 @@ def __call__(self, pred_dist, target): wl = tr - target # weight left wr = 1 - wl # weight right return ( - F.cross_entropy(pred_dist, tl.view(-1), reduction="none").view(tl.shape) + F.cross_entropy(pred_dist, tl.view(-1), reduction="none").view( + tl.shape + ) * wl - + F.cross_entropy(pred_dist, tr.view(-1), reduction="none").view(tl.shape) + + F.cross_entropy(pred_dist, tr.view(-1), reduction="none").view( + tl.shape + ) * wr ).mean(-1, keepdim=True) @@ -329,13 +350,13 @@ class RotatedBboxLoss(nn.Module): """Criterion class for computing training losses during training. @type reg_max: int - @param reg_max: Number of bins for predicting the distributions of bounding box - coordinates. + @param reg_max: Number of bins for predicting the distributions of + bounding box coordinates. """ def __init__(self, reg_max): - """Initialize the BboxLoss module with regularization maximum and DFL - settings.""" + """Initialize the BboxLoss module with regularization maximum + and DFL settings.""" super().__init__() self.dfl_loss = DFLoss(reg_max) if reg_max > 1 else None diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py b/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py index ea9dfac4..5e98bc71 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py @@ -9,8 +9,9 @@ class MeanAveragePrecisionOBB(BaseMetric): - """Compute the Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR) for object - detection predictions using oriented bounding boxes. + """Compute the Mean-Average-Precision (mAP) and Mean-Average-Recall + (mAR) for object detection predictions using oriented bounding + boxes. Partially adapted from U{YOLOv8 OBBMetrics }. @@ -39,15 +40,16 @@ def update( outputs: list[Tensor], # preds labels: list[Tensor], # batch ): - """Update metrics without erasing stats from the previous batch, i.e. the - metrics are calculated cumulatively. + """Update metrics without erasing stats from the previous batch, + i.e. the metrics are calculated cumulatively. 
@type outputs: list[Tensor] - @param outputs: Network predictions [x1, y1, x2, y2, conf, cls_idx, r] - unnormalized (not in [0, 1] range) [Tensor(n_bboxes, 7)] + @param outputs: Network predictions [x1, y1, x2, y2, conf, + cls_idx, r] unnormalized (not in [0, 1] range) + [Tensor(n_bboxes, 7)] @type labels: list[Tensor] - @param labels: [cls_idx, x1, y1, x2, y2, r] unnormalized (not in [0, 1] range) - [Tensor(n_bboxes, 6)] + @param labels: [cls_idx, x1, y1, x2, y2, r] unnormalized (not in + [0, 1] range) [Tensor(n_bboxes, 6)] """ for si, output in enumerate(outputs): self.stats["conf"].append(output[:, 4]) @@ -97,9 +99,11 @@ def prepare( return output_nms, output_labels - def _preprocess_target(self, target: Tensor, batch_size: int, img_size) -> Tensor: - """Preprocess target in shape [batch_size, N, 6] where N is maximum number of - instances in one image.""" + def _preprocess_target( + self, target: Tensor, batch_size: int, img_size + ) -> Tensor: + """Preprocess target in shape [batch_size, N, 6] where N is + maximum number of instances in one image.""" cls_idx = target[:, 1].unsqueeze(-1) xyxyxyxy = target[:, 2:] xyxyxyxy[:, 0::2] *= img_size[1] # scale x @@ -120,7 +124,8 @@ def reset(self) -> None: def compute( self, ) -> tuple[Tensor, dict[str, Tensor]]: - """Process predicted results for object detection and update metrics.""" + """Process predicted results for object detection and update + metrics.""" results = self._process( torch.cat(self.stats["tp"]).cpu().numpy(), torch.cat(self.stats["conf"]).cpu().numpy(), @@ -143,8 +148,9 @@ def compute( def _process_batch( self, detections: Tensor, gt_bboxes: Tensor, gt_cls: Tensor ) -> Tensor: - """Perform computation of the correct prediction matrix for a batch of # "fp": - torch.from_numpy(results[1]), detections and ground truth bounding boxes. + """Perform computation of the correct prediction matrix for a + batch of # "fp": torch.from_numpy(results[1]), detections and + ground truth bounding boxes. @type detections: Tensor @param detections: A tensor of shape (N, 7) representing the detected bounding boxes and associated @@ -182,23 +188,26 @@ def match_predictions( iou: Tensor, use_scipy: bool = False, ) -> Tensor: - """Matches predictions to ground truth objects (pred_classes, true_classes) - using IoU. + """Matches predictions to ground truth objects (pred_classes, + true_classes) using IoU. @type pred_classes: Tensor @param pred_classes: Predicted class indices of shape(N,). @type true_classes: Tensor @param true_classes: Target class indices of shape(M,). @type iou: Tensor - @param iou: An NxM tensor containing the pairwise IoU values for predictions and - ground of truth + @param iou: An NxM tensor containing the pairwise IoU values for + predictions and ground of truth @type use_scipy: bool - @param use_scipy: Whether to use scipy for matching (more precise). + @param use_scipy: Whether to use scipy for matching (more + precise). @rtype: Tensor @return: Correct tensor of shape(N,10) for 10 IoU thresholds. 
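        Example (illustrative only; the shapes below are assumed and
        C{metric} stands for an instance of this class):

            >>> pred_classes = torch.tensor([0, 1, 1])   # 3 detections
            >>> true_classes = torch.tensor([0, 1])      # 2 ground-truth boxes
            >>> iou = torch.rand(2, 3)                   # pairwise IoU, labels x detections
            >>> correct = metric.match_predictions(pred_classes, true_classes, iou)
            >>> correct.shape                            # one column per IoU threshold
            torch.Size([3, 10])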
""" # Dx10 matrix, where D - detections, 10 - IoU thresholds - correct = np.zeros((pred_classes.shape[0], self.iouv.shape[0])).astype(bool) + correct = np.zeros((pred_classes.shape[0], self.iouv.shape[0])).astype( + bool + ) # LxD matrix where L - labels (rows), D - detections (columns) correct_class = true_classes[:, None] == pred_classes iou = iou * correct_class # zero out the wrong classes @@ -210,8 +219,10 @@ def match_predictions( cost_matrix = iou * (iou >= threshold) if cost_matrix.any(): - labels_idx, detections_idx = scipy.optimize.linear_sum_assignment( - cost_matrix, maximize=True + labels_idx, detections_idx = ( + scipy.optimize.linear_sum_assignment( + cost_matrix, maximize=True + ) ) valid = cost_matrix[labels_idx, detections_idx] > 0 if valid.any(): @@ -234,10 +245,13 @@ def match_predictions( np.unique(matches[:, 0], return_index=True)[1] ] correct[matches[:, 1].astype(int), i] = True - return torch.tensor(correct, dtype=torch.bool, device=pred_classes.device) + return torch.tensor( + correct, dtype=torch.bool, device=pred_classes.device + ) def _update_metrics(self, results: tuple[np.ndarray, ...]): - """Updates the evaluation metrics of the model with a new set of results. + """Updates the evaluation metrics of the model with a new set of + results. @type results: tuple[np.ndarray, ...] @param results: A tuple containing the following evaluation metrics: @@ -277,7 +291,8 @@ def _process( pred_cls: np.ndarray, target_cls: np.ndarray, ) -> tuple[np.ndarray, ...]: - """Process predicted results for object detection and update metrics.""" + """Process predicted results for object detection and update + metrics.""" results = MeanAveragePrecisionOBB.ap_per_class( tp, conf, @@ -303,7 +318,8 @@ def ap_per_class( eps: float = 1e-16, # prefix="", ) -> tuple[np.ndarray, ...]: - """Computes the average precision per class for object detection evaluation. + """Computes the average precision per class for object detection + evaluation. Args: tp (np.ndarray): Binary array indicating whether the detection is correct (True) or not (False). @@ -414,7 +430,8 @@ def ap_per_class( def compute_ap( recall: list[float], precision: list[float] ) -> tuple[float, np.ndarray, np.ndarray]: - """Compute the average precision (AP) given the recall and precision curves. + """Compute the average precision (AP) given the recall and + precision curves. Args: recall (list): The recall curve. @@ -441,14 +458,18 @@ def compute_ap( i = np.where(mrec[1:] != mrec[:-1])[ 0 ] # points where x-axis (recall) changes - ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve + ap = np.sum( + (mrec[i + 1] - mrec[i]) * mpre[i + 1] + ) # area under curve return ap, mpre, mrec @staticmethod def smooth(y: np.ndarray, f: float = 0.05) -> np.ndarray: """Box filter of fraction f.""" - nf = round(len(y) * f * 2) // 2 + 1 # number of filter elements (must be odd) + nf = ( + round(len(y) * f * 2) // 2 + 1 + ) # number of filter elements (must be odd) p = np.ones(nf // 2) # ones padding yp = np.concatenate((p * y[0], y, p * y[-1]), 0) # y padded return np.convolve(yp, np.ones(nf) / nf, mode="valid") # y-smoothed diff --git a/luxonis_train/attached_modules/visualizers/obbox_visualizer.py b/luxonis_train/attached_modules/visualizers/obbox_visualizer.py index 1da141c5..94557a2a 100644 --- a/luxonis_train/attached_modules/visualizers/obbox_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/obbox_visualizer.py @@ -25,26 +25,33 @@ def __init__( ): """Visualizer for oriented bounding box predictions. 
- Creates a visualization of the oriented bounding box predictions and labels. + Creates a visualization of the oriented bounding box predictions + and labels. @type labels: dict[int, str] | list[str] | None - @param labels: Either a dictionary mapping class indices to names, or a list of - names. If list is provided, the label mapping is done by index. By default, - no labels are drawn. + @param labels: Either a dictionary mapping class indices to + names, or a list of names. If list is provided, the label + mapping is done by index. By default, no labels are drawn. @type draw_labels: bool - @param draw_labels: Whether or not to draw labels. Defaults to C{True}. + @param draw_labels: Whether or not to draw labels. Defaults to + C{True}. @type colors: dict[int, Color] | list[Color] | None - @param colors: Either a dictionary mapping class indices to colors, or a list of - colors. If list is provided, the color mapping is done by index. By default, - random colors are used. + @param colors: Either a dictionary mapping class indices to + colors, or a list of colors. If list is provided, the color + mapping is done by index. By default, random colors are + used. @type fill: bool - @param fill: Whether or not to fill the bounding boxes. Defaults to C{False}. + @param fill: Whether or not to fill the bounding boxes. Defaults + to C{False}. @type width: int | None - @param width: The width of the bounding box lines. Defaults to C{1}. + @param width: The width of the bounding box lines. Defaults to + C{1}. @type font: str | None - @param font: A filename containing a TrueType font. Defaults to C{None}. + @param font: A filename containing a TrueType font. Defaults to + C{None}. @type font_size: int | None - @param font_size: The font size to use for the labels. Defaults to C{None}. + @param font_size: The font size to use for the labels. Defaults + to C{None}. """ super().__init__(**kwargs) if isinstance(labels, list): @@ -55,9 +62,13 @@ def __init__( } if colors is None: - colors = {label: get_color(i) for i, label in self.bbox_labels.items()} + colors = { + label: get_color(i) for i, label in self.bbox_labels.items() + } if isinstance(colors, list): - colors = {self.bbox_labels[i]: color for i, color in enumerate(colors)} + colors = { + self.bbox_labels[i]: color for i, color in enumerate(colors) + } self.colors = colors self.fill = fill self.width = width @@ -159,16 +170,18 @@ def forward( predictions: list[Tensor], targets: Tensor, ) -> tuple[Tensor, Tensor]: - """Creates a visualization of the oriented bounding box predictions and labels. + """Creates a visualization of the oriented bounding box + predictions and labels. @type label_canvas: Tensor @param label_canvas: The canvas containing the labels. @type prediction_canvas: Tensor @param prediction_canvas: The canvas containing the predictions. @type predictions: Tensor - @param predictions: The predicted bounding boxes. The shape should be [N, 7], - where N is the number of bounding boxes and the last dimension is [xc, yc, - w, h, conf, class]. # NOTE: check it + @param predictions: The predicted bounding boxes. The shape + should be [N, 7], where N is the number of bounding boxes + and the last dimension is [xc, yc, w, h, conf, class]. # + NOTE: check it @type targets: Tensor @param targets: The target bounding boxes. 
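        @rtype: tuple[Tensor, Tensor]
        @return: The label canvas and the prediction canvas with the
            corresponding oriented bounding boxes drawn in.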
""" diff --git a/luxonis_train/models/predefined_models/detection_model_obb.py b/luxonis_train/models/predefined_models/detection_model_obb.py index 9ba44e02..dd02901f 100644 --- a/luxonis_train/models/predefined_models/detection_model_obb.py +++ b/luxonis_train/models/predefined_models/detection_model_obb.py @@ -47,7 +47,9 @@ def nodes(self) -> list[ModelNodeConfig]: name="EfficientOBBoxHead", alias="detection_obb_head", freezing=self.head_params.pop("freezing", {}), - inputs=["detection_neck"] if self.use_neck else ["detection_backbone"], + inputs=["detection_neck"] + if self.use_neck + else ["detection_backbone"], params=self.head_params, task=self.task_name, ) diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index 8e035f16..b0193830 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -82,16 +82,16 @@ def _initialize_weights_and_biases(self, prior_prob: float) -> None: class EfficientOBBDecoupledBlock(EfficientDecoupledBlock): def __init__(self, n_classes: int, in_channels: int, reg_max: int = 16): - """Efficient Decoupled block used for angle, class and regression predictions in - OBB (oriented bounding box) tasks. + """Efficient Decoupled block used for angle, class and + regression predictions in OBB (oriented bounding box) tasks. @type n_classes: int @param n_classes: Number of classes. @type in_channels: int @param in_channels: Number of input channels. @type reg_max: int - @param reg_max: Number of bins for predicting the distributions of bounding box - coordinates. + @param reg_max: Number of bins for predicting the distributions + of bounding box coordinates. """ super().__init__(n_classes, in_channels) @@ -104,7 +104,11 @@ def __init__(self, n_classes: int, in_channels: int, reg_max: int = 16): padding=1, activation=nn.SiLU(), ), - nn.Conv2d(in_channels=in_channels, out_channels=4 * reg_max, kernel_size=1), + nn.Conv2d( + in_channels=in_channels, + out_channels=4 * reg_max, + kernel_size=1, + ), ) self.angle_branch = nn.Sequential( diff --git a/luxonis_train/nodes/heads/efficient_obbox_head.py b/luxonis_train/nodes/heads/efficient_obbox_head.py index 436c8854..14bb587d 100644 --- a/luxonis_train/nodes/heads/efficient_obbox_head.py +++ b/luxonis_train/nodes/heads/efficient_obbox_head.py @@ -82,7 +82,8 @@ def forward( return features, cls_score_list, reg_distri_list, angles_list def wrap( - self, output: tuple[list[Tensor], list[Tensor], list[Tensor], list[Tensor]] + self, + output: tuple[list[Tensor], list[Tensor], list[Tensor], list[Tensor]], ) -> Packet[Tensor]: features, cls_score_list, reg_distri_list, angles_list = output @@ -100,10 +101,15 @@ def wrap( [angles_list[i].flatten(2) for i in range(len(angles_list))], dim=2 ).permute(0, 2, 1) cls_tensor = torch.cat( - [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], dim=2 + [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], + dim=2, ).permute(0, 2, 1) reg_tensor = torch.cat( - [reg_distri_list[i].flatten(2) for i in range(len(reg_distri_list))], dim=2 + [ + reg_distri_list[i].flatten(2) + for i in range(len(reg_distri_list)) + ], + dim=2, ).permute(0, 2, 1) if self.training: @@ -129,7 +135,8 @@ def wrap( def _process_to_bbox( self, output: tuple[list[Tensor], Tensor, Tensor, Tensor] ) -> list[Tensor]: - """Performs post-processing of the output and returns bboxs after NMS.""" + """Performs post-processing of the output and returns bboxs + after NMS.""" features, cls_score_tensor, reg_dist_tensor, angles_tensor 
= output _, anchor_points, _, stride_tensor = anchors_for_fpn_features( features, diff --git a/luxonis_train/utils/__init__.py b/luxonis_train/utils/__init__.py index c47d3d33..2235e11e 100644 --- a/luxonis_train/utils/__init__.py +++ b/luxonis_train/utils/__init__.py @@ -1,6 +1,7 @@ from .boundingbox import ( anchors_for_fpn_features, anchors_from_dataset, + batch_probiou, bbox2dist, bbox_iou, compute_iou_loss, @@ -8,6 +9,8 @@ match_to_anchor, non_max_suppression, process_bbox_predictions, + xywhr2xyxyxyxy, + xyxyxyxy2xywhr, ) from .config import Config from .dataset_metadata import DatasetMetadata @@ -40,6 +43,9 @@ "dist2bbox", "bbox2dist", "bbox_iou", + "batch_probiou", + "xywhr2xyxyxyxy", + "xyxyxyxy2xywhr", "non_max_suppression", "anchors_from_dataset", "anchors_for_fpn_features", diff --git a/luxonis_train/utils/boundingbox.py b/luxonis_train/utils/boundingbox.py index 94f21951..a18bd823 100644 --- a/luxonis_train/utils/boundingbox.py +++ b/luxonis_train/utils/boundingbox.py @@ -136,7 +136,8 @@ def dist2rbbox( pred_angles: Tensor, anchor_points: Tensor, ) -> Tensor: - """Transform distance (ltrb) to a rotated bounding box in "xcycwh" format. + """Transform distance (ltrb) to a rotated bounding box in "xcycwh" + format. @type distance: Tensor @param distance: Distance predictions @@ -173,8 +174,9 @@ def bbox2dist(bbox: Tensor, anchor_points: Tensor, reg_max: float) -> Tensor: def xyxyxyxy2xywhr(x: Tensor) -> Tensor | np.ndarray: - """Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, xy3, xy4] to [xywh, - rotation]. Rotation values are returned in radians from 0 to pi/2. + """Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, + xy3, xy4] to [xywh, rotation]. Rotation values are returned in + radians from 0 to pi/2. Args: x (numpy.ndarray | torch.Tensor): Input box corners [xy1, xy2, xy3, xy4] of shape (n, 8). @@ -199,8 +201,9 @@ def xyxyxyxy2xywhr(x: Tensor) -> Tensor | np.ndarray: def xywhr2xyxyxyxy(x: Tensor) -> Tensor | np.ndarray: - """Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2, - xy3, xy4]. Rotation values should be in radians from 0 to pi/2. + """Convert batched Oriented Bounding Boxes (OBB) from [xywh, + rotation] to [xy1, xy2, xy3, xy4]. Rotation values should be in + radians from 0 to pi/2. Args: x (numpy.ndarray | torch.Tensor): Boxes in [cx, cy, w, h, rotation] format of shape (n, 5) or (b, n, 5). @@ -229,9 +232,9 @@ def xywhr2xyxyxyxy(x: Tensor) -> Tensor | np.ndarray: def xyxy2xywh(x: Tensor) -> Tensor: - """Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, - height) format where (x1, y1) is the top-left corner and (x2, y2) is the bottom- - right corner. + """Convert bounding box coordinates from (x1, y1, x2, y2) format to + (x, y, width, height) format where (x1, y1) is the top-left corner + and (x2, y2) is the bottom- right corner. Args: x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format. 
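    Example (illustrative; the values are chosen by hand and the helpers
    are imported straight from this module):

        >>> import torch
        >>> from luxonis_train.utils.boundingbox import xywh2xyxy, xyxy2xywh
        >>> boxes = torch.tensor([[10.0, 20.0, 30.0, 60.0]])  # x1, y1, x2, y2
        >>> xyxy2xywh(boxes)                                   # center x, center y, width, height
        tensor([[20., 40., 20., 40.]])
        >>> xywh2xyxy(xyxy2xywh(boxes))                        # round-trip back to corners
        tensor([[10., 20., 30., 60.]])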
@@ -243,7 +246,9 @@ def xyxy2xywh(x: Tensor) -> Tensor: x.shape[-1] == 4 ), f"input shape last dimension expected 4 but input shape is {x.shape}" y = ( - torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) + torch.empty_like(x) + if isinstance(x, torch.Tensor) + else np.empty_like(x) ) # faster than clone/copy y[..., 0] = (x[..., 0] + x[..., 2]) / 2 # x center y[..., 1] = (x[..., 1] + x[..., 3]) / 2 # y center @@ -253,9 +258,10 @@ def xyxy2xywh(x: Tensor) -> Tensor: def xywh2xyxy(x: Tensor) -> Tensor: - """Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, - x2, y2) format where (x1, y1) is the top-left corner and (x2, y2) is the bottom- - right corner. Note: ops per 2 channels faster than per channel. + """Convert bounding box coordinates from (x, y, width, height) + format to (x1, y1, x2, y2) format where (x1, y1) is the top-left + corner and (x2, y2) is the bottom- right corner. Note: ops per 2 + channels faster than per channel. Args: x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x, y, width, height) format. @@ -267,7 +273,9 @@ def xywh2xyxy(x: Tensor) -> Tensor: x.shape[-1] == 4 ), f"input shape last dimension expected 4 but input shape is {x.shape}" y = ( - torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) + torch.empty_like(x) + if isinstance(x, torch.Tensor) + else np.empty_like(x) ) # faster than clone/copy xy = x[..., :2] # centers wh = x[..., 2:] / 2 # half width-height @@ -434,7 +442,10 @@ def probiou( ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2)) / ( 4 - * ((a1 * b1 - c1.pow(2)).clamp_(0) * (a2 * b2 - c2.pow(2)).clamp_(0)).sqrt() + * ( + (a1 * b1 - c1.pow(2)).clamp_(0) + * (a2 * b2 - c2.pow(2)).clamp_(0) + ).sqrt() + eps ) + eps @@ -484,7 +495,10 @@ def batch_probiou(obb1: Tensor, obb2: Tensor, eps: float = 1e-7) -> Tensor: ((a1 + a2) * (b1 + b2) - (c1 + c2).pow(2)) / ( 4 - * ((a1 * b1 - c1.pow(2)).clamp_(0) * (a2 * b2 - c2.pow(2)).clamp_(0)).sqrt() + * ( + (a1 * b1 - c1.pow(2)).clamp_(0) + * (a2 * b2 - c2.pow(2)).clamp_(0) + ).sqrt() + eps ) + eps @@ -666,8 +680,8 @@ def non_max_suppression_obb( max_det: int = 300, predicts_objectness: bool = True, ) -> list[Tensor]: - """Non-maximum suppression on model's predictions to keep only best instances for - oriented bounding boxes (obb). + """Non-maximum suppression on model's predictions to keep only best + instances for oriented bounding boxes (obb). @type preds: Tensor @param preds: Model's prediction tensor of shape [bs, N, M]. Bounding boxes are in xywhr format. 
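    Example (illustrative sketch; the tensor layout and the C{n_classes} /
    C{conf_thres} keyword names are assumptions inferred from the body of
    this function, all other arguments keep their defaults):

        >>> preds = torch.rand(2, 100, 5 + 1 + 3)  # [bs, N, xywhr | objectness | 3 class scores]
        >>> kept = non_max_suppression_obb(preds, n_classes=3, conf_thres=0.25)
        >>> len(kept)                              # one tensor of surviving detections per image
        2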
@@ -744,7 +758,9 @@ def non_max_suppression_obb( if multi_label: box_idx, class_idx = ( - (curr_out[:, 6 : 6 + n_classes] > conf_thres).nonzero(as_tuple=False).T + (curr_out[:, 6 : 6 + n_classes] > conf_thres) + .nonzero(as_tuple=False) + .T ) keep_mask[box_idx] = True curr_out = torch.cat( @@ -756,9 +772,13 @@ def non_max_suppression_obb( 1, ) else: - conf, class_idx = curr_out[:, 6 : 6 + n_classes].max(1, keepdim=True) + conf, class_idx = curr_out[:, 6 : 6 + n_classes].max( + 1, keepdim=True + ) keep_mask[conf.view(-1) > conf_thres] = True - curr_out = torch.cat((bboxes, conf, class_idx.float()), 1)[keep_mask] + curr_out = torch.cat((bboxes, conf, class_idx.float()), 1)[ + keep_mask + ] if keep_classes is not None: curr_out = curr_out[ From a037c50e8ac7e4620e2babc8a1ba4417e116b7fb Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Thu, 19 Sep 2024 22:24:02 +0200 Subject: [PATCH 72/75] leftover merge discrepancies --- .../losses/obb_detection_loss.py | 33 ++++++++----------- .../metrics/mean_average_precision_obb.py | 22 ++++++------- .../nodes/heads/efficient_obbox_head.py | 2 +- luxonis_train/utils/__init__.py | 10 ++++++ 4 files changed, 35 insertions(+), 32 deletions(-) diff --git a/luxonis_train/attached_modules/losses/obb_detection_loss.py b/luxonis_train/attached_modules/losses/obb_detection_loss.py index c2b08b88..5d9a622c 100644 --- a/luxonis_train/attached_modules/losses/obb_detection_loss.py +++ b/luxonis_train/attached_modules/losses/obb_detection_loss.py @@ -2,12 +2,15 @@ import torch import torch.nn.functional as F +from luxonis_ml.data import LabelType from torch import Tensor, nn +from luxonis_train.assigners import RotatedTaskAlignedAssigner from luxonis_train.nodes.heads import EfficientOBBoxHead -from luxonis_train.utils.assigners import RotatedTaskAlignedAssigner -from luxonis_train.utils.boxutils import ( - IoUType, +from luxonis_train.utils import ( + IncompatibleException, + Labels, + Packet, anchors_for_fpn_features, bbox2dist, dist2rbbox, @@ -15,12 +18,7 @@ xywh2xyxy, xyxyxyxy2xywhr, ) -from luxonis_train.utils.types import ( - IncompatibleException, - Labels, - LabelType, - Packet, -) +from luxonis_train.utils.boundingbox import IoUType from .base_loss import BaseLoss @@ -75,7 +73,6 @@ def __init__( ) self.iou_type: IoUType = iou_type self.reduction = reduction - self.n_classes = self.node.n_classes self.stride = self.node.stride self.grid_cell_size = self.node.grid_cell_size self.grid_cell_offset = self.node.grid_cell_offset @@ -347,16 +344,14 @@ def __call__(self, pred_dist, target): class RotatedBboxLoss(nn.Module): - """Criterion class for computing training losses during training. - - @type reg_max: int - @param reg_max: Number of bins for predicting the distributions of - bounding box coordinates. - """ - def __init__(self, reg_max): - """Initialize the BboxLoss module with regularization maximum - and DFL settings.""" + """Criterion class for computing training losses during + training. + + @type reg_max: int + @param reg_max: Number of bins for predicting the distributions + of bounding box coordinates. 
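        Example (illustrative; mirrors the attribute set just below):

            >>> loss_fn = RotatedBboxLoss(reg_max=16)  # 16 DFL bins per box side
            >>> loss_fn.dfl_loss is not None           # DFL branch is enabled for reg_max > 1
            True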
+ """ super().__init__() self.dfl_loss = DFLoss(reg_max) if reg_max > 1 else None diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py b/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py index 5e98bc71..d3434535 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py @@ -1,9 +1,9 @@ import numpy as np import torch +from luxonis_ml.data import LabelType from torch import Tensor -from luxonis_train.utils.boxutils import batch_probiou, xyxyxyxy2xywhr -from luxonis_train.utils.types import Labels, LabelType, Packet +from luxonis_train.utils import Labels, Packet, batch_probiou, xyxyxyxy2xywhr from .base_metric import BaseMetric @@ -152,6 +152,13 @@ def _process_batch( batch of # "fp": torch.from_numpy(results[1]), detections and ground truth bounding boxes. + Example: + + >>> detections = torch.rand(100, 7) # 100 sample detections + >>> gt_bboxes = torch.rand(50, 5) # 50 sample ground truth boxes + >>> gt_cls = torch.randint(0, 5, (50,)) # 50 ground truth class labels + >>> correct_matrix = OBBValidator._process_batch(detections, gt_bboxes, gt_cls) + @type detections: Tensor @param detections: A tensor of shape (N, 7) representing the detected bounding boxes and associated data. Each detection is represented as (x1, y1, x2, y2, conf, class, angle). @@ -164,16 +171,7 @@ def _process_batch( @return: The correct prediction matrix with shape (N, 10), which includes 10 IoU (Intersection over Union) levels for each detection, indicating the accuracy of predictions compared to the ground truth. - Example: - ```python - detections = torch.rand(100, 7) # 100 sample detections - gt_bboxes = torch.rand(50, 5) # 50 sample ground truth boxes - gt_cls = torch.randint(0, 5, (50,)) # 50 ground truth class labels - correct_matrix = OBBValidator._process_batch(detections, gt_bboxes, gt_cls) - ``` - - Note: - This method relies on `batch_probiou` to calculate IoU between detections and ground truth bounding boxes. + @note: This method relies on C{batch_probiou} to calculate IoU between detections and ground truth bounding boxes. 
""" iou = batch_probiou( gt_bboxes, diff --git a/luxonis_train/nodes/heads/efficient_obbox_head.py b/luxonis_train/nodes/heads/efficient_obbox_head.py index 14bb587d..ff9eb06d 100644 --- a/luxonis_train/nodes/heads/efficient_obbox_head.py +++ b/luxonis_train/nodes/heads/efficient_obbox_head.py @@ -6,7 +6,7 @@ from luxonis_train.nodes.blocks import EfficientOBBDecoupledBlock from luxonis_train.nodes.heads import EfficientBBoxHead -from luxonis_train.utils.boxutils import ( +from luxonis_train.utils import ( anchors_for_fpn_features, dist2rbbox, non_max_suppression_obb, diff --git a/luxonis_train/utils/__init__.py b/luxonis_train/utils/__init__.py index 2235e11e..0a4861a5 100644 --- a/luxonis_train/utils/__init__.py +++ b/luxonis_train/utils/__init__.py @@ -6,10 +6,15 @@ bbox_iou, compute_iou_loss, dist2bbox, + dist2rbbox, match_to_anchor, non_max_suppression, + non_max_suppression_obb, + probiou, process_bbox_predictions, + xywh2xyxy, xywhr2xyxyxyxy, + xyxy2xywh, xyxyxyxy2xywhr, ) from .config import Config @@ -46,6 +51,11 @@ "batch_probiou", "xywhr2xyxyxyxy", "xyxyxyxy2xywhr", + "probiou", + "xywh2xyxy", + "xyxy2xywh", + "dist2rbbox", + "non_max_suppression_obb", "non_max_suppression", "anchors_from_dataset", "anchors_for_fpn_features", From e7ab0c031c2e755130ad8386446c7495b2e5d964 Mon Sep 17 00:00:00 2001 From: KlemenSkrlj <47853619+klemen1999@users.noreply.github.com> Date: Fri, 20 Sep 2024 11:41:06 +0200 Subject: [PATCH 73/75] [Fix] Updated `EfficientBBoxHead` parser to `YOLO` (#74) Co-authored-by: Martin Kozlovsky --- luxonis_train/nodes/enums/head_categorization.py | 2 +- tests/integration/parking_lot.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/luxonis_train/nodes/enums/head_categorization.py b/luxonis_train/nodes/enums/head_categorization.py index d36c9647..90f75725 100644 --- a/luxonis_train/nodes/enums/head_categorization.py +++ b/luxonis_train/nodes/enums/head_categorization.py @@ -5,7 +5,7 @@ class ImplementedHeads(Enum): """Task categorization for the implemented heads.""" ClassificationHead = "ClassificationParser" - EfficientBBoxHead = "YoloDetectionNetwork" + EfficientBBoxHead = "YOLO" ImplicitKeypointBBoxHead = "YoloDetectionNetwork" EfficientKeypointBBoxHead = "YoloDetectionNetwork" SegmentationHead = "SegmentationParser" diff --git a/tests/integration/parking_lot.json b/tests/integration/parking_lot.json index 0059241e..a42b82b2 100644 --- a/tests/integration/parking_lot.json +++ b/tests/integration/parking_lot.json @@ -158,7 +158,7 @@ ], "heads": [ { - "parser": "YoloDetectionNetwork", + "parser": "YOLO", "metadata": { "postprocessor_path": null, "classes": [ From 2449850b7c13a08800dbd7bbfa1b5058aada1bdb Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin <49622375+sokovninn@users.noreply.github.com> Date: Fri, 20 Sep 2024 13:35:03 +0200 Subject: [PATCH 74/75] DDRNet for Semantic Segmentation (#70) Co-authored-by: Martin Kozlovsky Co-authored-by: GitHub Actions --- configs/ddrnet_segmentation_model.yaml | 45 +++ .../models/predefined_models/__init__.py | 2 + .../ddrnet_segmentation_model.py | 77 ++++ luxonis_train/nodes/backbones/__init__.py | 2 + .../nodes/backbones/ddrnet/__init__.py | 3 + .../nodes/backbones/ddrnet/blocks.py | 358 ++++++++++++++++++ .../nodes/backbones/ddrnet/ddrnet.py | 294 ++++++++++++++ .../nodes/backbones/ddrnet/variants.py | 27 ++ luxonis_train/nodes/blocks/__init__.py | 8 + luxonis_train/nodes/blocks/blocks.py | 236 ++++++++++++ luxonis_train/nodes/heads/__init__.py | 2 + .../nodes/heads/ddrnet_segmentation_head.py | 109 
++++++ luxonis_train/utils/config.py | 3 +- tests/integration/test_simple.py | 1 + 14 files changed, 1166 insertions(+), 1 deletion(-) create mode 100644 configs/ddrnet_segmentation_model.yaml create mode 100644 luxonis_train/models/predefined_models/ddrnet_segmentation_model.py create mode 100644 luxonis_train/nodes/backbones/ddrnet/__init__.py create mode 100644 luxonis_train/nodes/backbones/ddrnet/blocks.py create mode 100644 luxonis_train/nodes/backbones/ddrnet/ddrnet.py create mode 100644 luxonis_train/nodes/backbones/ddrnet/variants.py create mode 100644 luxonis_train/nodes/heads/ddrnet_segmentation_head.py diff --git a/configs/ddrnet_segmentation_model.yaml b/configs/ddrnet_segmentation_model.yaml new file mode 100644 index 00000000..2bd3b7e8 --- /dev/null +++ b/configs/ddrnet_segmentation_model.yaml @@ -0,0 +1,45 @@ +# DDRNet-23-slim model for segmentation +# Refer to here for optimal hyperparameters for this model: https://github.com/Deci-AI/super-gradients/blob/4797c974c7c445d12e2575c468848d9c3e04becd/src/super_gradients/recipes/cityscapes_ddrnet.yaml#L4 + +model: + name: ddrnet_segmentation + predefined_model: + name: DDRNetSegmentationModel + params: + task: binary + backbone_params: + use_aux_heads: True # set to False to disable auxiliary heads (for export) + variant: '23-slim' + +loader: + params: + dataset_name: coco_test + +trainer: + preprocessing: + train_image_size: [&height 256, &width 320] + keep_aspect_ratio: False + normalize: + active: True + + batch_size: 4 + epochs: &epochs 500 + num_workers: 4 + validation_interval: 10 + num_log_images: 8 + + callbacks: + - name: TestOnTrainEnd + - name: ExportOnTrainEnd + + optimizer: + name: SGD + params: + lr: 0.01 + momentum: 0.9 + weight_decay: 0.0005 + + scheduler: + name: CosineAnnealingLR + params: + T_max: *epochs diff --git a/luxonis_train/models/predefined_models/__init__.py b/luxonis_train/models/predefined_models/__init__.py index 0e8fe8c0..c52e359d 100644 --- a/luxonis_train/models/predefined_models/__init__.py +++ b/luxonis_train/models/predefined_models/__init__.py @@ -1,5 +1,6 @@ from .base_predefined_model import BasePredefinedModel from .classification_model import ClassificationModel +from .ddrnet_segmentation_model import DDRNetSegmentationModel from .detection_model import DetectionModel from .keypoint_detection_model import KeypointDetectionModel from .segmentation_model import SegmentationModel @@ -10,4 +11,5 @@ "DetectionModel", "KeypointDetectionModel", "ClassificationModel", + "DDRNetSegmentationModel", ] diff --git a/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py b/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py new file mode 100644 index 00000000..beacca5e --- /dev/null +++ b/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py @@ -0,0 +1,77 @@ +from dataclasses import dataclass, field + +from luxonis_train.utils.config import ( + LossModuleConfig, + ModelNodeConfig, +) +from luxonis_train.utils.types import Kwargs + +from .segmentation_model import SegmentationModel + + +@dataclass +class DDRNetSegmentationModel(SegmentationModel): + backbone: str = "DDRNet" + aux_head_params: Kwargs = field(default_factory=dict) + + @property + def nodes(self) -> list[ModelNodeConfig]: + self.head_params.update({"attach_index": -1}) + + self.aux_head_params.update({"attach_index": -2}) + + node_list = [ + ModelNodeConfig( + name=self.backbone, + alias="ddrnet_backbone", + freezing=self.backbone_params.pop("freezing", {}), + params=self.backbone_params, + ), + 
ModelNodeConfig( + name="DDRNetSegmentationHead", + alias="segmentation_head", + inputs=["ddrnet_backbone"], + freezing=self.head_params.pop("freezing", {}), + params=self.head_params, + task=self.task_name, + ), + ] + if self.backbone_params.get("use_aux_heads", False): + node_list.append( + ModelNodeConfig( + name="DDRNetSegmentationHead", + alias="aux_segmentation_head", + inputs=["ddrnet_backbone"], + freezing=self.aux_head_params.pop("freezing", {}), + params=self.aux_head_params, + task=self.task_name, + ) + ) + return node_list + + @property + def losses(self) -> list[LossModuleConfig]: + loss_list = [ + LossModuleConfig( + name="BCEWithLogitsLoss" + if self.task == "binary" + else "CrossEntropyLoss", + alias="segmentation_loss", + attached_to="segmentation_head", + params=self.loss_params, + weight=1.0, + ), + ] + if self.backbone_params.get("use_aux_heads", False): + loss_list.append( + LossModuleConfig( + name="BCEWithLogitsLoss" + if self.task == "binary" + else "CrossEntropyLoss", + alias="aux_segmentation_loss", + attached_to="aux_segmentation_head", + params=self.loss_params, + weight=0.4, + ) + ) + return loss_list diff --git a/luxonis_train/nodes/backbones/__init__.py b/luxonis_train/nodes/backbones/__init__.py index 9463124b..aad94198 100644 --- a/luxonis_train/nodes/backbones/__init__.py +++ b/luxonis_train/nodes/backbones/__init__.py @@ -1,4 +1,5 @@ from .contextspatial import ContextSpatial +from .ddrnet import DDRNet from .efficientnet import EfficientNet from .efficientrep import EfficientRep from .micronet import MicroNet @@ -18,4 +19,5 @@ "ReXNetV1_lite", "RepVGG", "ResNet", + "DDRNet", ] diff --git a/luxonis_train/nodes/backbones/ddrnet/__init__.py b/luxonis_train/nodes/backbones/ddrnet/__init__.py new file mode 100644 index 00000000..8ecc5814 --- /dev/null +++ b/luxonis_train/nodes/backbones/ddrnet/__init__.py @@ -0,0 +1,3 @@ +from .ddrnet import DDRNet + +__all__ = ["DDRNet"] diff --git a/luxonis_train/nodes/backbones/ddrnet/blocks.py b/luxonis_train/nodes/backbones/ddrnet/blocks.py new file mode 100644 index 00000000..59f76b8b --- /dev/null +++ b/luxonis_train/nodes/backbones/ddrnet/blocks.py @@ -0,0 +1,358 @@ +"""DDRNet blocks. + +Adapted from: U{https://github.com/Deci-AI/super-gradients/blob/master/src/super_gradients/training/models/segmentation_models/ddrnet.py} +Original source: U{https://github.com/ydhongHIT/DDRNet} +Paper: U{https://arxiv.org/pdf/2101.06085.pdf} +@license: U{https://github.com/Deci-AI/super-gradients/blob/master/LICENSE.md} +""" + +import torch +from torch import Tensor, nn + +from luxonis_train.nodes.blocks import ConvModule, UpscaleOnline + + +class DAPPMBranch(nn.Module): + def __init__( + self, + kernel_size: int, + stride: int, + in_channels: int, + branch_channels: int, + inter_mode: str = "bilinear", + ): + """A DAPPM branch. + + @type kernel_size: int + @param kernel_size: The kernel size for the average pooling. + When stride=0, this parameter is omitted, and + AdaptiveAvgPool2d over all the input is performed. + @type stride: int + @param stride: Stride for the average pooling. When stride=0, an + AdaptiveAvgPool2d over all the input is performed (output is + 1x1). When stride=1, no average pooling is performed. When + stride>1, average pooling is performed (scaling the input + down and up again). + @type in_channels: int + @param in_channels: Number of input channels. + @type branch_channels: int + @param branch_channels: Width after the first convolution. 
+ @type inter_mode: str + @param inter_mode: Interpolation mode for upscaling. Defaults to + "bilinear". + """ + super().__init__() + + down_list = [] + down_list.append(nn.BatchNorm2d(in_channels)) + if stride == 0: + down_list.append(nn.AdaptiveAvgPool2d((1, 1))) + elif stride > 1: + down_list.append( + nn.AvgPool2d( + kernel_size=kernel_size, stride=stride, padding=stride + ) + ) + + down_list.append(nn.ReLU(inplace=True)) + down_list.append( + nn.Conv2d(in_channels, branch_channels, kernel_size=1, bias=False) + ) + + self.down_scale = nn.Sequential(*down_list) + self.up_scale = UpscaleOnline(inter_mode) + + if stride != 1: + self.process = nn.Sequential( + nn.BatchNorm2d(branch_channels), + nn.ReLU(inplace=True), + nn.Conv2d( + branch_channels, + branch_channels, + kernel_size=3, + padding=1, + bias=False, + ), + ) + + def forward(self, x: Tensor | list[Tensor]) -> Tensor: + """Process input through the DAPPM branch. + + @type x: Tensor or list[Tensor] + @param x: In branch 0 - the original input of the DAPPM. In other branches - a list containing the original + input and the output of the previous branch. + + @return: Processed output tensor. + """ + if isinstance(x, list): + output_of_prev_branch = x[1] + x = x[0] + else: + output_of_prev_branch = None + + in_width = x.shape[-1] + in_height = x.shape[-2] + out = self.down_scale(x) + out = self.up_scale( + out, output_height=in_height, output_width=in_width + ) + + if output_of_prev_branch is not None: + out = self.process(out + output_of_prev_branch) + + return out + + +class DAPPM(nn.Module): + def __init__( + self, + in_channels: int, + branch_channels: int, + out_channels: int, + kernel_sizes: list[int], + strides: list[int], + inter_mode: str = "bilinear", + ): + """DAPPM (Dynamic Attention Pyramid Pooling Module). + + @type in_channels: int + @param in_channels: Number of input channels. + @type branch_channels: int + @param branch_channels: Width after the first convolution in + each branch. + @type out_channels: int + @param out_channels: Number of output channels. + @type kernel_sizes: list[int] + @param kernel_sizes: List of kernel sizes for each branch. + @type strides: list[int] + @param strides: List of strides for each branch. + @type inter_mode: str + @param inter_mode: Interpolation mode for upscaling. Defaults to + "bilinear". + + @raises ValueError: If the lengths of `kernel_sizes` and `strides` + are not the same. + """ + super().__init__() + + if len(kernel_sizes) != len(strides): # pragma: no cover + raise ValueError( + "The lenghts of `kernel_sizes` and `strides` must be the same" + ) + + self.branches = nn.ModuleList( + [ + DAPPMBranch( + kernel_size=kernel_size, + stride=stride, + in_channels=in_channels, + branch_channels=branch_channels, + inter_mode=inter_mode, + ) + for kernel_size, stride in zip(kernel_sizes, strides) + ] + ) + + self.compression = nn.Sequential( + nn.BatchNorm2d(branch_channels * len(self.branches)), + nn.ReLU(inplace=True), + nn.Conv2d( + branch_channels * len(self.branches), + out_channels, + kernel_size=1, + bias=False, + ), + ) + self.shortcut = nn.Sequential( + nn.BatchNorm2d(in_channels), + nn.ReLU(inplace=True), + nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False), + ) + + def forward(self, x: Tensor) -> Tensor: + """Forward pass through the DAPPM module. + + @type x: Tensor + @param x: Input tensor. + @return: Output tensor after processing through all branches and + compression. 
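        Example (illustrative sketch; the channel counts and the 20x20
        input size are assumed, the kernel sizes and strides are the
        DDRNet defaults used by the backbone in this package):

            >>> spp = DAPPM(in_channels=512, branch_channels=128, out_channels=256,
            ...             kernel_sizes=[1, 5, 9, 17, 0], strides=[1, 2, 4, 8, 0])
            >>> x = torch.randn(1, 512, 20, 20)
            >>> spp(x).shape                           # same spatial size, out_channels channels
            torch.Size([1, 256, 20, 20])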
+ """ + x_list = [self.branches[0](x)] + + for i in range(1, len(self.branches)): + x_list.append(self.branches[i]([x, x_list[i - 1]])) + + out = self.compression(torch.cat(x_list, dim=1)) + self.shortcut(x) + return out + + +class BasicDDRBackbone(nn.Module): + def __init__( + self, + block: type[nn.Module], + stem_channels: int, + layers: list[int], + in_channels: int, + layer3_repeats: int = 1, + ): + """Initialize the BasicDDRBackBone with specified parameters. + + @type block: Type[nn.Module] + @param block: The block class to use for layers. + @type stem_channels: int + @param stem_channels: Number of output channels in the stem layer. + @type layers: list[int] + @param layers: Number of blocks in each layer. + @type in_channels: int + @param in_channels: Number of input channels. + @type layer3_repeats: int + @param layer3_repeats: Number of repeats for layer3. Defaults to + 1. + """ + super().__init__() + self.input_channels = in_channels + + self.stem = nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=stem_channels, + kernel_size=3, + stride=2, + padding=1, + bias=True, + activation=nn.ReLU(inplace=True), + ), + ConvModule( + in_channels=stem_channels, + out_channels=stem_channels, + kernel_size=3, + stride=2, + padding=1, + bias=True, + activation=nn.ReLU(inplace=True), + ), + ) + + self.layer1 = make_layer( + block=block, + in_channels=stem_channels, + channels=stem_channels, + num_blocks=layers[0], + ) + + self.layer2 = make_layer( + block=block, + in_channels=stem_channels, + channels=stem_channels * 2, + num_blocks=layers[1], + stride=2, + ) + + self.layer3 = nn.ModuleList( + [ + make_layer( + block=block, + in_channels=stem_channels * 2, + channels=stem_channels * 4, + num_blocks=layers[2], + stride=2, + ) + ] + + [ + make_layer( + block=block, + in_channels=stem_channels * 4, + channels=stem_channels * 4, + num_blocks=layers[2], + stride=1, + ) + for _ in range(layer3_repeats - 1) + ] + ) + + self.layer4 = make_layer( + block=block, + in_channels=stem_channels * 4, + channels=stem_channels * 8, + num_blocks=layers[3], + stride=2, + ) + + def get_backbone_output_number_of_channels(self) -> dict[str, int]: + """Determine the number of output channels for each layer of the + backbone. + + Returns a dictionary with keys "layer2", "layer3", "layer4" and + their respective number of output channels. + + @return: Dictionary of output channel counts for each layer. + """ + output_shapes = {} + x = torch.randn(1, self.input_channels, 320, 320) + x = self.stem(x) + x = self.layer1(x) + x = self.layer2(x) + output_shapes["layer2"] = x.shape[1] + + for layer in self.layer3: + x = layer(x) + output_shapes["layer3"] = x.shape[1] + + x = self.layer4(x) + output_shapes["layer4"] = x.shape[1] + + return output_shapes + + +def make_layer( + block: type[nn.Module], + in_channels: int, + channels: int, + num_blocks: int, + stride: int = 1, + expansion: int = 1, +) -> nn.Sequential: + """Creates a sequential layer consisting of a series of blocks. + + @type block: Type[nn.Module] + @param block: The block class to be used. + @type in_channels: int + @param in_channels: Number of input channels. + @type channels: int + @param channels: Number of output channels. + @type num_blocks: int + @param num_blocks: Number of blocks in the layer. + @type stride: int + @param stride: Stride for the first block. Defaults to 1. + @type expansion: int + @param expansion: Expansion factor for the block. Defaults to 1. + @return: A sequential container of the blocks. 
+ """ + layers: list[nn.Module] = [] + + layers.append( + block( + in_channels, + channels, + stride, + final_relu=num_blocks > 1, + expansion=expansion, + ) + ) + + in_channels = channels * expansion + + if num_blocks > 1: + for i in range(1, num_blocks): + final_relu = i != (num_blocks - 1) + layers.append( + block( + in_channels, + channels, + stride=1, + final_relu=final_relu, + expansion=expansion, + ) + ) + + return nn.Sequential(*layers) diff --git a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py new file mode 100644 index 00000000..37779a19 --- /dev/null +++ b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py @@ -0,0 +1,294 @@ +from typing import Literal + +from torch import Tensor, nn + +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.nodes.blocks import ( + BasicResNetBlock, + Bottleneck, + ConvModule, + UpscaleOnline, +) + +from .blocks import DAPPM, BasicDDRBackbone, make_layer +from .variants import get_variant + + +class DDRNet(BaseNode[Tensor, list[Tensor]]): + in_channels: int + + def __init__( + self, + variant: Literal["23-slim", "23"] = "23-slim", + channels: int | None = None, + highres_channels: int | None = None, + use_aux_heads: bool = True, + upscale_module: nn.Module | None = None, + spp_width: int = 128, + ssp_inter_mode: str = "bilinear", + segmentation_inter_mode: str = "bilinear", + # TODO: nn.Module registry + block: type[nn.Module] = BasicResNetBlock, + skip_block: type[nn.Module] = BasicResNetBlock, + layer5_block: type[nn.Module] = Bottleneck, + layer5_bottleneck_expansion: int = 2, + spp_kernel_sizes: list[int] | None = None, + spp_strides: list[int] | None = None, + layer3_repeats: int = 1, + layers: list[int] | None = None, + **kwargs, + ): + """DDRNet backbone. + + @see: U{Adapted from } + @see: U{Original code } + @see: U{Paper } + @license: U{Apache License, Version 2.0 } + @type variant: Literal["23-slim", "23"] + @param variant: DDRNet variant. Defaults to "23-slim". + The variant determines the number of channels and highres_channels. + The following variants are available: + - "23-slim" (default): channels=32, highres_channels=64 + - "23": channels=64, highres_channels=128 + @type channels: int | None + @param channels: Base number of channels. If provided, overrides the variant values. + @type highres_channels: int | None + @param highres_channels: Number of channels in the high resolution net. If provided, overrides the variant values. + @type use_aux_heads: bool + @param use_aux_heads: Whether to use auxiliary heads. Defaults to True. + @type upscale_module: nn.Module + @param upscale_module: Module for upscaling (e.g., bilinear interpolation). + Defaults to UpscaleOnline(). + @type spp_width: int + @param spp_width: Width of the branches in the SPP block. Defaults to 128. + @type ssp_inter_mode: str + @param ssp_inter_mode: Interpolation mode for the SPP block. Defaults to + "bilinear". + @type segmentation_inter_mode: str + @param segmentation_inter_mode: Interpolation mode for the segmentation head. + Defaults to "bilinear". + @type block: type[nn.Module] + @param block: type of block to use in the backbone. Defaults to + BasicResNetBlock. + @type skip_block: type[nn.Module] + @param skip_block: type of block for skip connections. Defaults to + BasicResNetBlock. + @type layer5_block: type[nn.Module] + @param layer5_block: type of block for layer5 and layer5_skip. Defaults to + Bottleneck. 
+ @type layer5_bottleneck_expansion: int + @param layer5_bottleneck_expansion: Expansion factor for Bottleneck block in + layer5. Defaults to 2. + @type spp_kernel_sizes: list[int] + @param spp_kernel_sizes: Kernel sizes for the SPP module pooling. Defaults to + [1, 5, 9, 17, 0]. + @type spp_strides: list[int] + @param spp_strides: Strides for the SPP module pooling. Defaults to [1, 2, 4, 8, + 0]. + @type layer3_repeats: int + @param layer3_repeats: Number of times to repeat the 3rd stage. Defaults to 1. + @type layers: list[int] + @param layers: Number of blocks in each layer of the backbone. Defaults to [2, + 2, 2, 2, 1, 2, 2, 1]. + @type kwargs: Any + @param kwargs: Additional arguments to pass to L{BaseNode}. + """ + super().__init__(**kwargs) + + upscale_module = upscale_module or UpscaleOnline() + spp_kernel_sizes = spp_kernel_sizes or [1, 5, 9, 17, 0] + spp_strides = spp_strides or [1, 2, 4, 8, 0] + layers = layers or [2, 2, 2, 2, 1, 2, 2, 1] + + var = get_variant(variant) + + channels = channels or var.channels + highres_channels = highres_channels or var.highres_channels + + self._use_aux_heads = use_aux_heads + self.upscale = upscale_module + self.ssp_inter_mode = ssp_inter_mode + self.segmentation_inter_mode = segmentation_inter_mode + self.relu = nn.ReLU(inplace=False) + self.layer3_repeats = layer3_repeats + self.channels = channels + self.layers = layers + self.backbone_layers, self.additional_layers = ( + self.layers[:4], + self.layers[4:], + ) + + self._backbone = BasicDDRBackbone( + block=block, + stem_channels=self.channels, + layers=self.backbone_layers, + in_channels=self.in_channels, + layer3_repeats=self.layer3_repeats, + ) + out_chan_backbone = ( + self._backbone.get_backbone_output_number_of_channels() + ) + + # Define layers for layer 3 + self.compression3 = nn.ModuleList() + self.down3 = nn.ModuleList() + self.layer3_skip = nn.ModuleList() + for i in range(layer3_repeats): + self.compression3.append( + ConvModule( + in_channels=out_chan_backbone["layer3"], + out_channels=highres_channels, + kernel_size=1, + bias=False, + activation=nn.Identity(), + ) + ) + self.down3.append( + ConvModule( + in_channels=highres_channels, + out_channels=out_chan_backbone["layer3"], + kernel_size=3, + stride=2, + padding=1, + bias=False, + activation=nn.Identity(), + ) + ) + self.layer3_skip.append( + make_layer( + in_channels=out_chan_backbone["layer2"] + if i == 0 + else highres_channels, + channels=highres_channels, + block=skip_block, + num_blocks=self.additional_layers[1], + ) + ) + + self.compression4 = ConvModule( + in_channels=out_chan_backbone["layer4"], + out_channels=highres_channels, + kernel_size=1, + bias=False, + activation=nn.Identity(), + ) + + self.down4 = nn.Sequential( + ConvModule( + in_channels=highres_channels, + out_channels=highres_channels * 2, + kernel_size=3, + stride=2, + padding=1, + bias=False, + activation=nn.ReLU(inplace=True), + ), + ConvModule( + in_channels=highres_channels * 2, + out_channels=out_chan_backbone["layer4"], + kernel_size=3, + stride=2, + padding=1, + bias=False, + activation=nn.Identity(), + ), + ) + + self.layer4_skip = make_layer( + block=skip_block, + in_channels=highres_channels, + channels=highres_channels, + num_blocks=self.additional_layers[2], + ) + self.layer5_skip = make_layer( + block=layer5_block, + in_channels=highres_channels, + channels=highres_channels, + num_blocks=self.additional_layers[3], + expansion=layer5_bottleneck_expansion, + ) + + self.layer5 = make_layer( + block=layer5_block, + 
in_channels=out_chan_backbone["layer4"], + channels=out_chan_backbone["layer4"], + num_blocks=self.additional_layers[0], + stride=2, + expansion=layer5_bottleneck_expansion, + ) + + self.spp = DAPPM( + in_channels=out_chan_backbone["layer4"] + * layer5_bottleneck_expansion, + branch_channels=spp_width, + out_channels=highres_channels * layer5_bottleneck_expansion, + inter_mode=self.ssp_inter_mode, + kernel_sizes=spp_kernel_sizes, + strides=spp_strides, + ) + + self.highres_channels = highres_channels + self.layer5_bottleneck_expansion = layer5_bottleneck_expansion + self.init_params() + + def forward(self, inputs: Tensor) -> list[Tensor]: + width_output = inputs.shape[-1] // 8 + height_output = inputs.shape[-2] // 8 + + x = self._backbone.stem(inputs) + x = self._backbone.layer1(x) + x = self._backbone.layer2(self.relu(x)) + + # Repeat layer 3 + x_skip = x + for i in range(self.layer3_repeats): + out_layer3 = self._backbone.layer3[i](self.relu(x)) + out_layer3_skip = self.layer3_skip[i](self.relu(x_skip)) + + x = out_layer3 + self.down3[i](self.relu(out_layer3_skip)) + x_skip = out_layer3_skip + self.upscale( + self.compression3[i](self.relu(out_layer3)), + height_output, + width_output, + ) + + # Save for auxiliary head + if self._use_aux_heads: + x_extra = x_skip + + out_layer4 = self._backbone.layer4(self.relu(x)) + out_layer4_skip = self.layer4_skip(self.relu(x_skip)) + + x = out_layer4 + self.down4(self.relu(out_layer4_skip)) + x_skip = out_layer4_skip + self.upscale( + self.compression4(self.relu(out_layer4)), + height_output, + width_output, + ) + + out_layer5_skip = self.layer5_skip(self.relu(x_skip)) + + x = self.upscale( + self.spp(self.layer5(self.relu(x))), height_output, width_output + ) + + x = x + out_layer5_skip + + if self._use_aux_heads: + return [x_extra, x] + else: + return [x] + + def init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_( + m.weight, mode="fan_out", nonlinearity="relu" + ) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) diff --git a/luxonis_train/nodes/backbones/ddrnet/variants.py b/luxonis_train/nodes/backbones/ddrnet/variants.py new file mode 100644 index 00000000..0e2d66c7 --- /dev/null +++ b/luxonis_train/nodes/backbones/ddrnet/variants.py @@ -0,0 +1,27 @@ +from typing import Literal + +from pydantic import BaseModel + + +class DDRNetVariant(BaseModel): + channels: int = 32 + highres_channels: int = 64 + + +def get_variant(variant: Literal["23-slim", "23"]) -> DDRNetVariant: + variants = { + "23-slim": DDRNetVariant( + channels=32, + highres_channels=64, + ), + "23": DDRNetVariant( + channels=64, + highres_channels=128, + ), + } + if variant not in variants: # pragma: no cover + raise ValueError( + "DDRNet model variant should be in " + f"{list(variants.keys())}, got {variant}." 
+ ) + return variants[variant] diff --git a/luxonis_train/nodes/blocks/__init__.py b/luxonis_train/nodes/blocks/__init__.py index a87c336e..52c3408e 100644 --- a/luxonis_train/nodes/blocks/__init__.py +++ b/luxonis_train/nodes/blocks/__init__.py @@ -1,7 +1,10 @@ from .blocks import ( AttentionRefinmentBlock, + BasicResNetBlock, BlockRepeater, + Bottleneck, ConvModule, + DropPath, EfficientDecoupledBlock, FeatureFusionBlock, KeypointBlock, @@ -14,6 +17,7 @@ SpatialPyramidPoolingBlock, SqueezeExciteBlock, UpBlock, + UpscaleOnline, autopad, ) @@ -34,4 +38,8 @@ "LearnableMulAddConv", "KeypointBlock", "RepUpBlock", + "BasicResNetBlock", + "Bottleneck", + "UpscaleOnline", + "DropPath", ] diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index 9231ea85..99fe2a9a 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -3,6 +3,7 @@ import numpy as np import torch +import torch.nn.functional as F from torch import Tensor, nn from luxonis_train.nodes.activations import HSigmoid @@ -725,3 +726,238 @@ def autopad(kernel_size: T, padding: T | None = None) -> T: if isinstance(kernel_size, int): return kernel_size // 2 return tuple(x // 2 for x in kernel_size) + + +class BasicResNetBlock(nn.Module): + def __init__( + self, + in_planes: int, + planes: int, + stride: int = 1, + expansion: int = 1, + final_relu: bool = True, + droppath_prob: float = 0.0, + ): + """A basic residual block for ResNet. + + @type in_planes: int + @param in_planes: Number of input channels. + @type planes: int + @param planes: Number of output channels. + @type stride: int + @param stride: Stride for the convolutional layers. Defaults to 1. + @type expansion: int + @param expansion: Expansion factor for the output channels. Defaults to 1. + @type final_relu: bool + @param final_relu: Whether to apply a ReLU activation after the residual + addition. Defaults to True. + @type droppath_prob: float + @param droppath_prob: Drop path probability for stochastic depth. Defaults to + 0.0. + """ + super().__init__() + self.expansion = expansion + self.conv1 = nn.Conv2d( + in_planes, + planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False, + ) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d( + planes, planes, kernel_size=3, stride=1, padding=1, bias=False + ) + self.bn2 = nn.BatchNorm2d(planes) + self.final_relu = final_relu + + self.drop_path = DropPath(drop_prob=droppath_prob) + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion * planes: + self.shortcut = nn.Sequential( + nn.Conv2d( + in_planes, + self.expansion * planes, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm2d(self.expansion * planes), + ) + + def forward(self, x: Tensor) -> Tensor: + out = F.relu(self.bn1(self.conv1(x))) + out = self.bn2(self.conv2(out)) + out = self.drop_path(out) + out += self.shortcut(x) + if self.final_relu: + out = F.relu(out) + return out + + +class Bottleneck(nn.Module): + def __init__( + self, + in_planes: int, + planes: int, + stride: int = 1, + expansion: int = 4, + final_relu: bool = True, + droppath_prob: float = 0.0, + ): + """A bottleneck block for ResNet. + + @type in_planes: int + @param in_planes: Number of input channels. + @type planes: int + @param planes: Number of intermediate channels. + @type stride: int + @param stride: Stride for the second convolutional layer. Defaults to 1. + @type expansion: int + @param expansion: Expansion factor for the output channels. Defaults to 4. 
+ @type final_relu: bool + @param final_relu: Whether to apply a ReLU activation after the residual + addition. Defaults to True. + @type droppath_prob: float + @param droppath_prob: Drop path probability for stochastic depth. Defaults to + 0.0. + """ + super().__init__() + self.expansion = expansion + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d( + planes, planes, kernel_size=3, stride=stride, padding=1, bias=False + ) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d( + planes, self.expansion * planes, kernel_size=1, bias=False + ) + self.bn3 = nn.BatchNorm2d(self.expansion * planes) + self.final_relu = final_relu + + self.drop_path = DropPath(drop_prob=droppath_prob) + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion * planes: + self.shortcut = nn.Sequential( + nn.Conv2d( + in_planes, + self.expansion * planes, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm2d(self.expansion * planes), + ) + + def forward(self, x: Tensor) -> Tensor: + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + out = self.bn3(self.conv3(out)) + + out = self.drop_path(out) + out += self.shortcut(x) + + if self.final_relu: + out = F.relu(out) + + return out + + +class UpscaleOnline(nn.Module): + """Upscale tensor to a specified size during the forward pass. + + This class supports cases where the required scale/size is only + known when the input is received. Only the interpolation mode is set + in advance. + """ + + def __init__(self, mode: str = "bilinear"): + """Initialize UpscaleOnline with the interpolation mode. + + @type mode: str + @param mode: Interpolation mode for resizing. Defaults to + "bilinear". + """ + super().__init__() + self.mode = mode + + def forward( + self, x: Tensor, output_height: int, output_width: int + ) -> Tensor: + """Upscale the input tensor to the specified height and width. + + @type x: Tensor + @param x: Input tensor to be upscaled. + @type output_height: int + @param output_height: Desired height of the output tensor. + @type output_width: int + @param output_width: Desired width of the output tensor. + @return: Upscaled tensor. + """ + return F.interpolate( + x, size=[output_height, output_width], mode=self.mode + ) + + +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample, when applied in the + main path of residual blocks. + + Intended usage of this block is as follows: + + >>> class ResNetBlock(nn.Module): + ... def __init__(self, ..., drop_path_rate: float): + ... self.drop_path = DropPath(drop_path_rate) + + ... def forward(self, x): + ... return x + self.drop_path(self.conv_bn_act(x)) + + @see U{Original code (TIMM) } + @license: U{Apache License 2.0 } + """ + + def __init__(self, drop_prob: float = 0.0, scale_by_keep: bool = True): + """Initializes the DropPath module. + + @type drop_prob: float + @param drop_prob: Probability of zeroing out individual vectors + (channel dimension) of each feature map. Defaults to 0.0. + @type scale_by_keep: bool + @param scale_by_keep: Whether to scale the output by the keep + probability. Enabled by default to maintain output mean & + std in the same range as without DropPath. Defaults to True. 
+ """ + super().__init__() + self.drop_prob = drop_prob + self.scale_by_keep = scale_by_keep + + def drop_path( + self, x: Tensor, drop_prob: float = 0.0, scale_by_keep: bool = True + ) -> Tensor: + """Drop paths (Stochastic Depth) per sample when applied in the + main path of residual blocks. + + @type x: Tensor + @param x: Input tensor. + @type drop_prob: float + @param drop_prob: Probability of dropping a path. Defaults to + 0.0. + @type scale_by_keep: bool + @param scale_by_keep: Whether to scale the output by the keep + probability. Defaults to True. + @return: Tensor with dropped paths based on the provided drop + probability. + """ + keep_prob = 1 - drop_prob + shape = (x.shape[0],) + (1,) * (x.ndim - 1) + random_tensor = x.new_empty(shape).bernoulli_(keep_prob) + if keep_prob > 0.0 and scale_by_keep: + random_tensor.div_(keep_prob) + return x * random_tensor + + def forward(self, x: Tensor) -> Tensor: + if self.drop_prob == 0.0 or not self.training: + return x + return self.drop_path(x, self.drop_prob, self.scale_by_keep) diff --git a/luxonis_train/nodes/heads/__init__.py b/luxonis_train/nodes/heads/__init__.py index 28b5e8ca..e188f188 100644 --- a/luxonis_train/nodes/heads/__init__.py +++ b/luxonis_train/nodes/heads/__init__.py @@ -1,5 +1,6 @@ from .bisenet_head import BiSeNetHead from .classification_head import ClassificationHead +from .ddrnet_segmentation_head import DDRNetSegmentationHead from .efficient_bbox_head import EfficientBBoxHead from .efficient_keypoint_bbox_head import EfficientKeypointBBoxHead from .implicit_keypoint_bbox_head import ImplicitKeypointBBoxHead @@ -12,4 +13,5 @@ "EfficientKeypointBBoxHead", "ImplicitKeypointBBoxHead", "SegmentationHead", + "DDRNetSegmentationHead", ] diff --git a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py new file mode 100644 index 00000000..5e8468b0 --- /dev/null +++ b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py @@ -0,0 +1,109 @@ +import logging + +import torch +import torch.nn as nn +from torch import Tensor + +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.utils.general import infer_upscale_factor +from luxonis_train.utils.types import LabelType + +logger = logging.getLogger(__name__) + + +class DDRNetSegmentationHead(BaseNode[Tensor, Tensor]): + attach_index: int = -1 + in_height: int + in_width: int + in_channels: int + + tasks: list[LabelType] = [LabelType.SEGMENTATION] + + def __init__( + self, + inter_channels: int = 64, + inter_mode: str = "bilinear", + **kwargs, + ): + """DDRNet segmentation head. + + @see: U{Adapted from } + @see: U{Original code } + @see: U{Paper } + @license: U{Apache License, Version 2.0 } + @type inter_channels: int + @param inter_channels: Width of internal conv. Must be a multiple of + scale_factor^2 when inter_mode is pixel_shuffle. Defaults to 64. + @type inter_mode: str + @param inter_mode: Upsampling method. One of nearest, linear, bilinear, bicubic, + trilinear, area or pixel_shuffle. If pixel_shuffle is set, nn.PixelShuffle + is used for scaling. Defaults to "bilinear". + """ + super().__init__(**kwargs) + model_in_h, model_in_w = self.original_in_shape[1:] + scale_factor = 2 ** infer_upscale_factor( + (self.in_height, self.in_width), (model_in_h, model_in_w) + ) + self.scale_factor = scale_factor + + if ( + inter_mode == "pixel_shuffle" + and inter_channels % (scale_factor**2) != 0 + ): + raise ValueError( + "For pixel_shuffle, inter_channels must be a multiple of scale_factor^2." 
+ ) + + self.bn1 = nn.BatchNorm2d(self.in_channels) + self.conv1 = nn.Conv2d( + self.in_channels, + inter_channels, + kernel_size=3, + padding=1, + bias=False, + ) + self.bn2 = nn.BatchNorm2d(inter_channels) + self.relu = nn.ReLU(inplace=True) + + self.conv2 = nn.Conv2d( + inter_channels, + inter_channels + if inter_mode == "pixel_shuffle" + else self.n_classes, + kernel_size=1, + padding=0, + bias=True, + ) + self.upscale = ( + nn.PixelShuffle(scale_factor) + if inter_mode == "pixel_shuffle" + else nn.Upsample(scale_factor=scale_factor, mode=inter_mode) + ) + + def forward(self, inputs: Tensor) -> Tensor: + x = self.relu(self.bn1(inputs)) + x = self.conv1(x) + x = self.relu(self.bn2(x)) + x = self.conv2(x) + x = self.upscale(x) + if self.export: + return x.argmax(dim=1) + return x + + def set_export_mode(self, mode: bool = True) -> None: + """Sets the module to export mode. + + Replaces the forward method with a constant empty tensor. + + @warning: The replacement is destructive and cannot be undone. + @type mode: bool + @param mode: Whether to set the export mode to True or False. + Defaults to True. + """ + super().set_export_mode(mode) + if self.export and self.attach_index != -1: + logger.info("Removing the auxiliary head.") + + self.forward = lambda inputs: torch.tensor([]) diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index b94f08a5..09cb8795 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -120,7 +120,8 @@ def check_main_metric(self) -> Self: name = metric.alias or metric.name logger.info(f"Setting '{name}' as main metric.") else: - logger.error( + logger.warning( + "[Ignore if using predefined model] " "No metrics specified. " "This is likely unintended unless " "the configuration is not used for training." diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py index 784db01a..069e53b0 100644 --- a/tests/integration/test_simple.py +++ b/tests/integration/test_simple.py @@ -47,6 +47,7 @@ def clear_files(): "segmentation_model", "detection_model", "keypoint_bbox_model", + "ddrnet_segmentation_model", ], ) def test_predefined_models( From 6ab32042cfd9dfadcd0e2e6428175f253fd6e82d Mon Sep 17 00:00:00 2001 From: Anton Makoveev Date: Sun, 22 Sep 2024 22:07:59 +0200 Subject: [PATCH 75/75] [Fix]: fix docstrings --- luxonis_train/assigners/utils.py | 18 +-- .../metrics/mean_average_precision_obb.py | 115 ++++++-------- luxonis_train/utils/boundingbox.py | 144 +++++++++--------- 3 files changed, 129 insertions(+), 148 deletions(-) diff --git a/luxonis_train/assigners/utils.py b/luxonis_train/assigners/utils.py index 553bc910..3c3af4ff 100644 --- a/luxonis_train/assigners/utils.py +++ b/luxonis_train/assigners/utils.py @@ -32,16 +32,16 @@ def candidates_in_gt( return candidates -def candidates_in_gt_obb(xy_centers, gt_bboxes): - """Select the positive anchor center in gt for rotated bounding - boxes. +def candidates_in_gt_obb(xy_centers: Tensor, gt_bboxes: Tensor) -> Tensor: + """Select the positive anchor center in ground truth for rotated + bounding boxes. - Args: - xy_centers (Tensor): shape(h*w, 2) - gt_bboxes (Tensor): shape(b, n_boxes, 5) - - Returns: - (Tensor): shape(b, n_boxes, h*w) + @type xy_centers: Tensor + @param xy_centers: Shape (h*w, 2). + @type gt_bboxes: Tensor + @param gt_bboxes: Shape (b, n_boxes, 5). + @rtype: Tensor + @return: Shape (b, n_boxes, h*w). 
""" corners = xywhr2xyxyxyxy( gt_bboxes diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py b/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py index d3434535..3421d765 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision_obb.py @@ -259,9 +259,8 @@ def _update_metrics(self, results: tuple[np.ndarray, ...]): - all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10). - ap_class_index (list): Index of class for each AP score. Shape: (nc,). - Side Effects: - Updates the class attributes `self.p`, `self.r`, `self.f1`, `self.all_ap`, and `self.ap_class_index` based - on the values provided in the `results` tuple. + @note: Updates the class attributes `self.p`, `self.r`, `self.f1`, `self.all_ap`, + and `self.ap_class_index` based on the values provided in the `results` tuple. """ # The following logic impies averaging AP over all classes self.p = torch.tensor(np.mean(results[0])) @@ -269,18 +268,6 @@ def _update_metrics(self, results: tuple[np.ndarray, ...]): self.f1 = torch.tensor(np.mean(results[2])) self.all_ap = torch.tensor(np.mean(results[3])) self.ap_class_index = torch.tensor(np.mean(results[4])) - # ( - # self.p, - # self.r, - # self.f1, - # self.all_ap, - # self.ap_class_index, - # _, # self.p_curve, - # _, # self.r_curve, - # _, # self.f1_curve, - # _, # self.px, - # _, # self.prec_values, - # ) = results def _process( self, @@ -296,10 +283,6 @@ def _process( conf, pred_cls, target_cls, - # plot=self.plot, - # save_dir=self.save_dir, - # names=self.names, - # on_plot=self.on_plot, )[2:] return results @@ -309,42 +292,36 @@ def ap_per_class( conf: np.ndarray, pred_cls: np.ndarray, target_cls: np.ndarray, - # plot=False, - # on_plot=None, - # save_dir=Path(), - # names={}, eps: float = 1e-16, - # prefix="", ) -> tuple[np.ndarray, ...]: - """Computes the average precision per class for object detection + """Compute the average precision per class for object detection evaluation. - Args: - tp (np.ndarray): Binary array indicating whether the detection is correct (True) or not (False). - conf (np.ndarray): Array of confidence scores of the detections. - pred_cls (np.ndarray): Array of predicted classes of the detections. - target_cls (np.ndarray): Array of true classes of the detections. - plot (bool, optional): Whether to plot PR curves or not. Defaults to False. - on_plot (func, optional): A callback to pass plots path and data when they are rendered. Defaults to None. - save_dir (Path, optional): Directory to save the PR curves. Defaults to an empty path. - names (dict, optional): Dict of class names to plot PR curves. Defaults to an empty tuple. - eps (float, optional): A small value to avoid division by zero. Defaults to 1e-16. - prefix (str, optional): A prefix string for saving the plot files. Defaults to an empty string. - - Returns: - (tuple): A tuple of six arrays and one array of unique classes, where: - tp (np.ndarray): True positive counts at threshold given by max F1 metric for each class.Shape: (nc,). - fp (np.ndarray): False positive counts at threshold given by max F1 metric for each class. Shape: (nc,). - p (np.ndarray): Precision values at threshold given by max F1 metric for each class. Shape: (nc,). - r (np.ndarray): Recall values at threshold given by max F1 metric for each class. Shape: (nc,). - f1 (np.ndarray): F1-score values at threshold given by max F1 metric for each class. Shape: (nc,). 
- ap (np.ndarray): Average precision for each class at different IoU thresholds. Shape: (nc, 10). - unique_classes (np.ndarray): An array of unique classes that have data. Shape: (nc,). - p_curve (np.ndarray): Precision curves for each class. Shape: (nc, 1000). - r_curve (np.ndarray): Recall curves for each class. Shape: (nc, 1000). - f1_curve (np.ndarray): F1-score curves for each class. Shape: (nc, 1000). - x (np.ndarray): X-axis values for the curves. Shape: (1000,). - prec_values: Precision values at mAP@0.5 for each class. Shape: (nc, 1000). + @type tp: np.ndarray + @param tp: Binary array indicating whether the detection is correct (True) or not (False). + @type conf: np.ndarray + @param conf: Array of confidence scores of the detections. + @type pred_cls: np.ndarray + @param pred_cls: Array of predicted classes of the detections. + @type target_cls: np.ndarray + @param target_cls: Array of true classes of the detections. + @type eps: float + @param eps: A small value to avoid division by zero. Defaults to 1e-16. + + @rtype: tuple[np.ndarray, ...] + @return: A tuple of six arrays and one array of unique classes, where: + - tp (np.ndarray): True positive counts at threshold given by max F1 metric for each class. Shape: (nc,). + - fp (np.ndarray): False positive counts at threshold given by max F1 metric for each class. Shape: (nc,). + - p (np.ndarray): Precision values at threshold given by max F1 metric for each class. Shape: (nc,). + - r (np.ndarray): Recall values at threshold given by max F1 metric for each class. Shape: (nc,). + - f1 (np.ndarray): F1-score values at threshold given by max F1 metric for each class. Shape: (nc,). + - ap (np.ndarray): Average precision for each class at different IoU thresholds. Shape: (nc, 10). + - unique_classes (np.ndarray): An array of unique classes that have data. Shape: (nc,). + - p_curve (np.ndarray): Precision curves for each class. Shape: (nc, 1000). + - r_curve (np.ndarray): Recall curves for each class. Shape: (nc, 1000). + - f1_curve (np.ndarray): F1-score curves for each class. Shape: (nc, 1000). + - x (np.ndarray): X-axis values for the curves. Shape: (1000,). + - prec_values: Precision values at mAP@0.5 for each class. Shape: (nc, 1000). """ # Sort by objectness i = np.argsort(-conf) @@ -391,8 +368,6 @@ def ap_per_class( ap[ci, j], mpre, mrec = MeanAveragePrecisionOBB.compute_ap( recall[:, j], precision[:, j] ) - # if plot and j == 0: - # prec_values.append(np.interp(x, mrec, mpre)) # precision at mAP@0.5 prec_values = np.array(prec_values) # (nc, 1000) @@ -428,17 +403,19 @@ def ap_per_class( def compute_ap( recall: list[float], precision: list[float] ) -> tuple[float, np.ndarray, np.ndarray]: - """Compute the average precision (AP) given the recall and - precision curves. - - Args: - recall (list): The recall curve. - precision (list): The precision curve. - - Returns: - (float): Average precision. - (np.ndarray): Precision envelope curve. - (np.ndarray): Modified recall curve with sentinel values added at the beginning and end. + """Compute average precision (AP) given recall and precision + curves. + + @type recall: list[float] + @param recall: The recall curve. + @type precision: list + @param precision: The precision curve. + + @rtype: tuple[float, np.ndarray, np.ndarray] + @return: A tuple containing: + - (float): Average precision. + - (np.ndarray): Precision envelope curve. + - (np.ndarray): Modified recall curve with sentinel values added at the beginning and end. 
""" # Append sentinel values to beginning and end mrec = np.concatenate(([0.0], recall, [1.0])) @@ -474,10 +451,12 @@ def smooth(y: np.ndarray, f: float = 0.05) -> np.ndarray: @staticmethod def map(all_ap: np.ndarray) -> float: - """ - Returns the mean Average Precision (mAP) over IoU thresholds of 0.5 - 0.95 in steps of 0.05. + """Return mean Average Precision (mAP) over IoU thresholds of 0.5 - 0.95 in steps of 0.05. + + @type all_ap: np.ndarray + @param all_ap: Average Precission for all classes. - Returns: - (float): The mAP over IoU thresholds of 0.5 - 0.95 in steps of 0.05. + @rtype: float + @return: mAP over IoU thresholds of 0.5 - 0.95 in steps of 0.05. """ return all_ap.mean() if len(all_ap) else 0.0 diff --git a/luxonis_train/utils/boundingbox.py b/luxonis_train/utils/boundingbox.py index a18bd823..4c9dab8c 100644 --- a/luxonis_train/utils/boundingbox.py +++ b/luxonis_train/utils/boundingbox.py @@ -173,18 +173,18 @@ def bbox2dist(bbox: Tensor, anchor_points: Tensor, reg_max: float) -> Tensor: return dist -def xyxyxyxy2xywhr(x: Tensor) -> Tensor | np.ndarray: +def xyxyxyxy2xywhr(x: np.ndarray | Tensor) -> np.ndarray | Tensor: """Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, xy3, xy4] to [xywh, rotation]. Rotation values are returned in radians from 0 to pi/2. - Args: - x (numpy.ndarray | torch.Tensor): Input box corners [xy1, xy2, xy3, xy4] of shape (n, 8). - - Returns: - (numpy.ndarray | torch.Tensor): Converted data in [cx, cy, w, h, rotation] format of shape (n, 5). + @type x: np.ndarray | Tensor + @param x: Input box corners [xy1, xy2, xy3, xy4] of shape (n, 8). + @rtype: np.ndarray | Tensor + @return: Converted data in [cx, cy, w, h, rotation] format of shape + (n, 5). """ - is_torch = isinstance(x, torch.Tensor) + is_torch = isinstance(x, Tensor) points = x.cpu().numpy() if is_torch else x points = points.reshape(len(x), -1, 2) rboxes = [] @@ -200,20 +200,20 @@ def xyxyxyxy2xywhr(x: Tensor) -> Tensor | np.ndarray: ) -def xywhr2xyxyxyxy(x: Tensor) -> Tensor | np.ndarray: +def xywhr2xyxyxyxy(x: Tensor) -> np.ndarray | Tensor: """Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2, xy3, xy4]. Rotation values should be in radians from 0 to pi/2. - Args: - x (numpy.ndarray | torch.Tensor): Boxes in [cx, cy, w, h, rotation] format of shape (n, 5) or (b, n, 5). - - Returns: - (numpy.ndarray | torch.Tensor): Converted corner points of shape (n, 4, 2) or (b, n, 4, 2). + @type x: Tensor + @param x: Boxes in [cx, cy, w, h, rotation] format of shape (n, 5) + or (b, n, 5). + @rtype: numpy.ndarray | Tensor + @return: Converted corner points of shape (n, 4, 2) or (b, n, 4, 2). """ cos, sin, cat, stack = ( (torch.cos, torch.sin, torch.cat, torch.stack) - if isinstance(x, torch.Tensor) + if isinstance(x, Tensor) else (np.cos, np.sin, np.concatenate, np.stack) ) @@ -234,21 +234,20 @@ def xywhr2xyxyxyxy(x: Tensor) -> Tensor | np.ndarray: def xyxy2xywh(x: Tensor) -> Tensor: """Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height) format where (x1, y1) is the top-left corner - and (x2, y2) is the bottom- right corner. - - Args: - x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format. + and (x2, y2) is the bottom-right corner. - Returns: - y (np.ndarray | torch.Tensor): The bounding box coordinates in (x, y, width, height) format. + @type x: Tensor + @param x: The input bounding box coordinates in (x1, y1, x2, y2) + format. 
+ @rtype: Tensor + @return: The bounding box coordinates in (x, y, width, height) + format. """ assert ( x.shape[-1] == 4 ), f"input shape last dimension expected 4 but input shape is {x.shape}" y = ( - torch.empty_like(x) - if isinstance(x, torch.Tensor) - else np.empty_like(x) + torch.empty_like(x) if isinstance(x, Tensor) else np.empty_like(x) ) # faster than clone/copy y[..., 0] = (x[..., 0] + x[..., 2]) / 2 # x center y[..., 1] = (x[..., 1] + x[..., 3]) / 2 # y center @@ -260,22 +259,20 @@ def xyxy2xywh(x: Tensor) -> Tensor: def xywh2xyxy(x: Tensor) -> Tensor: """Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the top-left - corner and (x2, y2) is the bottom- right corner. Note: ops per 2 + corner and (x2, y2) is the bottom-right corner. Note: ops per 2 channels faster than per channel. - Args: - x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x, y, width, height) format. - - Returns: - y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format. + @type x: Tensor + @param x: The input bounding box coordinates in (x, y, width, + height) format. + @rtype: Tensor + @return: The bounding box coordinates in (x1, y1, x2, y2) format. """ assert ( x.shape[-1] == 4 ), f"input shape last dimension expected 4 but input shape is {x.shape}" y = ( - torch.empty_like(x) - if isinstance(x, torch.Tensor) - else np.empty_like(x) + torch.empty_like(x) if isinstance(x, Tensor) else np.empty_like(x) ) # faster than clone/copy xy = x[..., :2] # centers wh = x[..., 2:] / 2 # half width-height @@ -410,20 +407,21 @@ def probiou( ) -> Tensor: """Calculate probabilistic IoU between oriented bounding boxes. - Implements the algorithm from https://arxiv.org/pdf/2106.06072v1.pdf. - - Args: - obb1 (torch.Tensor): Ground truth OBBs, shape (N, 5), format xywhr. - obb2 (torch.Tensor): Predicted OBBs, shape (N, 5), format xywhr. - CIoU (bool, optional): If True, calculate CIoU. Defaults to False. - eps (float, optional): Small value to avoid division by zero. Defaults to 1e-7. - - Returns: - (torch.Tensor): OBB similarities, shape (N,). - - Note: - OBB format: [center_x, center_y, width, height, rotation_angle]. - If CIoU is True, returns CIoU instead of IoU. + Implements the algorithm from + https://arxiv.org/pdf/2106.06072v1.pdf. + + @type obb1: Tensor + @param obb1: Ground truth OBBs, shape (N, 5), format xywhr. + @type obb2: Tensor + @param obb2: Predicted OBBs, shape (N, 5), format xywhr. + @type CIoU: bool + @param CIoU: If True, calculate CIoU. Defaults to False. + @type eps: float + @param eps: Small value to avoid division by zero. Defaults to 1e-7. + @rtype: Tensor + @return: OBB similarities, shape (N,). + @note: OBB format: [center_x, center_y, width, height, + rotation_angle]. If CIoU is True, returns CIoU instead of IoU. """ x1, y1 = obb1[..., :2].split(1, dim=-1) x2, y2 = obb2[..., :2].split(1, dim=-1) @@ -464,16 +462,18 @@ def probiou( def batch_probiou(obb1: Tensor, obb2: Tensor, eps: float = 1e-7) -> Tensor: - """ - Calculate the prob IoU between oriented bounding boxes, https://arxiv.org/pdf/2106.06072v1.pdf. + """Calculate the probabilistic IoU between oriented bounding boxes, + https://arxiv.org/pdf/2106.06072v1.pdf. - Args: - obb1 (torch.Tensor | np.ndarray): A tensor of shape (N, 5) representing ground truth obbs, with xywhr format. - obb2 (torch.Tensor | np.ndarray): A tensor of shape (M, 5) representing predicted obbs, with xywhr format. 
- eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7. + @type obb1: Tensor + @param obb1: A tensor of shape (N, 5) representing ground truth OBBs, with xywhr format. + @type obb2: Tensor + @param obb2: A tensor of shape (M, 5) representing predicted OBBs, with xywhr format. + @type eps: float + @param eps: A small value to avoid division by zero. Defaults to 1e-7. - Returns: - (torch.Tensor): A tensor of shape (N, M) representing obb similarities. + @rtype: Tensor + @return: A tensor of shape (N, M) representing OBB similarities. """ obb1 = torch.from_numpy(obb1) if isinstance(obb1, np.ndarray) else obb1 obb2 = torch.from_numpy(obb2) if isinstance(obb2, np.ndarray) else obb2 @@ -509,13 +509,14 @@ def batch_probiou(obb1: Tensor, obb2: Tensor, eps: float = 1e-7) -> Tensor: def _get_covariance_matrix(boxes: Tensor) -> tuple[Tensor, ...]: - """Generating covariance matrix from obbs. - - Args: - boxes (torch.Tensor): A tensor of shape (N, 5) representing rotated bounding boxes, with xywhr format. - - Returns: - tuple(torch.Tensor): Covariance matrices corresponding to original rotated bounding boxes. + """Generate covariance matrix from OBBs. + + @type boxes: Tensor + @param boxes: A tensor of shape (N, 5) representing rotated bounding + boxes, with xywhr format. + @rtype: tuple(Tensor) + @return: Covariance matrices corresponding to original rotated + bounding boxes. """ # Gaussian bounding boxes, ignore the center points (the first two columns) because they are not needed here. gbbs = torch.cat((boxes[:, 2:4].pow(2) / 12, boxes[:, 4:]), dim=-1) @@ -825,16 +826,17 @@ def batched_nms_obb( def batched_nms_rotated( boxes: Tensor, scores: Tensor, threshold: float = 0.45 -) -> Tensor: - """NMS for oriented bounding boxes using probiou and fast-nms. - - Args: - boxes (torch.Tensor): Rotated bounding boxes, shape (N, 5), format xywhr. - scores (torch.Tensor): Confidence scores, shape (N,). - threshold (float, optional): IoU threshold. Defaults to 0.45. - - Returns: - (torch.Tensor): Indices of boxes to keep after NMS. +) -> Tensor | np.ndarray: + """NMS for oriented bounding boxes using Probiou and Fast-NMS. + + @type boxes: Tensor + @param boxes: Rotated bounding boxes, shape (N, 5), format xywhr. + @type scores: Tensor + @param scores: Confidence scores, shape (N,). + @type threshold: float + @param threshold: IoU threshold. Defaults to 0.45. + @rtype: Tensor | np.ndarray + @return: Indices of boxes to keep after NMS. """ if len(boxes) == 0: return np.empty((0,), dtype=np.int8)
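# A minimal usage sketch for `batched_nms_rotated` (illustrative values only;
# it assumes nothing beyond the signature documented above: boxes of shape
# (N, 5) in xywhr format, scores of shape (N,), and an IoU threshold):
#
#     boxes = torch.tensor(
#         [[50.0, 50.0, 20.0, 10.0, 0.3],    # box A
#          [51.0, 50.0, 20.0, 10.0, 0.3],    # box B, heavily overlaps A
#          [200.0, 120.0, 40.0, 30.0, 1.0]]  # box C, isolated
#     )
#     scores = torch.tensor([0.9, 0.8, 0.7])
#     keep = batched_nms_rotated(boxes, scores, threshold=0.45)
#     # A and B overlap above the threshold, so only the higher-scoring A
#     # is expected to survive alongside C.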