Computation prototype

lightly-ai · Nov 6, 2023 · fce0a84 · fce0a84
1 parent d126d11
commit fce0a84
Show file tree

Hide file tree

Showing 3 changed files with 235 additions and 2 deletions.
diff --git a/compute_prototype.py b/compute_prototype.py
@@ -0,0 +1,155 @@
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Counter, Dict, Set, Tuple
+
+from labelformat.formats import LightlyObjectDetectionInput
+from labelformat.model.object_detection import ObjectDetectionInput
+from PIL import Image
+
+
+def main() -> None:
+    image_folder = Path("/Users/michal/datasets/aquarium_predictions")
+    image_insights_data = get_image_insights(image_folder=image_folder)
+    present_image_insights(image_insights_data=image_insights_data)
+
+    label_folder = Path(
+        "/Users/michal/datasets/aquarium_predictions/.lightly/predictions/object-detection"
+    )
+    label_input = LightlyObjectDetectionInput(
+        input_folder=label_folder,
+        images_rel_path="../../..",
+    )
+    od_insights_data = get_object_detection_insights(label_input=label_input)
+    present_object_detection_insights(od_insights_data=od_insights_data)
+
+
+@dataclass(frozen=True)
+class ImageInsightsData:
+    """Immutable summary of the images found in a dataset folder."""
+
+    # Number of image files that were visited.
+    num_images: int
+    # Histogram of image sizes; keys are the `size` tuples reported by PIL.
+    images_sizes: Counter[Tuple[int, int]]
+    # Base names (without directory part) of the visited image files.
+    filename_set: Set[str]
+
+
+@dataclass
+class ObjectInsightsData:
+    num_objects: int
+    objects_per_image: Counter[int]
+    object_sizes_abs: Counter[Tuple[float, float]]
+    object_sizes_rel: Counter[Tuple[float, float]]
+
+    @classmethod
+    def create_empty(cls) -> "ObjectInsightsData":
+        return cls(
+            num_objects=0,
+            objects_per_image=Counter(),
+            object_sizes_abs=Counter(),
+            object_sizes_rel=Counter(),
+        )
+
+
+@dataclass(frozen=True)
+class ObjectDetectionInsightsData:
+    """Summary of an object detection label set.
+
+    The dataclass itself is frozen, but the contained `ObjectInsightsData`
+    instances and collections remain mutable.
+    """
+
+    # Number of labels (one per image) that were processed.
+    num_images: int
+    # Filenames of the images referenced by the processed labels.
+    filename_set: Set[str]
+    # Statistics aggregated over the objects of all classes.
+    total: ObjectInsightsData
+    # Per-class statistics, keyed by category name.
+    classes: Dict[str, ObjectInsightsData]
+
+
+def get_image_insights(image_folder: Path) -> ImageInsightsData:
+    num_images = 0
+    images_sizes = Counter[Tuple[int, int]]()
+    filename_set = set()
+
+    # Param: Recursive?
+    # Param: Subsample?
+    for image_path in image_folder.glob("*.jpg"):
+        num_images += 1
+        filename_set.add(image_path.name)
+        with Image.open(image_path) as image:
+            images_sizes[image.size] += 1
+
+    return ImageInsightsData(
+        num_images=num_images,
+        images_sizes=images_sizes,
+        filename_set=filename_set,
+    )
+
+
+def present_image_insights(image_insights_data: ImageInsightsData) -> None:
+    print(f"Num images: {image_insights_data.num_images}")
+    print(f"Images sizes: {image_insights_data.images_sizes.most_common()}")
+    print(f"Filename sample: {list(image_insights_data.filename_set)[:5]}")
+
+
+def get_object_detection_insights(
+    label_input: ObjectDetectionInput,
+) -> ObjectDetectionInsightsData:
+    num_images = 0
+    filename_set = set()
+    total_data = ObjectInsightsData.create_empty()
+    class_data = {
+        category.name: ObjectInsightsData.create_empty()
+        for category in label_input.get_categories()
+    }
+
+    for label in label_input.get_labels():
+        num_images += 1
+        filename_set.add(label.image.filename)
+
+        for obj in label.objects:
+            # Number of objects.
+            total_data.num_objects += 1
+            class_data[obj.category.name].num_objects += 1
+
+            # Objects per image.
+            total_data.objects_per_image[len(label.objects)] += 1
+            class_data[obj.category.name].objects_per_image[len(label.objects)] += 1
+
+            # Object sizes.
+            obj_size_abs = (
+                obj.box.xmax - obj.box.xmin,
+                obj.box.ymax - obj.box.ymin,
+            )
+            obj_size_rel = (
+                (obj.box.xmax - obj.box.xmin) / label.image.width,
+                (obj.box.ymax - obj.box.ymin) / label.image.height,
+            )
+            total_data.object_sizes_abs[obj_size_abs] += 1
+            total_data.object_sizes_rel[obj_size_rel] += 1
+            class_data[obj.category.name].object_sizes_abs[obj_size_abs] += 1
+            class_data[obj.category.name].object_sizes_rel[obj_size_rel] += 1
+
+    return ObjectDetectionInsightsData(
+        num_images=num_images,
+        filename_set=filename_set,
+        total=total_data,
+        classes=class_data,
+    )
+
+
+def present_object_detection_insights(
+    od_insights_data: ObjectDetectionInsightsData,
+) -> None:
+    print(f"Num images with labels: {od_insights_data.num_images}")
+    print(f"Filename sample: {list(od_insights_data.filename_set)[:5]}")
+    print(f"Num objects: {od_insights_data.total.num_objects}")
+    print(
+        f"Objects per image: {od_insights_data.total.objects_per_image.most_common()}"
+    )
+    print(
+        f"Object sizes abs sample: {od_insights_data.total.object_sizes_abs.most_common()[:10]}"
+    )
+    print(
+        f"Object sizes rel sample: {od_insights_data.total.object_sizes_rel.most_common()[:10]}"
+    )
+    print(f"Num classes: {len(od_insights_data.classes)}")
+
+    # Class histogram.
+    class_histogram = Counter[str]()
+    for class_name, class_data in od_insights_data.classes.items():
+        class_histogram[class_name] += class_data.num_objects
+    print(f"Class histogram: {class_histogram.most_common()}")
+
+
+# Run the prototype only when the file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -6,14 +6,15 @@ build-backend = "poetry.core.masonry.api"
 name = "lightly-insights"
 version = "0.1.0"
 authors = ["Lightly.ai"]
-description = "A tool for converting computer vision label formats."
+description = "Easily get basic insights about your ML dataset."
 readme = "README.md"
 license = "MIT"
 
 [tool.poetry.dependencies]
 python = ">=3.7"
 tqdm = "*"
 pillow = "*"
+labelformat = "^0.1.1"
 
 [tool.poetry.group.dev.dependencies]
 mypy = "*"