Skip to content

Commit

Permalink
Computation prototype
Browse files Browse the repository at this point in the history
  • Loading branch information
michal-lightly committed Nov 6, 2023
1 parent d126d11 commit fce0a84
Show file tree
Hide file tree
Showing 3 changed files with 235 additions and 2 deletions.
155 changes: 155 additions & 0 deletions compute_prototype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
from dataclasses import dataclass
from pathlib import Path
from typing import Counter, Dict, Set, Tuple

from labelformat.formats import LightlyObjectDetectionInput
from labelformat.model.object_detection import ObjectDetectionInput
from PIL import Image


def main() -> None:
    """Run the insights prototype against a hard-coded local dataset.

    First gathers and prints image statistics for the dataset folder, then
    loads object-detection predictions in Lightly format and prints the
    statistics derived from them.
    """
    dataset_root = Path("/Users/michal/datasets/aquarium_predictions")
    present_image_insights(
        image_insights_data=get_image_insights(image_folder=dataset_root)
    )

    predictions_dir = Path(
        "/Users/michal/datasets/aquarium_predictions/.lightly/predictions/object-detection"
    )
    # NOTE(review): "../../.." presumably resolves from the predictions folder
    # back to the dataset root that holds the images -- confirm against the
    # labelformat documentation.
    detection_input = LightlyObjectDetectionInput(
        input_folder=predictions_dir,
        images_rel_path="../../..",
    )
    present_object_detection_insights(
        od_insights_data=get_object_detection_insights(label_input=detection_input)
    )


@dataclass(frozen=True)
class ImageInsightsData:
    """Immutable summary of the images found in a folder."""

    # Number of image files that were visited.
    num_images: int
    # Histogram mapping an image size tuple to how many images have that size.
    images_sizes: Counter[Tuple[int, int]]
    # File names (without directory part) of the visited images.
    filename_set: Set[str]


@dataclass
class ObjectInsightsData:
    """Mutable accumulator of statistics about a set of annotated objects."""

    # Running count of objects.
    num_objects: int
    # Histogram keyed by an objects-per-image count.
    objects_per_image: Counter[int]
    # Histogram keyed by a (width, height) pair in absolute units.
    object_sizes_abs: Counter[Tuple[float, float]]
    # Histogram keyed by a (width, height) pair relative to the image size.
    object_sizes_rel: Counter[Tuple[float, float]]

    @classmethod
    def create_empty(cls) -> "ObjectInsightsData":
        """Return a fresh instance with a zero count and empty histograms.

        Each histogram is a distinct ``Counter`` object, so instances created
        by separate calls never share state.
        """
        # Positional order follows the field declaration order above.
        return cls(0, Counter(), Counter(), Counter())


@dataclass(frozen=True)
class ObjectDetectionInsightsData:
    """Immutable summary of an object-detection label set."""

    # Number of labelled images that were visited.
    num_images: int
    # File names of the labelled images.
    filename_set: Set[str]
    # Statistics aggregated over all objects regardless of class.
    total: ObjectInsightsData
    # Statistics per class, keyed by class name.
    classes: Dict[str, ObjectInsightsData]


def get_image_insights(image_folder: Path) -> ImageInsightsData:
    """Collect basic statistics about the ``*.jpg`` files in a folder.

    Only files directly inside ``image_folder`` that match the glob pattern
    ``*.jpg`` are visited. Each one is opened with PIL to read its size.

    Args:
        image_folder: Directory to scan for images.

    Returns:
        The number of visited images, a histogram of their sizes and the set
        of their file names.
    """
    size_histogram = Counter[Tuple[int, int]]()
    seen_names: Set[str] = set()
    visited = 0

    # Param: Recursive?
    # Param: Subsample?
    for visited, jpg_path in enumerate(image_folder.glob("*.jpg"), start=1):
        seen_names.add(jpg_path.name)
        # The context manager closes the underlying file once the size is read.
        with Image.open(jpg_path) as opened:
            size_histogram[opened.size] += 1

    return ImageInsightsData(
        num_images=visited,
        images_sizes=size_histogram,
        filename_set=seen_names,
    )


def present_image_insights(image_insights_data: ImageInsightsData) -> None:
    """Print the image statistics to standard output.

    Prints the image count, the size histogram ordered from most to least
    common, and up to five file names taken from the (unordered) name set.

    Args:
        image_insights_data: Statistics to display.
    """
    insights = image_insights_data
    print(f"Num images: {insights.num_images}")
    print(f"Images sizes: {insights.images_sizes.most_common()}")
    # The set has no defined order, so this is an arbitrary sample of names.
    print(f"Filename sample: {list(insights.filename_set)[:5]}")


def get_object_detection_insights(
    label_input: ObjectDetectionInput,
) -> ObjectDetectionInsightsData:
    """Collect statistics about the objects in an object-detection label set.

    Statistics are gathered both over all objects (``total``) and separately
    for every category reported by ``label_input.get_categories()``.

    The objects-per-image histograms are updated once per image:

    * ``total.objects_per_image[k]`` is the number of images that contain
      exactly ``k`` objects, including images with no objects (``k == 0``).
    * ``classes[name].objects_per_image[k]`` is the number of images that
      contain exactly ``k`` objects of class ``name``. Images without any
      object of that class are not recorded for it.

    Args:
        label_input: Source of categories and per-image labels.

    Returns:
        Image count, image file names, and the total and per-class object
        statistics.

    Raises:
        KeyError: If an object refers to a category name that is not among
            ``label_input.get_categories()``.
        ZeroDivisionError: If a labelled image reports a width or height of
            zero while containing at least one object.
    """
    num_images = 0
    filename_set: Set[str] = set()
    total_data = ObjectInsightsData.create_empty()
    class_data = {
        category.name: ObjectInsightsData.create_empty()
        for category in label_input.get_categories()
    }

    for label in label_input.get_labels():
        num_images += 1
        filename_set.add(label.image.filename)

        # Objects per image: counted once per image. Incrementing inside the
        # object loop would add k entries for an image with k objects and
        # would never record images that have no objects.
        total_data.objects_per_image[len(label.objects)] += 1
        objects_in_image_by_class = Counter[str]()

        for obj in label.objects:
            class_name = obj.category.name
            class_datum = class_data[class_name]
            objects_in_image_by_class[class_name] += 1

            # Number of objects.
            total_data.num_objects += 1
            class_datum.num_objects += 1

            # Object sizes, absolute and relative to the image dimensions.
            box_width = obj.box.xmax - obj.box.xmin
            box_height = obj.box.ymax - obj.box.ymin
            obj_size_abs = (box_width, box_height)
            obj_size_rel = (
                box_width / label.image.width,
                box_height / label.image.height,
            )
            total_data.object_sizes_abs[obj_size_abs] += 1
            total_data.object_sizes_rel[obj_size_rel] += 1
            class_datum.object_sizes_abs[obj_size_abs] += 1
            class_datum.object_sizes_rel[obj_size_rel] += 1

        # Per-class objects per image: keyed by this class's own count in the
        # image rather than by the image's total object count.
        for class_name, class_count in objects_in_image_by_class.items():
            class_data[class_name].objects_per_image[class_count] += 1

    return ObjectDetectionInsightsData(
        num_images=num_images,
        filename_set=filename_set,
        total=total_data,
        classes=class_data,
    )


def present_object_detection_insights(
    od_insights_data: ObjectDetectionInsightsData,
) -> None:
    """Print the object-detection statistics to standard output.

    Prints the labelled image count, up to five file names from the
    (unordered) name set, the aggregate object statistics, the number of
    classes, and a per-class object count ordered from most to least common.

    Args:
        od_insights_data: Statistics to display.
    """
    overall = od_insights_data.total

    print(f"Num images with labels: {od_insights_data.num_images}")
    # The set has no defined order, so this is an arbitrary sample of names.
    print(f"Filename sample: {list(od_insights_data.filename_set)[:5]}")
    print(f"Num objects: {overall.num_objects}")
    print(f"Objects per image: {overall.objects_per_image.most_common()}")
    # Only the ten most frequent sizes are shown.
    print(f"Object sizes abs sample: {overall.object_sizes_abs.most_common(10)}")
    print(f"Object sizes rel sample: {overall.object_sizes_rel.most_common(10)}")
    print(f"Num classes: {len(od_insights_data.classes)}")

    # Class histogram.
    objects_by_class = Counter[str](
        {name: stats.num_objects for name, stats in od_insights_data.classes.items()}
    )
    print(f"Class histogram: {objects_by_class.most_common()}")


# Run the prototype only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
79 changes: 78 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@ build-backend = "poetry.core.masonry.api"
name = "lightly-insights"
version = "0.1.0"
authors = ["Lightly.ai"]
description = "A tool for converting computer vision label formats."
description = "Easily get basic insights about your ML dataset."
readme = "README.md"
license = "MIT"

[tool.poetry.dependencies]
python = ">=3.7"
tqdm = "*"
pillow = "*"
labelformat = "^0.1.1"

[tool.poetry.group.dev.dependencies]
mypy = "*"
Expand Down

0 comments on commit fce0a84

Please sign in to comment.