
Commit

Bump version to v2.4.2
CVHub520 committed Sep 6, 2024
1 parent e72444a commit d9302d6
Showing 28 changed files with 14,935 additions and 14,814 deletions.
2 changes: 2 additions & 0 deletions README.md
@@ -34,6 +34,7 @@
## 🥳 What's New

- Sep. 2024:
+  - Release version [2.4.2](https://github.com/CVHub520/X-AnyLabeling/releases/tag/v2.4.2)
- 🔥🔥🔥 Added support for interactive video object tracking based on [Segment-Anything-2](https://github.com/CVHub520/segment-anything-2). [[Tutorial](examples/interactive_video_object_segmentation/README.md)]

<br>
@@ -199,6 +200,7 @@ For more details, please refer to 👉 [model_zoo](./docs/en/model_zoo.md) 👈
- [Tracking by OBB Object Detection](./examples/multiple_object_tracking/README.md)
- [Tracking by Instance Segmentation](./examples/multiple_object_tracking/README.md)
- [Tracking by Pose Estimation](./examples/multiple_object_tracking/README.md)
+- [iVOS](./examples/interactive_video_object_segmentation/README.md)

## Contact

5 changes: 3 additions & 2 deletions README_zh-CN.md
@@ -33,7 +33,8 @@
## 🥳 What's New

- Sep. 2024:
-  - 🔥🔥🔥 Added support for interactive video object tracking based on [Segment-Anything-2](https://github.com/CVHub520/segment-anything-2). [[Tutorial](examples/interactive_video_object_segmentation/README.md)]
+  - Released [X-AnyLabeling v2.4.2](https://github.com/CVHub520/X-AnyLabeling/releases/tag/v2.4.2).
+  - 🔥🔥🔥 Added support for interactive video object tracking based on [Segment-Anything-2](https://github.com/CVHub520/segment-anything-2). [[Tutorial](examples/interactive_video_object_segmentation/README.md) | [Bilibili](https://www.bilibili.com/video/BV1kMpwedE8M/)]

<br>

@@ -202,7 +203,7 @@
- [Tracking by OBB Object Detection](./examples/multiple_object_tracking/README.md)
- [Tracking by Instance Segmentation](./examples/multiple_object_tracking/README.md)
- [Tracking by Pose Estimation](./examples/multiple_object_tracking/README.md)

+- [iVOS](./examples/interactive_video_object_segmentation/README.md)

## Contact

2 changes: 1 addition & 1 deletion anylabeling/app_info.py
@@ -1,4 +1,4 @@
__appname__ = "X-AnyLabeling"
__appdescription__ = "Advanced Auto Labeling Solution with Added Features"
__version__ = "2.4.1"
__version__ = "2.4.2"
__preferred_device__ = "CPU" # GPU or CPU
28,252 changes: 14,126 additions & 14,126 deletions anylabeling/resources/resources.py

Large diffs are not rendered by default.

534 changes: 267 additions & 267 deletions anylabeling/resources/translations/en_US.ts

Large diffs are not rendered by default.

534 changes: 267 additions & 267 deletions anylabeling/resources/translations/zh_CN.ts

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion anylabeling/services/auto_labeling/__base__/ram.py
@@ -116,7 +116,9 @@ def predict_shapes(self, image, image_path=None):
         outs = self.inference(blob)
         tags = self.postprocess(outs)
         description = self.get_results(tags)
-        result = AutoLabelingResult(shapes=[], replace=False, description=description)
+        result = AutoLabelingResult(
+            shapes=[], replace=False, description=description
+        )
         return result

@staticmethod
4 changes: 3 additions & 1 deletion anylabeling/services/auto_labeling/internimage_cls.py
@@ -116,7 +116,9 @@ def predict_shapes(self, image, image_path=None):
         blob = self.preprocess(image)
         predictions = self.net.get_ort_inference(blob, extract=False)
         label = self.postprocess(predictions)
-        result = AutoLabelingResult(shapes=[], replace=False, description=label)
+        result = AutoLabelingResult(
+            shapes=[], replace=False, description=label
+        )
         return result

def unload(self):
1 change: 1 addition & 0 deletions anylabeling/services/auto_labeling/lru_cache.py
@@ -1,4 +1,5 @@
"""Thread-safe LRU cache implementation."""

from collections import OrderedDict
import threading

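The module docstring names the technique. For orientation, a minimal sketch of a thread-safe LRU cache built from the same two imports (an `OrderedDict` guarded by a lock); the class body here is illustrative, not the repository's exact implementation:

```python
from collections import OrderedDict
import threading


class LRUCache:
    """Hypothetical sketch; the real lru_cache.py may differ in API."""

    def __init__(self, capacity: int):
        self._capacity = capacity
        self._store = OrderedDict()
        self._lock = threading.Lock()

    def get(self, key, default=None):
        with self._lock:
            if key not in self._store:
                return default
            # Mark as most recently used.
            self._store.move_to_end(key)
            return self._store[key]

    def put(self, key, value):
        with self._lock:
            if key in self._store:
                self._store.move_to_end(key)
            self._store[key] = value
            if len(self._store) > self._capacity:
                # Evict the least recently used entry.
                self._store.popitem(last=False)
```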
25 changes: 17 additions & 8 deletions anylabeling/services/auto_labeling/model_manager.py
@@ -971,6 +971,7 @@ def _load_model(self, model_id):
elif model_config["type"] == "segment_anything_2_video":
try:
from .segment_anything_2_video import SegmentAnything2Video

model_config["model"] = SegmentAnything2Video(
model_config, on_message=self.new_model_status.emit
)
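This hunk touches the manager's lazy-loading branch: the SAM 2 video class is imported only when that model type is requested, so its heavy dependencies are not paid at startup. A hedged sketch of the pattern (the surrounding function and the error message are illustrative, not the exact code in `model_manager.py`):

```python
# Hypothetical sketch of lazy model loading; names mirror the hunk above.
def load_model_for_type(model_config, on_message):
    if model_config["type"] == "segment_anything_2_video":
        try:
            # Deferred import: torch / SAM 2 are only pulled in when this
            # model type is actually requested.
            from .segment_anything_2_video import SegmentAnything2Video

            model_config["model"] = SegmentAnything2Video(
                model_config, on_message=on_message
            )
        except Exception as e:
            on_message(f"Error loading segment_anything_2_video model: {e}")
    return model_config
```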
@@ -1633,22 +1634,22 @@ def set_auto_labeling_preserve_existing_annotations_state(self, state):
         ].set_auto_labeling_preserve_existing_annotations_state(state)

     def set_auto_labeling_prompt(self):
-        model_list = ['segment_anything_2_video']
+        model_list = ["segment_anything_2_video"]
         if (
             self.loaded_model_config is not None
             and self.loaded_model_config["type"] in model_list
         ):
-            self.loaded_model_config[
-                "model"
-            ].set_auto_labeling_prompt()
+            self.loaded_model_config["model"].set_auto_labeling_prompt()

     def unload_model(self):
         """Unload model"""
         if self.loaded_model_config is not None:
             self.loaded_model_config["model"].unload()
             self.loaded_model_config = None

-    def predict_shapes(self, image, filename=None, text_prompt=None, run_tracker=False):
+    def predict_shapes(
+        self, image, filename=None, text_prompt=None, run_tracker=False
+    ):
         """Predict shapes.
         NOTE: This function is blocking. The model can take a long time to
         predict. So it is recommended to use predict_shapes_threading instead.
@@ -1686,7 +1687,9 @@ def predict_shapes(self, image, filename=None, text_prompt=None, run_tracker=False):
self.prediction_finished.emit()

@pyqtSlot()
-    def predict_shapes_threading(self, image, filename=None, text_prompt=None, run_tracker=False):
+    def predict_shapes_threading(
+        self, image, filename=None, text_prompt=None, run_tracker=False
+    ):
"""Predict shapes.
This function starts a thread to run the prediction.
"""
@@ -1717,11 +1720,17 @@ def predict_shapes_threading(self, image, filename=None, text_prompt=None, run_tracker=False):
         self.model_execution_thread = QThread()
         if text_prompt is not None:
             self.model_execution_worker = GenericWorker(
-                self.predict_shapes, image, filename, text_prompt=text_prompt
+                self.predict_shapes,
+                image,
+                filename,
+                text_prompt=text_prompt,
             )
         elif run_tracker is True:
             self.model_execution_worker = GenericWorker(
-                self.predict_shapes, image, filename, run_tracker=run_tracker
+                self.predict_shapes,
+                image,
+                filename,
+                run_tracker=run_tracker,
             )
         else:
             self.model_execution_worker = GenericWorker(
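The wrapped calls above all funnel into the same pattern: a `GenericWorker` carries the blocking `predict_shapes` call onto a `QThread`, with exactly one of `text_prompt` or `run_tracker` forwarded per request. A minimal sketch of that worker pattern, assuming PyQt5 (the real `GenericWorker` lives elsewhere in the codebase and may differ):

```python
# Hypothetical sketch of the worker/QThread pattern used above.
from PyQt5.QtCore import QObject, QThread, pyqtSignal


class GenericWorker(QObject):
    finished = pyqtSignal()

    def __init__(self, func, *args, **kwargs):
        super().__init__()
        self.func, self.args, self.kwargs = func, args, kwargs

    def run(self):
        # Execute the wrapped callable off the GUI thread.
        self.func(*self.args, **self.kwargs)
        self.finished.emit()


# Usage sketch: run predict_shapes without freezing the UI.
# thread = QThread()
# worker = GenericWorker(manager.predict_shapes, image, filename,
#                        run_tracker=True)
# worker.moveToThread(thread)
# thread.started.connect(worker.run)
# worker.finished.connect(thread.quit)
# thread.start()
```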
97 changes: 59 additions & 38 deletions anylabeling/services/auto_labeling/segment_anything_2_video.py
@@ -3,7 +3,8 @@
import traceback

import warnings
-warnings.filterwarnings('ignore')
+
+warnings.filterwarnings("ignore")

import cv2
import numpy as np
@@ -32,6 +33,7 @@ class SegmentAnything2Video(Model):

     class Meta:
         """Meta class to define required configurations and UI elements."""
+
         required_config_names = [
             "type",
             "name",
@@ -69,7 +71,7 @@ def __init__(self, config_path, on_message) -> None:
torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()

if torch.cuda.get_device_properties(0).major >= 8:
            # turn on tfloat32 for Ampere GPUs
# (https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices)
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
@@ -78,19 +80,19 @@ def __init__(self, config_path, on_message) -> None:
         self.model_abs_path = self.get_model_abs_path(
             self.config, "model_path"
         )
-        if not self.model_abs_path or not os.path.isfile(
-            self.model_abs_path
-        ):
+        if not self.model_abs_path or not os.path.isfile(self.model_abs_path):
             raise FileNotFoundError(
                 QCoreApplication.translate(
                     "Model",
                     "Could not download or initialize model of Segment Anything 2.",
                 )
             )
-        self.model_cfg = self.config['model_cfg']
+        self.model_cfg = self.config["model_cfg"]
         sam2_image_model = build_sam2(self.model_cfg, self.model_abs_path)
         self.image_predictor = SAM2ImagePredictor(sam2_image_model)
-        self.video_predictor = build_sam2_camera_predictor(self.model_cfg, self.model_abs_path)
+        self.video_predictor = build_sam2_camera_predictor(
+            self.model_cfg, self.model_abs_path
+        )
         self.is_first_init = True

         # Initialize marking and prompting structures
@@ -111,7 +113,9 @@ def set_auto_labeling_reset_tracker(self):
if self.prompts:
try:
self.video_predictor.reset_state()
print(f'Successful: The tracker has been reset to its initial state.')
print(
f"Successful: The tracker has been reset to its initial state."
)
except Exception as e: # noqa
pass
self.prompts = []
@@ -121,33 +125,33 @@ def set_auto_labeling_prompt(self):
         point_coords, point_labels, box = self.marks_to_prompts()
         if box:
             promot = {
-                'type': 'rectangle',
-                'data': np.array([[*box[:2]], [*box[2:]]], dtype=np.float32)
+                "type": "rectangle",
+                "data": np.array([[*box[:2]], [*box[2:]]], dtype=np.float32),
             }
             self.prompts.append(promot)
-        elif (point_coords and point_labels):
+        elif point_coords and point_labels:
             promot = {
-                'type': 'point',
-                'data': {
-                    'point_coords': np.array(point_coords, dtype=np.float32),
-                    'point_labels': np.array(point_labels, dtype=np.int32),
-                }
+                "type": "point",
+                "data": {
+                    "point_coords": np.array(point_coords, dtype=np.float32),
+                    "point_labels": np.array(point_labels, dtype=np.int32),
+                },
             }
             self.prompts.append(promot)

     def marks_to_prompts(self):
         """Convert marks to prompts for the model."""
         point_coords, point_labels, box = None, None, None
         for marks in self.marks:
-            if marks['type'] == 'rectangle':
-                box = marks['data']
-            elif marks['type'] == 'point':
+            if marks["type"] == "rectangle":
+                box = marks["data"]
+            elif marks["type"] == "point":
                 if point_coords is None and point_labels is None:
-                    point_coords = [marks['data']]
-                    point_labels = [marks['label']]
+                    point_coords = [marks["data"]]
+                    point_labels = [marks["label"]]
                 else:
-                    point_coords.append(marks['data'])
-                    point_labels.append(marks['label'])
+                    point_coords.append(marks["data"])
+                    point_labels.append(marks["label"])
         return point_coords, point_labels, box
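`set_auto_labeling_prompt` and `marks_to_prompts` together turn UI marks into one dict per interaction. For reference, the two prompt shapes the code above constructs look roughly like this (coordinates are illustrative; by SAM convention a point label of 1 marks foreground and 0 background):

```python
import numpy as np

# A box prompt: top-left and bottom-right corners.
rect_prompt = {
    "type": "rectangle",
    "data": np.array([[100.0, 50.0], [320.0, 240.0]], dtype=np.float32),
}

# A point prompt: click coordinates plus per-click labels.
point_prompt = {
    "type": "point",
    "data": {
        "point_coords": np.array([[160.0, 120.0]], dtype=np.float32),
        "point_labels": np.array([1], dtype=np.int32),
    },
}
```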

def post_process(self, masks, label=None):
Expand All @@ -162,7 +166,7 @@ def post_process(self, masks, label=None):
"""
# Convert masks to binary format
masks[masks > 0.0] = 255
masks[masks <= 0.] = 0
masks[masks <= 0.0] = 0
masks = masks.astype(np.uint8)

# Find contours of the masks
@@ -302,25 +306,39 @@ def video_process(self, cv_image, filename):
         if not self.prompts:
             return [], False

-        if not any(filename.endswith(ext) for ext in [".jpg", ".jpeg", ".JPG", ".JPEG"]):
+        if not any(
+            filename.endswith(ext)
+            for ext in [".jpg", ".jpeg", ".JPG", ".JPEG"]
+        ):
             print(f"Only JPEG format is supported, but got {filename}")
             return [], False

         if self.is_first_init:
             self.video_predictor.load_first_frame(cv_image)
             ann_frame_idx = 0
             for i, prompt in enumerate(self.prompts):
-                ann_obj_id = i + 1  # give a unique id to each object we interact with (it can be any integers)
-                if prompt['type'] == 'rectangle':
-                    bbox = prompt['data']
-                    _, out_obj_ids, out_mask_logits = self.video_predictor.add_new_prompt(
-                        frame_idx=ann_frame_idx, obj_id=ann_obj_id, bbox=bbox
-                    )
+                ann_obj_id = (
+                    i + 1
+                )  # give a unique id to each object we interact with (it can be any integers)
+                if prompt["type"] == "rectangle":
+                    bbox = prompt["data"]
+                    _, out_obj_ids, out_mask_logits = (
+                        self.video_predictor.add_new_prompt(
+                            frame_idx=ann_frame_idx,
+                            obj_id=ann_obj_id,
+                            bbox=bbox,
+                        )
+                    )
-                elif prompt['type'] == 'point':
-                    points = prompt['data']['point_coords']
-                    labels = prompt['data']['point_labels']
-                    _, out_obj_ids, out_mask_logits = self.video_predictor.add_new_prompt(
-                        frame_idx=ann_frame_idx, obj_id=ann_obj_id, points=points, labels=labels
-                    )
+                elif prompt["type"] == "point":
+                    points = prompt["data"]["point_coords"]
+                    labels = prompt["data"]["point_labels"]
+                    _, out_obj_ids, out_mask_logits = (
+                        self.video_predictor.add_new_prompt(
+                            frame_idx=ann_frame_idx,
+                            obj_id=ann_obj_id,
+                            points=points,
+                            labels=labels,
+                        )
+                    )
             self.is_first_init = False
             return [], False
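Condensed, the branch above implements the camera predictor's streaming protocol: the first JPEG frame seeds the tracker (`load_first_frame` plus one `add_new_prompt` per object), and every later frame only propagates the existing objects. A sketch of that control flow (`load_first_frame`/`add_new_prompt` mirror the diff; `track` and the state handling are assumptions about the predictor's API):

```python
# Condensed sketch of the first-frame / tracking split shown above.
def process_frame(predictor, frame, prompts, state):
    if state["is_first_init"]:
        predictor.load_first_frame(frame)
        for obj_id, prompt in enumerate(prompts, start=1):
            if prompt["type"] == "rectangle":
                predictor.add_new_prompt(
                    frame_idx=0, obj_id=obj_id, bbox=prompt["data"]
                )
            else:  # point prompt
                predictor.add_new_prompt(
                    frame_idx=0,
                    obj_id=obj_id,
                    points=prompt["data"]["point_coords"],
                    labels=prompt["data"]["point_labels"],
                )
        state["is_first_init"] = False
        return None  # the seeding frame reports no shapes
    # Every later frame just propagates the existing object masks.
    return predictor.track(frame)
```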
@@ -333,10 +351,12 @@ def video_process(self, cv_image, filename):
                 masks = masks[0][0]
             else:
                 masks = masks[0]
-            shapes.extend(self.post_process(masks, label=f'object{i}'))
+            shapes.extend(self.post_process(masks, label=f"object{i}"))
         return shapes, True

-    def predict_shapes(self, image, filename=None, run_tracker=False) -> AutoLabelingResult:
+    def predict_shapes(
+        self, image, filename=None, run_tracker=False
+    ) -> AutoLabelingResult:
"""Predict shapes from an image or video frame.
Args:
@@ -378,7 +398,8 @@ def get_ann_frame_idx(filename):
             int: The index of the frame in the sorted list of frames, or -1 if not found.
         """
         frame_names = [
-            p for p in os.listdir(os.path.dirname(filename))
+            p
+            for p in os.listdir(os.path.dirname(filename))
             if os.path.splitext(p)[-1] in [".jpg", ".jpeg", ".JPG", ".JPEG"]
         ]
         if not frame_names:
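The helper is truncated just after `frame_names` is collected. A plausible completion under the convention its docstring describes (the frame's index within the directory's sorted JPEG list); the numeric sort key and the lookup are assumptions:

```python
import os


def get_ann_frame_idx(filename: str) -> int:
    """Index of `filename` among its directory's JPEG frames, or -1."""
    frame_names = [
        p
        for p in os.listdir(os.path.dirname(filename))
        if os.path.splitext(p)[-1] in [".jpg", ".jpeg", ".JPG", ".JPEG"]
    ]
    if not frame_names:
        return -1
    # Assumed convention: frame files are named by integer index.
    frame_names.sort(key=lambda p: int(os.path.splitext(p)[0]))
    try:
        return frame_names.index(os.path.basename(filename))
    except ValueError:
        return -1
```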
@@ -164,9 +164,11 @@ def _create_object_prediction_list_from_original_predictions(
         object_prediction_list = [
             ObjectPrediction(
                 bbox=box.tolist() if mask is None else None,
-                bool_mask=mask.detach().cpu().numpy()
-                if mask is not None
-                else None,
+                bool_mask=(
+                    mask.detach().cpu().numpy()
+                    if mask is not None
+                    else None
+                ),
                 category_id=category_id.item(),
                 category_name=self.category_mapping[
                     str(category_id.item())
@@ -409,11 +409,11 @@ def __call__(
                    self.match_metric,
                    self.match_threshold,
                ):
-                    object_prediction_list[
-                        keep_ind
-                    ] = merge_object_prediction_pair(
-                        object_prediction_list[keep_ind].tolist(),
-                        object_prediction_list[merge_ind].tolist(),
-                    )
+                    object_prediction_list[keep_ind] = (
+                        merge_object_prediction_pair(
+                            object_prediction_list[keep_ind].tolist(),
+                            object_prediction_list[merge_ind].tolist(),
+                        )
+                    )
                selected_object_predictions.append(
                    object_prediction_list[keep_ind].tolist()
@@ -451,11 +451,11 @@ def __call__(
                    self.match_metric,
                    self.match_threshold,
                ):
-                    object_prediction_list[
-                        keep_ind
-                    ] = merge_object_prediction_pair(
-                        object_prediction_list[keep_ind].tolist(),
-                        object_prediction_list[merge_ind].tolist(),
-                    )
+                    object_prediction_list[keep_ind] = (
+                        merge_object_prediction_pair(
+                            object_prediction_list[keep_ind].tolist(),
+                            object_prediction_list[merge_ind].tolist(),
+                        )
+                    )
                selected_object_predictions.append(
                    object_prediction_list[keep_ind].tolist()
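Both hunks reformat the same greedy merge step: for each kept prediction, any remaining candidate whose match score clears `match_threshold` is folded into it via `merge_object_prediction_pair`. A schematic of that greedy merging (only the control flow mirrors the diff; the helpers here are stand-ins):

```python
# Schematic of greedy prediction merging, e.g. IoU-based NMM.
def greedy_merge(predictions, match_score, match_threshold, merge_pair):
    """predictions: list sorted by descending confidence."""
    selected = []
    remaining = list(predictions)
    while remaining:
        keep = remaining.pop(0)
        still_remaining = []
        for candidate in remaining:
            if match_score(keep, candidate) > match_threshold:
                # Fold the overlapping candidate into the kept prediction.
                keep = merge_pair(keep, candidate)
            else:
                still_remaining.append(candidate)
        remaining = still_remaining
        selected.append(keep)
    return selected
```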
6 changes: 3 additions & 3 deletions anylabeling/services/auto_labeling/utils/sahi/predict.py
@@ -113,9 +113,9 @@ def get_prediction(
         shift_amount=shift_amount,
         full_shape=full_shape,
     )
-    object_prediction_list: List[
-        ObjectPrediction
-    ] = detection_model.object_prediction_list
+    object_prediction_list: List[ObjectPrediction] = (
+        detection_model.object_prediction_list
+    )

     # postprocess matching predictions
     if postprocess is not None:
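`get_prediction` runs the detector on one slice and exposes the result as `detection_model.object_prediction_list`, with `shift_amount` and `full_shape` mapping slice-local boxes back to the full image. A hedged sketch of how a SAHI-style caller strings slices together (helper names beyond those visible in the hunk are assumptions):

```python
# Illustrative SAHI-style sliced-inference loop; helper names are stand-ins
# for the repository's actual slicing utilities.
def sliced_predict(image, detection_model, slice_boxes, postprocess):
    all_predictions = []
    for x0, y0, x1, y1 in slice_boxes:
        patch = image[y0:y1, x0:x1]
        detection_model.perform_inference(patch)  # assumed API
        # shift_amount maps slice-local boxes back to full-image coords.
        detection_model.convert_original_predictions(
            shift_amount=[x0, y0], full_shape=image.shape[:2]
        )
        all_predictions.extend(detection_model.object_prediction_list)
    # Deduplicate overlapping detections across slices.
    return postprocess(all_predictions)
```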
